ad_tv_lib_clean.py 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293
  1. #!/usr/bin/env python
  2. #coding=utf-8
  3. spec_char = ['栏目剧', '题材', '电视剧', '连续剧', '剧情', '。', ';', ';', ',', ',', '、', '/', ':', ':', '\\', '[1]', '[2]', '[3]', '[4]', '[5]', '[6]', '[7]', '[8]', '[9]', '[10]']
  4. import sys
  5. from fty_util.common import Mysql, Util
  6. reload(sys)
  7. sys.setdefaultencoding('utf8')
  8. conn = Mysql.createOfflineConn()
  9. # 清空数据
  10. sql = """
  11. select id, director, scriptwritter, main_actors, types, areas, plat_form, pub_comp, online_form, production from yxb.ad_tv_lib where categories = '' or categories is null order by id asc
  12. """
  13. rows = Mysql.getAll(sql, conn=conn)
  14. for row in rows:
  15. _id = row['id']
  16. director = row['director']
  17. scriptwritter = row['scriptwritter']
  18. main_actors = row['main_actors']
  19. types = row['types']
  20. areas = row['areas']
  21. plat_form = row['plat_form']
  22. pub_comp = row['pub_comp']
  23. online_form = row['online_form']
  24. production = row['production']
  25. if director is not None and len(director) > 0:
  26. for char in spec_char:
  27. director = director.replace(char, " ")
  28. if scriptwritter is not None and len(scriptwritter) > 0:
  29. for char in spec_char:
  30. scriptwritter = scriptwritter.replace(char, " ")
  31. if main_actors is not None and len(main_actors) > 0:
  32. for char in spec_char:
  33. main_actors = main_actors.replace(char, " ")
  34. if areas is not None and len(areas) > 0:
  35. for char in spec_char:
  36. areas = areas.replace(char, " ")
  37. if plat_form is not None and len(plat_form) > 0:
  38. for char in spec_char:
  39. plat_form = plat_form.replace(char, " ")
  40. if pub_comp is not None and len(pub_comp) > 0:
  41. for char in spec_char:
  42. pub_comp = pub_comp.replace(char, " ")
  43. if online_form is not None and len(online_form) > 0:
  44. for char in spec_char:
  45. online_form = online_form.replace(char, " ")
  46. if production is not None and len(production) > 0:
  47. for char in spec_char:
  48. production = production.replace(char, " ")
  49. if types is not None and len(types) > 0:
  50. for char in spec_char:
  51. types = types.replace(char, " ")
  52. types = types.replace(" ", " ")
  53. sql = """
  54. update yxb.ad_tv_lib set director = '%s', scriptwritter = '%s', main_actors = '%s', types = '%s', areas = '%s', plat_form = '%s', pub_comp = '%s', online_form = '%s', production = '%s' where id = '%s'
  55. """
  56. sql = sql % (director, scriptwritter, main_actors, types, areas, plat_form, pub_comp, online_form, production, _id)
  57. Mysql.execute(sql, conn=conn)
  58. # 更新 scrapy.types_analyse 类型字段
  59. Mysql.close(conn)
  60. # type_set = set()
  61. # for row in rows:
  62. # _id = row['id']
  63. # types = row['types']
  64. # if types is not None and len(types) > 0:
  65. # for char in spec_char:
  66. # types = types.replace(char, " ")
  67. # for _type in types.split(" "):
  68. # type_set.add(_type.strip())
  69. # for _type in type_set:
  70. # print _type
  71. # Mysql.close(conn)