#!/usr/bin/env python
#coding=utf-8
"""Normalize the free-text columns of ``yxb.ad_tv_lib``.

For every row whose ``categories`` column is empty or NULL, each token listed
in ``spec_char`` (genre filler words, punctuation separators and footnote
markers such as ``[1]``) is replaced by a single space in the text columns
named in ``TEXT_COLUMNS``, and the row is written back.

Run as a script; importing the module only defines the helpers.
"""
import sys

# Tokens replaced by a single space in every text column.
# NOTE(review): ';', ',' and ':' each appear twice in the list as found;
# presumably one of each pair was a full-width variant lost in transcoding
# -- TODO confirm against the original file.
spec_char = ['栏目剧', '题材', '电视剧', '连续剧', '剧情', '。', ';', ';', ',', ',', '、', '/', ':', ':', '\\', '[1]', '[2]', '[3]', '[4]', '[5]', '[6]', '[7]', '[8]', '[9]', '[10]']

# Text columns rewritten by the UPDATE, in the order they appear in it.
TEXT_COLUMNS = (
    'director',
    'scriptwritter',
    'main_actors',
    'types',
    'areas',
    'plat_form',
    'pub_comp',
    'online_form',
    'production',
)

SELECT_SQL = """
select id, director, scriptwritter, main_actors, types, areas, plat_form, pub_comp, online_form, production from yxb.ad_tv_lib where categories = '' or categories is null order by id asc
"""


def clean_field(value):
    """Return *value* with every ``spec_char`` token replaced by a space.

    ``None`` and the empty string are returned unchanged.
    """
    if not value:
        return value
    for token in spec_char:
        value = value.replace(token, " ")
    return value


def sql_literal(value):
    """Render *value* as a SQL literal for direct inclusion in a statement.

    ``None`` becomes ``NULL`` (the previous ``'%s'`` formatting stored the
    text ``'None'`` instead).  Strings are single-quoted, with backslashes
    and single quotes escaped so a quote inside a value can no longer break
    the statement.

    NOTE(review): if ``Mysql.execute`` supports bound parameters, prefer
    those over manual quoting.
    """
    if value is None:
        return "NULL"
    escaped = value.replace("\\", "\\\\").replace("'", "''")
    return "'" + escaped + "'"


def build_update_sql(row):
    """Build the UPDATE statement that stores the cleaned columns of *row*.

    *row* is a mapping holding ``id`` and every name in ``TEXT_COLUMNS``.
    """
    assignments = []
    for column in TEXT_COLUMNS:
        value = clean_field(row[column])
        if column == 'types' and value:
            # NOTE(review): as found this replaces a single space with a
            # single space (a no-op); it presumably collapsed a double
            # space before the file's whitespace was mangled -- TODO confirm.
            value = value.replace(" ", " ")
        assignments.append("%s = %s" % (column, sql_literal(value)))
    return "update yxb.ad_tv_lib set %s where id = '%s'" % (
        ", ".join(assignments),
        row['id'],
    )


def main():
    """Clean every selected row and write it back, then close the connection."""
    if sys.version_info[0] == 2:
        # Python 2 only: make implicit str/unicode conversions use UTF-8.
        reload(sys)  # noqa: F821 -- builtin on Python 2
        sys.setdefaultencoding('utf8')

    # Imported here so the pure helpers above stay importable (and testable)
    # without the project package or a database.
    from fty_util.common import Mysql, Util  # noqa: F401 -- Util kept from the original import

    conn = Mysql.createOfflineConn()
    try:
        rows = Mysql.getAll(SELECT_SQL, conn=conn)
        # `or ()` tolerates a helper that returns None/False for "no rows".
        for row in rows or ():
            Mysql.execute(build_update_sql(row), conn=conn)
    finally:
        # Release the connection even if a statement fails.
        Mysql.close(conn)


if __name__ == "__main__":
    main()

# Update the type field of scrapy.types_analyse.
# (Disabled code kept from the original: collects and prints the distinct
# type tokens.)
# type_set = set()
# for row in rows:
#     _id = row['id']
#     types = row['types']
#     if types is not None and len(types) > 0:
#         for char in spec_char:
#             types = types.replace(char, " ")
#
#         for _type in types.split(" "):
#             type_set.add(_type.strip())
# for _type in type_set:
#     print _type
# Mysql.close(conn)