123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596 |
#!/usr/bin/env python
#coding=utf-8
"""Normalise per-source TV type tags into standard categories.

For every row of ``scrapy.types_analyse`` whose merged column
``iqiyi_tengxun_after_baike_after_manual`` is NULL or empty, this script:

1. maps the raw space-separated type tags of each source (iqiyi, tengxun,
   baike, manual) to standard categories using ``odl.basic_categories``;
2. merges the sources in priority order (see ``_combine_types``);
3. writes the per-source and merged results back to ``scrapy.types_analyse``
   and copies the merged result to ``yxb.ad_tv_lib`` and ``odl.ad_tv_lib``.

This is Python 2 code (it relies on ``reload`` and
``sys.setdefaultencoding``).
"""
import datetime
import sys

from fty_util.common import Mysql

# Python 2 only: switch the interpreter's default encoding to UTF-8.
reload(sys)
sys.setdefaultencoding('utf8')

# Sources in merge-priority order. Each one has a raw column "<name>_types"
# and a normalised column "<name>_types_new" in scrapy.types_analyse.
SOURCES = ('iqiyi', 'tengxun', 'baike', 'manual')

# A merged result is only accepted once it holds more than this many
# distinct standard categories.
MIN_CATEGORY_COUNT = 2

SELECT_CATEGORIES_SQL = """
select standard_category, reflect_category from odl.basic_categories
"""

SELECT_PENDING_SQL = """
select tv_id, iqiyi_types, iqiyi_types_new, tengxun_types, tengxun_types_new, baike_types, baike_types_new, manual_types, manual_types_new from scrapy.types_analyse where iqiyi_tengxun_after_baike_after_manual is null or iqiyi_tengxun_after_baike_after_manual = '' order by tv_id asc
"""

UPDATE_ANALYSE_SQL = """
update scrapy.types_analyse set iqiyi_types_new = %s, tengxun_types_new = %s, baike_types_new = %s, manual_types_new = %s, iqiyi_tengxun_after_baike_after_manual = %s where tv_id = %s
"""

UPDATE_YXB_SQL = """
update yxb.ad_tv_lib set categories = %s where id = %s
"""

UPDATE_ODL_SQL = """
update odl.ad_tv_lib set categories = %s where tv_id = %s
"""


def _standard_types(raw_types, existing_types, mapping):
    """Return the set of standard categories for one source of one row.

    raw_types      -- space-separated raw tags from the source, or None.
    existing_types -- the already-normalised "<source>_types_new" value,
                      or None / '' when it has not been computed yet.
    mapping        -- dict of raw tag -> standard category.

    When ``existing_types`` already holds categories they are reused as-is.
    The previous implementation returned an empty set in that case, and the
    subsequent UPDATE then overwrote the stored value with ''.
    """
    if existing_types:
        existing = set(token for token in existing_types.split(' ') if token)
        if existing:
            return existing
    standard = set()
    if raw_types:
        for token in raw_types.split(' '):
            category = mapping.get(token)
            # Tags without a known standard category are dropped.
            if category is not None:
                standard.add(category)
    return standard


def _combine_types(iqiyi, tengxun, baike, manual):
    """Merge the per-source category sets in priority order.

    Starts from iqiyi + tengxun, then adds baike, then manual, and returns
    the first union that holds more than MIN_CATEGORY_COUNT categories.
    Returns an empty set when even the union of all four is too small.
    """
    merged = set()
    for extra in (iqiyi | tengxun, baike, manual):
        merged = merged | extra
        if len(merged) > MIN_CATEGORY_COUNT:
            return merged
    return set()


def _join_types(types):
    """Serialise a category set as a space-separated string.

    The categories are sorted so that the stored value is deterministic
    instead of following arbitrary set iteration order.
    """
    return ' '.join(sorted(types))


conn = Mysql.createOfflineConn()

# Fetch every standard category together with the source-specific tag that
# maps onto it, and index them as: source tag -> standard category.
cat_dict = {}
# NOTE(review): what getAll returns for an empty result is not visible here;
# "or ()" tolerates a falsy value -- confirm against fty_util.
for category in Mysql.getAll(SELECT_CATEGORIES_SQL, conn=conn) or ():
    cat_dict[category['reflect_category']] = category['standard_category']

# Rows that do not have a merged category list yet.
rows = Mysql.getAll(SELECT_PENDING_SQL, conn=conn) or ()
for row in rows:
    tv_id = row['tv_id']

    # One set of standard categories per source, in SOURCES order.
    type_sets = [
        _standard_types(row[source + '_types'],
                        row[source + '_types_new'],
                        cat_dict)
        for source in SOURCES
    ]

    # NOTE(review): when no union exceeds MIN_CATEGORY_COUNT the merged
    # value is '', so the row is selected again on the next run and '' is
    # also written to both ad_tv_lib tables below. Kept as in the original;
    # confirm that this is intended.
    all_types = _join_types(_combine_types(*type_sets))

    value = tuple(_join_types(types) for types in type_sets) + (all_types, tv_id)
    Mysql.update(UPDATE_ANALYSE_SQL, param=value, conn=conn)

    # Update the yxb.ad_tv_lib table.
    Mysql.update(UPDATE_YXB_SQL, param=(all_types, tv_id), conn=conn)

    # Update the odl.ad_tv_lib table.
    Mysql.update(UPDATE_ODL_SQL, param=(all_types, tv_id), conn=conn)
|