#!/usr/bin/env python #coding=utf-8 """备案、发行表数据处理 """ import datetime import re import sys import time from fty_util.common import Mysql reload(sys) sys.setdefaultencoding('utf8') def parse_date(field, date_format): """ 日期转换 """ time_format = datetime.datetime.strptime(field, date_format) time_format = time_format.strftime(u'%Y-%m-%d') return time_format def parse_field(field): """ 处理字段,除了p_detail字段 """ if field is None or len(field) == 0: return "" else: field = strQ2B(field) return field.replace(' ', '') def strQ2B(ustring): """ 全角转半角 """ tstring = "" for uchar in ustring: inside_code = ord(uchar) # 全角空格直接转换 if inside_code == 12288: inside_code = 32 if inside_code == 160: inside_code = 32 # 全角字符(除空格)根据关系转化 elif (inside_code >= 65281 and inside_code <= 65374): inside_code -= 65248 tstring += unichr(inside_code) return tstring """ 从 odl.dsj_gongshi(电视剧备案数据)和odl.faxing(电视剧发行数据)表中提取数据到odl.ad_tv_record_distribution表中,作为数据分析来源数据 """ conn = Mysql.createOfflineConn() # 备案、发行表 sql = """ select id, name, area, theme, company, commit_company, show_date, license_id, form, num, film_date, film_period, `desc`, \ province_advice, relative_depart_advice, remark, scrapy_url, scrapy_date, scrapy_title, p_detail, scrapy_detail_url, \ union_company, scriptwriter, director from odl.dsj_gongshi order by id asc """ rows = Mysql.getAll(sql, conn=conn) # conn_max = dev_mysql_conn.Mysql() sql_max = """ select max(tv_id) tv_id from odl.ad_tv_record_distribution """ row_max = Mysql.getOne(sql_max, conn=conn) max_id = 0 if row_max is not None and len(row_max) > 0: max_id = row_max[0] for row in rows: _id = row['id'] if _id <= max_id: continue # 将p_detail字段转换为commit_company、show_date、license_id三个字段 p_detail = parse_field(row['p_detail']) commit_company = '' show_date = '' license_id = '' # 如果p_detail字段为空,则直接处理 if p_detail is None or len(p_detail) == 0: commit_company = parse_field(row['commit_company']) show_date = parse_field(row['show_date']) show_date = show_date.replace(u'年', '-').replace(u'月', '-') show_date = parse_date(show_date, u'%Y-%m-') license_id = parse_field(row['license_id']) else: try: str = p_detail.decode('utf8') xx=ur"\d+[\u5e74]\d+[\u6708]" p = re.compile(xx) date = p.findall(str)[0] company_license = str.split(date) commit_company = company_license[0] show_date = date show_date = parse_date(show_date, '%Y年%m月') license_id = company_license[-1] except Exception, e: pass # 原始格式 2016.11 film_date = parse_field(row['film_date']) try: film_date_pattern = re.compile('\d+') year_month = film_date_pattern.findall(film_date) year = year_month[0] month = year_month[1] film_date = parse_date(str(year) + '.' + str(month), '%Y.%m') except Exception, e: film_date = '' # film_date = parse_date(film_date, '%Y.%m') film_period = parse_field(row['film_period']) name = parse_field(row['name']) area = parse_field(row['area']) theme = parse_field(row['theme']) company = parse_field(row['company']) if company is None or len(company) == 0: company = commit_company form = parse_field(row['form']) num = parse_field(row['num']) desc = parse_field(row['desc']) province_advice = parse_field(row['province_advice']) relative_depart_advice = parse_field(row['relative_depart_advice']) remark = parse_field(row['remark']) scrapy_url = parse_field(row['scrapy_url']) scrapy_date = row['scrapy_date'] scrapy_title = parse_field(row['scrapy_title']) scrapy_detail_url = parse_field(row['scrapy_detail_url']) union_company = parse_field(row['union_company']) scriptwriter = parse_field(row['scriptwriter']) director = parse_field(row['director']) # 变更后的名称 current_name = '' # 变更后的公司 current_company = '' # 变更后的集数 current_num = 0 # conn_change_type1 = dev_mysql_conn.Mysql() # 电视剧名称变更 sql_change_type1 = """ select original_name, current_name from odl.dsj_change where original_name = '%s' and change_type = 1 """ sql_change_type1 = sql_change_type1 % (name) sql_change_type1_rows = Mysql.getAll(sql_change_type1, conn=conn) if len(sql_change_type1_rows) == 1: current_name = sql_change_type1_rows[0]['current_name'] elif len(sql_change_type1_rows) > 1: # 如果多余一条记录,则存放在dict中 name_dict = {} for row in sql_change_type1_rows: original_name = row['original_name'] current_name = row['current_name'] name_dict[original_name] = current_name while True: if len(name_dict) > 1: current_name = name_dict.get(name) if current_name is None or len(current_name) == 0: current_name = '' break del name_dict[name] else: current_name = name_dict.get(current_name) break # conn_change_type2 = dev_mysql_conn.Mysql() # 类型2变更 sql_change_type2 = """ select name, original_company, current_company from odl.dsj_change where name = '%s' and change_type = 2 """ sql_change_type2 = sql_change_type2 % (name) sql_change_type2_rows = Mysql.getAll(sql_change_type2, conn=conn) if len(sql_change_type2_rows) == 1: current_company = sql_change_type2_rows[0]['current_company'] elif len(sql_change_type2_rows) > 1: company_dict = {} for row in sql_change_type2_rows: original_company = row['original_company'] current_company = row['current_company'] company_dict[original_company] = current_company while True: if len(company_dict) > 1: current_company = company_dict.get(company) if current_company is None or len(current_company) == 0: current_company = '' break del company_dict[company] else: current_company = company_dict.get(current_company) break # conn_change_type3 = dev_mysql_conn.Mysql() # 类型3变更 sql_change_type3 = """ select name, original_num, current_num from odl.dsj_change where name = '%s' and change_type = 3 """ sql_change_type3 = sql_change_type3 % (name) sql_change_type3_rows = Mysql.getAll(sql_change_type3, conn=conn) if len(sql_change_type3_rows) == 1: current_num = sql_change_type3_rows[0]['current_num'] elif len(sql_change_type3_rows) > 1: num_dict = {} for row in sql_change_type3_rows: original_num = row['original_num'] current_num = row['current_num'] num_dict[original_num] = current_num while True: if len(num_dict) > 1: current_num = num_dict.get(num) if current_num is None or len(current_num) == 0: current_num = 0 break del num_dict[num] else: current_num = num_dict.get(current_num) break if current_name is None or len(current_name) == 0: current_name = name # 发行数据查询 sql_distribution = """ select name, company, num, pub_date from odl.faxing where name = '%s' """ sql_distribution = sql_distribution % (current_name) try: # conn_distribution = dev_mysql_conn.Mysql() sql_distribution_rows = Mysql.getAll(sql_distribution, conn=conn) except Exception, e: # conn_distribution = dev_mysql_conn.Mysql() sql_distribution_rows = Mysql.getAll(sql_distribution, conn=conn) is_distribute = 0 pub_date = '' if len(sql_distribution_rows) >= 1: pub_date = sql_distribution_rows[0]['pub_date'] is_distribute = 1 sql_insert = """ insert into odl.ad_tv_record_distribution (tv_id, name, current_name, area, theme, first_type, second_type, company, record_date, form, num, \ film_date, film_period, `desc`, scriptwriter, director, distribution_date, is_distribute) values ('%s', '%s', '%s', '%s', '%s', \ '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s') """ sql_insert = sql_insert % (_id, name, current_name, area, theme, theme, theme, company, show_date, form, num, film_date, film_period, desc.replace("'", "\\'"), scriptwriter, director, pub_date, is_distribute) print sql_insert try_times = 0 while True: if try_times > 3: break try: # conn1 = dev_mysql_conn.Mysql() Mysql.insertOne(sql_insert, conn=conn) try_times = 0 break except Exception, e: try_times += 1 print e if try_times > 3: break Mysql.close(conn)