#!/usr/bin/env python #coding=utf-8 """计算每个电视剧的收视率 province_input: tmp.month_channel_stat province_output: tmp.tv_avg_ratings area_input: tmp.area_month_channel_stat area_output: tmp.area_tv_avg_ratings """ import datetime import sys from fty_util.common import Mysql, Util class tv_avg_ratings_clac(): def province(self): conn = Mysql.createOfflineConn() sql = """ truncate table tmp.tv_avg_ratings """ Mysql.execute(sql, conn=conn) # 从tmp.month_channel_stat表中取出每月的统计数据 sql = """ select channel, theater_attribute, tv_name, tv_id, tv_date, sum_value, count_value from tmp.month_channel_stat order by tv_date asc """ rows = Mysql.getAll(sql, conn=conn) channel_dict = {} # key中有日期 channel_date_dict = {} for row in rows: channel = row['channel'] theater_attribute = row['theater_attribute'] tv_name = row['tv_name'] tv_id = row['tv_id'] tv_date = row['tv_date'] sum_value = row['sum_value'] count_value = row['count_value'] key = (channel, theater_attribute, tv_name, tv_id) # 如果两部电视剧在同一台同一剧场播放两次,则保存两条记录 # 如果字典中不存在数据,则添加 if channel_dict.get(key) is None: channel_dict[key] = (str(tv_date), sum_value, count_value) channel_date_dict[(channel, theater_attribute, tv_name, tv_id, str(tv_date))] = (sum_value, count_value) else: # 否则进行时间判断 value = channel_dict.get(key) # 当前的日期 date1 = datetime.datetime.strptime(str(tv_date), '%Y-%m-%d') # 保存的日期 date2 = datetime.datetime.strptime(str(value[0]), '%Y-%m-%d') sub_value = (date2 - date1).days # 如果相减天数在这之间,表示同一部电视剧 if sub_value <= 25 and sub_value >= -25: value2 = channel_date_dict.get((channel, theater_attribute, tv_name, tv_id, str(value[0]))) channel_dict[key] = (str(value[0]), sum_value, count_value) channel_date_dict[(channel, theater_attribute, tv_name, tv_id, str(value[0]))] = tuple(x + y for x, y in zip((sum_value, count_value), tuple(value2))) # 日期超出范围,则表示同一部电视剧播出了多次 else: channel_dict[key] = (str(tv_date), sum_value, count_value) channel_date_dict[(channel, theater_attribute, tv_name, tv_id, str(tv_date))] = (sum_value, count_value) sql = """ insert into tmp.tv_avg_ratings (channel, theater_attribute, tv_name, tv_id, tv_date, value) values (%s, %s, %s, %s, %s, %s) """ data_list = [] for key in channel_date_dict.keys(): (channel, theater_attribute, tv_name, tv_id, tv_date) = key (sum_value, count_value) = channel_date_dict.get(key) # value = (channel, theater_attribute, tv_name, tv_id, tv_date, float(sum_value) / count_value) data_list.append((channel, theater_attribute, tv_name, tv_id, tv_date, float(sum_value) / count_value)) # Mysql.insertOne(sql, value=value, conn=conn) # Mysql.insertMany(sql, data_list, conn) Util.insert_by_chunk(sql, data_list, conn) Mysql.close(conn) def area(self): conn = Mysql.createOfflineConn() sql = """ truncate table tmp.area_tv_avg_ratings """ Mysql.execute(sql, conn=conn) # 从tmp.month_channel_stat表中取出每月的统计数据 sql = """ select channel, theater_attribute, tv_name, tv_id, tv_date, sum_value, count_value from tmp.area_month_channel_stat order by tv_date asc """ rows = Mysql.getAll(sql, conn=conn) channel_dict = {} # key中有日期 channel_date_dict = {} for row in rows: channel = row['channel'] theater_attribute = row['theater_attribute'] tv_name = row['tv_name'] tv_id = row['tv_id'] tv_date = row['tv_date'] sum_value = row['sum_value'] count_value = row['count_value'] key = (channel, theater_attribute, tv_name, tv_id) # 如果两部电视剧在同一台同一剧场播放两次,则保存两条记录 # 如果字典中不存在数据,则添加 if channel_dict.get(key) is None: channel_dict[key] = (str(tv_date), sum_value, count_value) channel_date_dict[(channel, theater_attribute, tv_name, tv_id, str(tv_date))] = (sum_value, count_value) else: # 否则进行时间判断 value = channel_dict.get(key) # 当前的日期 date1 = datetime.datetime.strptime(str(tv_date), '%Y-%m-%d') # 保存的日期 date2 = datetime.datetime.strptime(str(value[0]), '%Y-%m-%d') sub_value = (date2 - date1).days # 如果相减天数在这之间,表示同一部电视剧 if sub_value <= 25 and sub_value >= -25: value2 = channel_date_dict.get((channel, theater_attribute, tv_name, tv_id, str(value[0]))) channel_dict[key] = (str(value[0]), sum_value, count_value) channel_date_dict[(channel, theater_attribute, tv_name, tv_id, str(value[0]))] = tuple(x + y for x, y in zip((sum_value, count_value), tuple(value2))) # 日期超出范围,则表示同一部电视剧播出了多次 else: channel_dict[key] = (str(tv_date), sum_value, count_value) channel_date_dict[(channel, theater_attribute, tv_name, tv_id, str(tv_date))] = (sum_value, count_value) sql = """ insert into tmp.area_tv_avg_ratings (channel, theater_attribute, tv_name, tv_id, tv_date, value) values (%s, %s, %s, %s, %s, %s) """ data_list = [] for key in channel_date_dict.keys(): (channel, theater_attribute, tv_name, tv_id, tv_date) = key (sum_value, count_value) = channel_date_dict.get(key) data_list.append((channel, theater_attribute, tv_name, tv_id, tv_date, float(sum_value) / count_value)) Util.insert_by_chunk(sql, data_list, conn) Mysql.close(conn) if __name__ == '__main__': if len(sys.argv) != 2: print '没有输入参数,退出' sys.exit(0) print 'method name is ' + sys.argv[1] obj = tv_avg_ratings_clac() try: getattr(obj, sys.argv[1])() except Exception, e: print e