123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146 |
- #!/usr/bin/env python
- #coding=utf-8
- """计算每个电视剧的收视率
- province_input: tmp.month_channel_stat
- province_output: tmp.tv_avg_ratings
- area_input: tmp.area_month_channel_stat
- area_output: tmp.area_tv_avg_ratings
- """
- import datetime
- import sys
- from fty_util.common import Mysql, Util
- class tv_avg_ratings_clac():
-
- def province(self):
- conn = Mysql.createOfflineConn()
- sql = """
- truncate table tmp.tv_avg_ratings
- """
- Mysql.execute(sql, conn=conn)
- # 从tmp.month_channel_stat表中取出每月的统计数据
- sql = """
- select channel, theater_attribute, tv_name, tv_id, tv_date, sum_value, count_value from tmp.month_channel_stat order by tv_date asc
- """
- rows = Mysql.getAll(sql, conn=conn)
- channel_dict = {}
- # key中有日期
- channel_date_dict = {}
- for row in rows:
- channel = row['channel']
- theater_attribute = row['theater_attribute']
- tv_name = row['tv_name']
- tv_id = row['tv_id']
- tv_date = row['tv_date']
- sum_value = row['sum_value']
- count_value = row['count_value']
- key = (channel, theater_attribute, tv_name, tv_id)
- # 如果两部电视剧在同一台同一剧场播放两次,则保存两条记录
- # 如果字典中不存在数据,则添加
- if channel_dict.get(key) is None:
- channel_dict[key] = (str(tv_date), sum_value, count_value)
- channel_date_dict[(channel, theater_attribute, tv_name, tv_id, str(tv_date))] = (sum_value, count_value)
- else:
- # 否则进行时间判断
- value = channel_dict.get(key)
- # 当前的日期
- date1 = datetime.datetime.strptime(str(tv_date), '%Y-%m-%d')
- # 保存的日期
- date2 = datetime.datetime.strptime(str(value[0]), '%Y-%m-%d')
- sub_value = (date2 - date1).days
- # 如果相减天数在这之间,表示同一部电视剧
- if sub_value <= 25 and sub_value >= -25:
- value2 = channel_date_dict.get((channel, theater_attribute, tv_name, tv_id, str(value[0])))
- channel_dict[key] = (str(value[0]), sum_value, count_value)
- channel_date_dict[(channel, theater_attribute, tv_name, tv_id, str(value[0]))] = tuple(x + y for x, y in zip((sum_value, count_value), tuple(value2)))
- # 日期超出范围,则表示同一部电视剧播出了多次
- else:
- channel_dict[key] = (str(tv_date), sum_value, count_value)
- channel_date_dict[(channel, theater_attribute, tv_name, tv_id, str(tv_date))] = (sum_value, count_value)
- sql = """
- insert into tmp.tv_avg_ratings (channel, theater_attribute, tv_name, tv_id, tv_date, value) values (%s, %s, %s, %s, %s, %s)
- """
- data_list = []
- for key in channel_date_dict.keys():
- (channel, theater_attribute, tv_name, tv_id, tv_date) = key
- (sum_value, count_value) = channel_date_dict.get(key)
- # value = (channel, theater_attribute, tv_name, tv_id, tv_date, float(sum_value) / count_value)
- data_list.append((channel, theater_attribute, tv_name, tv_id, tv_date, float(sum_value) / count_value))
- # Mysql.insertOne(sql, value=value, conn=conn)
- # Mysql.insertMany(sql, data_list, conn)
- Util.insert_by_chunk(sql, data_list, conn)
- Mysql.close(conn)
- def area(self):
- conn = Mysql.createOfflineConn()
- sql = """
- truncate table tmp.area_tv_avg_ratings
- """
- Mysql.execute(sql, conn=conn)
- # 从tmp.month_channel_stat表中取出每月的统计数据
- sql = """
- select channel, theater_attribute, tv_name, tv_id, tv_date, sum_value, count_value from tmp.area_month_channel_stat order by tv_date asc
- """
- rows = Mysql.getAll(sql, conn=conn)
- channel_dict = {}
- # key中有日期
- channel_date_dict = {}
- for row in rows:
- channel = row['channel']
- theater_attribute = row['theater_attribute']
- tv_name = row['tv_name']
- tv_id = row['tv_id']
- tv_date = row['tv_date']
- sum_value = row['sum_value']
- count_value = row['count_value']
- key = (channel, theater_attribute, tv_name, tv_id)
- # 如果两部电视剧在同一台同一剧场播放两次,则保存两条记录
- # 如果字典中不存在数据,则添加
- if channel_dict.get(key) is None:
- channel_dict[key] = (str(tv_date), sum_value, count_value)
- channel_date_dict[(channel, theater_attribute, tv_name, tv_id, str(tv_date))] = (sum_value, count_value)
- else:
- # 否则进行时间判断
- value = channel_dict.get(key)
- # 当前的日期
- date1 = datetime.datetime.strptime(str(tv_date), '%Y-%m-%d')
- # 保存的日期
- date2 = datetime.datetime.strptime(str(value[0]), '%Y-%m-%d')
- sub_value = (date2 - date1).days
- # 如果相减天数在这之间,表示同一部电视剧
- if sub_value <= 25 and sub_value >= -25:
- value2 = channel_date_dict.get((channel, theater_attribute, tv_name, tv_id, str(value[0])))
- channel_dict[key] = (str(value[0]), sum_value, count_value)
- channel_date_dict[(channel, theater_attribute, tv_name, tv_id, str(value[0]))] = tuple(x + y for x, y in zip((sum_value, count_value), tuple(value2)))
- # 日期超出范围,则表示同一部电视剧播出了多次
- else:
- channel_dict[key] = (str(tv_date), sum_value, count_value)
- channel_date_dict[(channel, theater_attribute, tv_name, tv_id, str(tv_date))] = (sum_value, count_value)
-
- sql = """
- insert into tmp.area_tv_avg_ratings (channel, theater_attribute, tv_name, tv_id, tv_date, value) values (%s, %s, %s, %s, %s, %s)
- """
- data_list = []
- for key in channel_date_dict.keys():
- (channel, theater_attribute, tv_name, tv_id, tv_date) = key
- (sum_value, count_value) = channel_date_dict.get(key)
- data_list.append((channel, theater_attribute, tv_name, tv_id, tv_date, float(sum_value) / count_value))
-
- Util.insert_by_chunk(sql, data_list, conn)
- Mysql.close(conn)
- if __name__ == '__main__':
- if len(sys.argv) != 2:
- print '没有输入参数,退出'
- sys.exit(0)
- print 'method name is ' + sys.argv[1]
- obj = tv_avg_ratings_clac()
- try:
- getattr(obj, sys.argv[1])()
- except Exception, e:
- print e
|