tmp_tv_avg_ratings_stat.py 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146
  1. #!/usr/bin/env python
  2. #coding=utf-8
  3. """计算每个电视剧的收视率
  4. province_input: tmp.month_channel_stat
  5. province_output: tmp.tv_avg_ratings
  6. area_input: tmp.area_month_channel_stat
  7. area_output: tmp.area_tv_avg_ratings
  8. """
  9. import datetime
  10. import sys
  11. from fty_util.common import Mysql, Util
  12. class tv_avg_ratings_clac():
  13. def province(self):
  14. conn = Mysql.createOfflineConn()
  15. sql = """
  16. truncate table tmp.tv_avg_ratings
  17. """
  18. Mysql.execute(sql, conn=conn)
  19. # 从tmp.month_channel_stat表中取出每月的统计数据
  20. sql = """
  21. select channel, theater_attribute, tv_name, tv_id, tv_date, sum_value, count_value from tmp.month_channel_stat order by tv_date asc
  22. """
  23. rows = Mysql.getAll(sql, conn=conn)
  24. channel_dict = {}
  25. # key中有日期
  26. channel_date_dict = {}
  27. for row in rows:
  28. channel = row['channel']
  29. theater_attribute = row['theater_attribute']
  30. tv_name = row['tv_name']
  31. tv_id = row['tv_id']
  32. tv_date = row['tv_date']
  33. sum_value = row['sum_value']
  34. count_value = row['count_value']
  35. key = (channel, theater_attribute, tv_name, tv_id)
  36. # 如果两部电视剧在同一台同一剧场播放两次,则保存两条记录
  37. # 如果字典中不存在数据,则添加
  38. if channel_dict.get(key) is None:
  39. channel_dict[key] = (str(tv_date), sum_value, count_value)
  40. channel_date_dict[(channel, theater_attribute, tv_name, tv_id, str(tv_date))] = (sum_value, count_value)
  41. else:
  42. # 否则进行时间判断
  43. value = channel_dict.get(key)
  44. # 当前的日期
  45. date1 = datetime.datetime.strptime(str(tv_date), '%Y-%m-%d')
  46. # 保存的日期
  47. date2 = datetime.datetime.strptime(str(value[0]), '%Y-%m-%d')
  48. sub_value = (date2 - date1).days
  49. # 如果相减天数在这之间,表示同一部电视剧
  50. if sub_value <= 25 and sub_value >= -25:
  51. value2 = channel_date_dict.get((channel, theater_attribute, tv_name, tv_id, str(value[0])))
  52. channel_dict[key] = (str(value[0]), sum_value, count_value)
  53. channel_date_dict[(channel, theater_attribute, tv_name, tv_id, str(value[0]))] = tuple(x + y for x, y in zip((sum_value, count_value), tuple(value2)))
  54. # 日期超出范围,则表示同一部电视剧播出了多次
  55. else:
  56. channel_dict[key] = (str(tv_date), sum_value, count_value)
  57. channel_date_dict[(channel, theater_attribute, tv_name, tv_id, str(tv_date))] = (sum_value, count_value)
  58. sql = """
  59. insert into tmp.tv_avg_ratings (channel, theater_attribute, tv_name, tv_id, tv_date, value) values (%s, %s, %s, %s, %s, %s)
  60. """
  61. data_list = []
  62. for key in channel_date_dict.keys():
  63. (channel, theater_attribute, tv_name, tv_id, tv_date) = key
  64. (sum_value, count_value) = channel_date_dict.get(key)
  65. # value = (channel, theater_attribute, tv_name, tv_id, tv_date, float(sum_value) / count_value)
  66. data_list.append((channel, theater_attribute, tv_name, tv_id, tv_date, float(sum_value) / count_value))
  67. # Mysql.insertOne(sql, value=value, conn=conn)
  68. # Mysql.insertMany(sql, data_list, conn)
  69. Util.insert_by_chunk(sql, data_list, conn)
  70. Mysql.close(conn)
  71. def area(self):
  72. conn = Mysql.createOfflineConn()
  73. sql = """
  74. truncate table tmp.area_tv_avg_ratings
  75. """
  76. Mysql.execute(sql, conn=conn)
  77. # 从tmp.month_channel_stat表中取出每月的统计数据
  78. sql = """
  79. select channel, theater_attribute, tv_name, tv_id, tv_date, sum_value, count_value from tmp.area_month_channel_stat order by tv_date asc
  80. """
  81. rows = Mysql.getAll(sql, conn=conn)
  82. channel_dict = {}
  83. # key中有日期
  84. channel_date_dict = {}
  85. for row in rows:
  86. channel = row['channel']
  87. theater_attribute = row['theater_attribute']
  88. tv_name = row['tv_name']
  89. tv_id = row['tv_id']
  90. tv_date = row['tv_date']
  91. sum_value = row['sum_value']
  92. count_value = row['count_value']
  93. key = (channel, theater_attribute, tv_name, tv_id)
  94. # 如果两部电视剧在同一台同一剧场播放两次,则保存两条记录
  95. # 如果字典中不存在数据,则添加
  96. if channel_dict.get(key) is None:
  97. channel_dict[key] = (str(tv_date), sum_value, count_value)
  98. channel_date_dict[(channel, theater_attribute, tv_name, tv_id, str(tv_date))] = (sum_value, count_value)
  99. else:
  100. # 否则进行时间判断
  101. value = channel_dict.get(key)
  102. # 当前的日期
  103. date1 = datetime.datetime.strptime(str(tv_date), '%Y-%m-%d')
  104. # 保存的日期
  105. date2 = datetime.datetime.strptime(str(value[0]), '%Y-%m-%d')
  106. sub_value = (date2 - date1).days
  107. # 如果相减天数在这之间,表示同一部电视剧
  108. if sub_value <= 25 and sub_value >= -25:
  109. value2 = channel_date_dict.get((channel, theater_attribute, tv_name, tv_id, str(value[0])))
  110. channel_dict[key] = (str(value[0]), sum_value, count_value)
  111. channel_date_dict[(channel, theater_attribute, tv_name, tv_id, str(value[0]))] = tuple(x + y for x, y in zip((sum_value, count_value), tuple(value2)))
  112. # 日期超出范围,则表示同一部电视剧播出了多次
  113. else:
  114. channel_dict[key] = (str(tv_date), sum_value, count_value)
  115. channel_date_dict[(channel, theater_attribute, tv_name, tv_id, str(tv_date))] = (sum_value, count_value)
  116. sql = """
  117. insert into tmp.area_tv_avg_ratings (channel, theater_attribute, tv_name, tv_id, tv_date, value) values (%s, %s, %s, %s, %s, %s)
  118. """
  119. data_list = []
  120. for key in channel_date_dict.keys():
  121. (channel, theater_attribute, tv_name, tv_id, tv_date) = key
  122. (sum_value, count_value) = channel_date_dict.get(key)
  123. data_list.append((channel, theater_attribute, tv_name, tv_id, tv_date, float(sum_value) / count_value))
  124. Util.insert_by_chunk(sql, data_list, conn)
  125. Mysql.close(conn)
  126. if __name__ == '__main__':
  127. if len(sys.argv) != 2:
  128. print '没有输入参数,退出'
  129. sys.exit(0)
  130. print 'method name is ' + sys.argv[1]
  131. obj = tv_avg_ratings_clac()
  132. try:
  133. getattr(obj, sys.argv[1])()
  134. except Exception, e:
  135. print e