tmp_ad_tv_sr_stat.py 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182
  1. #encoding=utf-8
  2. #author:wdw110
  3. #功能:统计计算电视剧收视率的基础数据
  4. from __future__ import division
  5. import re
  6. import math
  7. import time
  8. import copy
  9. import datetime
  10. import numpy as np
  11. from fty_util.common import Mysql
  12. tv_data = {}
  13. tv_rate = {}
  14. tv_avg_sr = {}
  15. tv_station = {}
  16. tv_play = {}
  17. conn = Mysql.createOfflineConn()
  18. sql = "select tv_name,channel,audience_rating,tv_date from odl.ad_television where theater_attribute='黄金剧场'"
  19. data = Mysql.selectAll(sql, conn=conn)
  20. sql_tv = "select tv_id,tv_name,director,scriptwriter,main_actors,filmer,first_type,show_time from odl.ad_tv_lib where is_use=1"
  21. tmp_data = Mysql.selectAll(sql_tv, conn=conn)
  22. for i in range(len(tmp_data)):
  23. tv_id = int(tmp_data[i][0])
  24. tv_name = tmp_data[i][1]
  25. director = tmp_data[i][2] if tmp_data[i][2] else ''
  26. scriptwriter = tmp_data[i][3] if tmp_data[i][3] else ''
  27. actors = tmp_data[i][4] if tmp_data[i][4] else ''
  28. filmer = tmp_data[i][5] if tmp_data[i][5] else ''
  29. type1 = tmp_data[i][6] if tmp_data[i][6] else ''
  30. tv_data[(tv_id,tv_name)] = [director,scriptwriter,actors,filmer,type1]
  31. tv_play[tv_name] = tmp_data[i][7]
  32. for i in range(len(data)):
  33. tv_name = data[i][0]
  34. channel = data[i][1]
  35. aud_rating = float(data[i][2])
  36. tv_date = datetime.datetime.strftime(data[i][3],'%Y-%m')
  37. year = data[i][3].year
  38. if aud_rating and tv_play.has_key(tv_name): #判断电视剧是在ad_tv_lib表中
  39. show_time = tv_play[tv_name] if tv_play[tv_name] else str(year)
  40. if str(year) in show_time:
  41. tv_station.setdefault(channel,{})
  42. tv_station[channel].setdefault(tv_date,[])
  43. tv_station[channel][tv_date].append(aud_rating)
  44. tv_rate.setdefault(tv_name,{})
  45. if not tv_rate.get(tv_name):
  46. tv_rate[tv_name].setdefault(year,{})
  47. else:
  48. yy = tv_rate[tv_name].keys()[0]
  49. if year < yy:
  50. del tv_rate[tv_name][yy]
  51. tv_rate[tv_name][year] = {}
  52. if tv_rate[tv_name].has_key(year):
  53. tv_rate[tv_name][year].setdefault(channel,['9999',[]])
  54. dd = tv_rate[tv_name][year][channel][0]
  55. if tv_date < dd:
  56. tv_rate[tv_name][year][channel][0] = tv_date
  57. tv_rate[tv_name][year][channel][1].append(aud_rating)
  58. for channel,value in tv_station.items():
  59. for tv_date in value:
  60. tmp_arr = value[tv_date]
  61. avg_rating = sum(tmp_arr)/len(tmp_arr)
  62. tv_station[channel][tv_date] = avg_rating
  63. def avg_rate(Date,obj,channel): #Date:'2014-05',obj:tv_station,channel:电视台
  64. '''电视台近一年的平均收视率'''
  65. array = []
  66. tmp = Date.split('-')
  67. if int(tmp[1])==1:
  68. month = '12'
  69. elif 1<int(tmp[1])<=10:
  70. month = '0'+str(int(tmp[1])-1)
  71. else:
  72. month = str(int(tmp[1])-1)
  73. Date2 = str(int(tmp[0])-1)+'-'+month
  74. tmp_obj = obj[channel]
  75. for tv_date in tmp_obj:
  76. if Date2<=tv_date<=Date:
  77. array.append(tmp_obj[tv_date])
  78. if not len(array):
  79. print Date,channel,obj[channel]
  80. res = sum(array)/len(array)
  81. return res
  82. #tv_rate_new = copy.deepcopy(tv_rate)
  83. for tv_name in tv_rate:
  84. year = tv_rate[tv_name].keys()[0]
  85. tv_sr = []
  86. for channel,value in tv_rate[tv_name][year].items():
  87. tv_date = value[0]
  88. tv_avg_rating = sum(value[1])/len(value[1])
  89. tv_sr.append(tv_avg_rating/avg_rate(tv_date,tv_station,channel))
  90. tv_avg_sr[tv_name] = (year,sum(tv_sr)/len(tv_sr))
  91. people_sr = [{},{},{},{},{}] #每个维度中所有变量的值
  92. for id_name in tv_data:
  93. tv_id = id_name[0]
  94. tv_name = id_name[1]
  95. people_arr = tv_data[id_name]
  96. if tv_avg_sr.get(tv_name):
  97. year,avg_sr = tv_avg_sr[tv_name]
  98. for i in range(len(people_arr)):
  99. if people_arr[i]:
  100. p_arr = people_arr[i].split(u' ')
  101. for peo in p_arr:
  102. if peo:
  103. people_sr[i].setdefault(peo,{})
  104. people_sr[i][peo].setdefault(year,[])
  105. people_sr[i][peo][year].append((tv_id,avg_sr))
  106. people_sr_new = copy.deepcopy(people_sr)
  107. people_sr_new2 = copy.deepcopy(people_sr)
  108. for i in range(len(people_sr)):
  109. for peo in people_sr[i]:
  110. peo_obj = people_sr[i][peo]
  111. for year,value in people_sr[i][peo].items():
  112. people_sr_new[i][peo][year] = []
  113. people_sr_new2[i][peo][year] = []
  114. for j in range(2010,year+1):
  115. if peo_obj.has_key(j):
  116. tmp_arr = [jj[1] for jj in peo_obj[j]]
  117. people_sr_new[i][peo][year].extend(tmp_arr)
  118. people_sr_new2[i][peo][year].extend(peo_obj[j])
  119. result_sr = [] #每个电视剧的收视指数信息
  120. for id_name in tv_data:
  121. tv_id = id_name[0]
  122. tv_name = id_name[1]
  123. people_arr = tv_data[id_name]
  124. if tv_avg_sr.get(tv_name):
  125. year,avg_sr = tv_avg_sr[tv_name]
  126. peo_arr = [tv_id,tv_name,avg_sr]
  127. for i in range(len(people_arr)):
  128. tmp_str = ''
  129. p_arr = people_arr[i].split(u' ')
  130. for peo in p_arr:
  131. if people_sr_new[i].has_key(peo):
  132. var = sum(people_sr_new[i][peo][year])/len(people_sr_new[i][peo][year])
  133. tmp_str += peo + ":" + str(var) + ';'
  134. peo_arr.append(tmp_str)
  135. peo_arr.append(year)
  136. result_sr.append(tuple(peo_arr))
  137. result = []
  138. for i in range(len(people_sr_new2)):
  139. people_obj = people_sr_new2[i]
  140. for peo in people_obj:
  141. for year,value in people_obj[peo].items():
  142. str1 = str([arr[0] for arr in value])
  143. str2 = str([arr[1] for arr in value])
  144. result.append((peo,str1,str2,year,i+1))
  145. delete = 'delete from tmp.ad_tv_sr_pre_var'
  146. Mysql.execute(delete, conn=conn)
  147. sql = 'insert into tmp.ad_tv_sr_pre_var values(%s,%s,%s,%s,%s)'
  148. for i in range(int(len(result)/1000)+1):
  149. tmp = result[i*1000:(i+1)*1000]
  150. Mysql.insertMany(sql, tmp, conn=conn)
  151. delete = 'delete from tmp.ad_tv_sr'
  152. Mysql.execute(delete, conn=conn)
  153. sql_sr = 'insert into tmp.ad_tv_sr values(%s,%s,%s,%s,%s,%s,%s,%s,%s)'
  154. for i in range(int(len(result_sr)/1000)+1):
  155. tmp = result_sr[i*1000:(i+1)*1000]
  156. Mysql.insertMany(sql_sr, tmp, conn=conn)
  157. Mysql.close(conn)