idl_ad_pub_station_stats.py 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142
  1. #encoding=utf-8
  2. #author:wdw110
  3. #功能:统计电视台的各类型数量和收视率
  4. from __future__ import division
  5. import re
  6. import math
  7. import time
  8. import datetime
  9. import numpy as np
  10. from fty_util.common import Mysql
  11. tv_data = {}
  12. tv_data2 = {}
  13. tv_station = {}
  14. tv_station_type = {}
  15. channel_type = {}
  16. result_rate = []
  17. result_type = []
  18. result_channel = []
  19. conn = Mysql.createOfflineConn()
  20. sql = "select tv_name,channel,audience_rating,tv_date from odl.ad_television where theater_attribute='黄金剧场'"
  21. data = Mysql.selectAll(sql, conn=conn)
  22. sql_tv = "select tv_id,tv_name,theme,second_type,decade,first_type from odl.ad_tv_lib where is_use=1"
  23. tmp_data = Mysql.selectAll(sql_tv, conn=conn)
  24. for i in range(len(tmp_data)):
  25. tv_id = tmp_data[i][0]
  26. tv_name = tmp_data[i][1]
  27. theme = tmp_data[i][2]
  28. type2 = tmp_data[i][3]
  29. decade = tmp_data[i][4]
  30. type1 = tmp_data[i][5]
  31. if type1 and type2:
  32. tv_data[tv_name] = [tv_id,theme,type2,decade]
  33. tv_data2[tv_name] = [tv_id,type1,type2]
  34. #按月统计电视台的收视率
  35. for i in range(len(data)):
  36. tv_name = data[i][0]
  37. channel = data[i][1]
  38. aud_rating = data[i][2]
  39. tv_date = datetime.datetime.strftime(data[i][3],'%Y-%m')
  40. tv_station.setdefault(channel,{})
  41. tv_station[channel].setdefault(tv_date,[])
  42. tv_station[channel][tv_date].append(aud_rating)
  43. channel_type.setdefault(channel,{})
  44. channel_type[channel].setdefault(tv_date,{})
  45. if tv_data2.get(tv_name):
  46. tv_arr = tv_data2[tv_name][1:-1]
  47. for level in range(len(tv_arr)):
  48. channel_type[channel][tv_date].setdefault(level,{})
  49. ty = tv_arr[level]
  50. if ty:
  51. type_arr = ty.split(u' ')
  52. for tt in type_arr:
  53. if len(tt):
  54. channel_type[channel][tv_date][level].setdefault(tt,[])
  55. channel_type[channel][tv_date][level][tt].append(aud_rating)
  56. for channel,value in channel_type.items():
  57. for tv_date in value:
  58. date = datetime.datetime.strptime(tv_date,'%Y-%m').date()
  59. val = value[tv_date]
  60. for level,v_obj in val.items():
  61. for k,v in v_obj.items():
  62. avg = sum(v)/len(v)
  63. result_channel.append((channel,k,avg,level+1,date))
  64. for channel,value in tv_station.items():
  65. for tv_date in value:
  66. tmp_arr = value[tv_date]
  67. avg_rating = sum(tmp_arr)/len(tmp_arr)
  68. date = datetime.datetime.strptime(tv_date,'%Y-%m').date()
  69. result_rate.append((channel,avg_rating,date))
  70. #按类型统计电视台播放电视剧数量
  71. for i in range(len(data)):
  72. tv_name = data[i][0]
  73. channel = data[i][1]
  74. tv_date = datetime.datetime.strftime(data[i][3],'%Y-%m')
  75. if tv_data.get(tv_name):
  76. tv_id = tv_data[tv_name][0]
  77. type1 = tv_data[tv_name][1]
  78. type2 = tv_data[tv_name][2]
  79. decade = tv_data[tv_name][3]
  80. type_arr = type2.split(u' ') if type2 else []
  81. tv_station_type.setdefault(channel,{})
  82. tv_station_type[channel].setdefault(tv_date,{"type1":{},"type2":{}})
  83. tv_station_type[channel][tv_date]['type1'].setdefault(type1,{})
  84. tv_station_type[channel][tv_date]['type1'][type1][tv_id] = decade
  85. for t2 in type_arr:
  86. if len(t2):
  87. tv_station_type[channel][tv_date]['type2'].setdefault(t2,{})
  88. tv_station_type[channel][tv_date]['type2'][t2][tv_id] = decade
  89. for channel,value in tv_station_type.items():
  90. for tv_date in value:
  91. type1_obj = value[tv_date]['type1']
  92. type2_obj = value[tv_date]['type2']
  93. date = datetime.datetime.strptime(tv_date,'%Y-%m').date()
  94. for t1,v1 in type1_obj.items():
  95. for con in v1:
  96. t1_arr = [channel,t1,'1',con,date,v1[con]]
  97. result_type.append(t1_arr)
  98. for t2,v2 in type2_obj.items():
  99. for con in v2:
  100. t2_arr = [channel,t2,'2',con,date,v2[con]]
  101. result_type.append(t2_arr)
  102. delete = 'truncate table idl.ad_pub_station_rate_stats'
  103. Mysql.execute(delete, conn=conn)
  104. sql_rate = 'insert into idl.ad_pub_station_rate_stats(channel,avg_rating,date) values(%s,%s,%s)'
  105. for i in range(int(len(result_rate)/1000)+1):
  106. tmp = result_rate[i*1000:(i+1)*1000]
  107. Mysql.insertMany(sql_rate, tmp, conn=conn)
  108. delete = 'truncate table idl.ad_pub_station_type_stats'
  109. Mysql.execute(delete, conn=conn)
  110. sql_type = 'insert into idl.ad_pub_station_type_stats(channel,type,level,tv_id,date,decade) values(%s,%s,%s,%s,%s,%s)'
  111. for i in range(int(len(result_type)/1000)+1):
  112. tmp = result_type[i*1000:(i+1)*1000]
  113. Mysql.insertMany(sql_type, tmp, conn=conn)
  114. delete = 'truncate table idl.ad_pub_station_type_rate'
  115. Mysql.execute(delete, conn=conn)
  116. sql_channel = 'insert into idl.ad_pub_station_type_rate(channel,type,avg_rating,level,date) values(%s,%s,%s,%s,%s)'
  117. for i in range(int(len(result_channel)/1000)+1):
  118. tmp = result_channel[i*1000:(i+1)*1000]
  119. Mysql.insertMany(sql_channel, tmp, conn=conn)
  120. Mysql.close(conn)