idl_tv_sr_denoise.py 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132
  1. #encoding=utf-8
  2. #author:wdw110
  3. #功能: 对卫视和地方的电视剧收视数据进行去噪声处理
  4. from __future__ import division
  5. import math
  6. import copy
  7. import mysql.connector
  8. import time
  9. import numpy as np
  10. from fty_util.common import Mysql
  11. tbs = ['ad_television','area_ad_television']
  12. choose = ['ad','area']
  13. def Stat(Number): #Number:0和1
  14. tv_data = []
  15. tv_play = {}
  16. tv_station = {}
  17. dateline = str(time.localtime().tm_year-1)
  18. conn = Mysql.createOfflineConn()
  19. sql = "select id,tv_name,channel,theater_attribute,epi_num,tv_date,start_time,end_time,audience_rating,avg_rating,market_rating from odl.%s where year(tv_date)>=%s" % (tbs[Number], dateline)
  20. data = Mysql.selectAll(sql, conn=conn)
  21. for i in range(len(data)):
  22. dd = list(data[i])
  23. tv_id = data[i][0]
  24. tv_name = data[i][1]
  25. channel = data[i][2]
  26. theater = data[i][3]
  27. year = data[i][5].year
  28. if dd[8]>=0 and dd[9]>=0 and dd[10]>=0 and dd[3]:
  29. key = (tv_name, channel, theater, year)
  30. tv_play[tv_id] = dd[1:]
  31. tv_station.setdefault(key,[[],[]])
  32. tv_station[key][0].append(tv_id)
  33. tmp_data = map(float,data[i][8:11])
  34. tv_station[key][1].append(tmp_data)
  35. def fivenum(arr):
  36. """Tukey's five number"""
  37. arr = np.sort(arr)
  38. res = []
  39. n = len(arr)
  40. if n == 0:
  41. print '数组不能为空'
  42. return res
  43. else:
  44. n4 = math.floor((n+3)/2)/2
  45. d = [0,n4-1,(n-1)/2,n-n4,n-1]
  46. d_floor = [int(math.floor(i)) for i in d]
  47. d_ceil = [int(math.ceil(i)) for i in d]
  48. res = list(0.5 * (arr[d_floor]+arr[d_ceil]))
  49. return res
  50. def denoise(arr):
  51. """异常值检测:在区间[Q1-1.5*(Q3-Q1),Q3+1.5*(Q3-Q1)]
  52. 之外的点均为异常值,其中,Q1为四分之一分位点,Q3为四分之三分位点"""
  53. five_arr = fivenum(arr)
  54. Q1 = five_arr[1]
  55. Q3 = five_arr[3]
  56. L1 = Q1-1.5*(Q3-Q1) #区间下界
  57. L2 = Q3+1.5*(Q3-Q1) #区间上界
  58. res = []
  59. for i in range(len(arr)):
  60. if arr[i]<=L2:
  61. res.append(arr[i])
  62. else:
  63. res.append(-1)
  64. return res
  65. result = []
  66. for key in tv_station:
  67. id_arr = tv_station[key][0]
  68. vv = np.array(tv_station[key][1])
  69. tmp_arr = np.array(map(denoise,vv.transpose())).transpose()
  70. for i in range(len(id_arr)):
  71. tv_id = id_arr[i]
  72. tmp = [tv_id] + tv_play[tv_id] + map(float,list(tmp_arr[i]))
  73. result.append(tmp)
  74. #写入yxb.ad_tv_rating_denoise
  75. delete = 'delete from yxb.%s_tv_rating_denoise where year(tv_date)>=%s' %(choose[Number], dateline)
  76. try:
  77. Mysql.execute(delete, conn=conn)
  78. except mysql.connector.errors.ProgrammingError as e:
  79. pass
  80. ind1 = 'DROP INDEX id ON yxb.%s_tv_rating_denoise' % choose[Number]
  81. ind2 = 'DROP INDEX tv_date ON yxb.%s_tv_rating_denoise' % choose[Number]
  82. try:
  83. Mysql.execute(ind1, conn=conn)
  84. except mysql.connector.errors.ProgrammingError as e:
  85. pass
  86. try:
  87. Mysql.execute(ind2, conn=conn)
  88. except mysql.connector.errors.ProgrammingError as e:
  89. pass
  90. sql = 'insert into yxb.%s_tv_rating_denoise' % choose[Number] + ' values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'
  91. for i in range(int(len(result)/1000)+1):
  92. tmp = result[i*1000:(i+1)*1000]
  93. Mysql.insertMany(sql, tmp, conn=conn)
  94. sql1 = 'CREATE INDEX id ON yxb.%s_tv_rating_denoise(id)' % choose[Number]
  95. sql2 = 'CREATE INDEX tv_date ON yxb.%s_tv_rating_denoise (tv_date,theater_attribute)' % choose[Number]
  96. Mysql.execute(sql1, conn=conn)
  97. Mysql.execute(sql2, conn=conn)
  98. #写入odl.ad_tv_rating_denoise
  99. delete = 'delete from odl.%s_tv_rating_denoise where year(tv_date)>=%s' % (choose[Number],dateline)
  100. try:
  101. Mysql.execute(delete, conn=conn)
  102. except mysql.connector.errors.ProgrammingError as e:
  103. pass
  104. sql = 'insert into odl.%s_tv_rating_denoise select * from yxb.%s_tv_rating_denoise where year(tv_date)>=%s' %(choose[Number], choose[Number], dateline)
  105. try:
  106. Mysql.execute(sql, conn=conn)
  107. except mysql.connector.errors.ProgrammingError as e:
  108. pass
  109. Mysql.close(conn)
  110. for i in range(0,2):
  111. Stat(i)