123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182 |
- #encoding=utf-8
- #author:wdw110
- #功能:统计计算电视剧收视率的基础数据
- from __future__ import division
- import re
- import math
- import time
- import copy
- import datetime
- import numpy as np
- from fty_util.common import Mysql
- tv_data = {}
- tv_rate = {}
- tv_avg_sr = {}
- tv_station = {}
- tv_play = {}
- conn = Mysql.createOfflineConn()
- sql = "select tv_name,channel,audience_rating,tv_date from odl.ad_television where theater_attribute='黄金剧场'"
- data = Mysql.selectAll(sql, conn=conn)
- sql_tv = "select tv_id,tv_name,director,scriptwriter,main_actors,filmer,first_type,show_time from odl.ad_tv_lib where is_use=1"
- tmp_data = Mysql.selectAll(sql_tv, conn=conn)
- for i in range(len(tmp_data)):
- tv_id = int(tmp_data[i][0])
- tv_name = tmp_data[i][1]
- director = tmp_data[i][2] if tmp_data[i][2] else ''
- scriptwriter = tmp_data[i][3] if tmp_data[i][3] else ''
- actors = tmp_data[i][4] if tmp_data[i][4] else ''
- filmer = tmp_data[i][5] if tmp_data[i][5] else ''
- type1 = tmp_data[i][6] if tmp_data[i][6] else ''
- tv_data[(tv_id,tv_name)] = [director,scriptwriter,actors,filmer,type1]
- tv_play[tv_name] = tmp_data[i][7]
- for i in range(len(data)):
- tv_name = data[i][0]
- channel = data[i][1]
- aud_rating = float(data[i][2])
- tv_date = datetime.datetime.strftime(data[i][3],'%Y-%m')
- year = data[i][3].year
- if aud_rating and tv_play.has_key(tv_name): #判断电视剧是在ad_tv_lib表中
- show_time = tv_play[tv_name] if tv_play[tv_name] else str(year)
- if str(year) in show_time:
- tv_station.setdefault(channel,{})
- tv_station[channel].setdefault(tv_date,[])
- tv_station[channel][tv_date].append(aud_rating)
- tv_rate.setdefault(tv_name,{})
- if not tv_rate.get(tv_name):
- tv_rate[tv_name].setdefault(year,{})
- else:
- yy = tv_rate[tv_name].keys()[0]
- if year < yy:
- del tv_rate[tv_name][yy]
- tv_rate[tv_name][year] = {}
- if tv_rate[tv_name].has_key(year):
- tv_rate[tv_name][year].setdefault(channel,['9999',[]])
- dd = tv_rate[tv_name][year][channel][0]
- if tv_date < dd:
- tv_rate[tv_name][year][channel][0] = tv_date
- tv_rate[tv_name][year][channel][1].append(aud_rating)
- for channel,value in tv_station.items():
- for tv_date in value:
- tmp_arr = value[tv_date]
- avg_rating = sum(tmp_arr)/len(tmp_arr)
- tv_station[channel][tv_date] = avg_rating
- def avg_rate(Date,obj,channel): #Date:'2014-05',obj:tv_station,channel:电视台
- '''电视台近一年的平均收视率'''
- array = []
- tmp = Date.split('-')
- if int(tmp[1])==1:
- month = '12'
- elif 1<int(tmp[1])<=10:
- month = '0'+str(int(tmp[1])-1)
- else:
- month = str(int(tmp[1])-1)
- Date2 = str(int(tmp[0])-1)+'-'+month
- tmp_obj = obj[channel]
- for tv_date in tmp_obj:
- if Date2<=tv_date<=Date:
- array.append(tmp_obj[tv_date])
- if not len(array):
- print Date,channel,obj[channel]
- res = sum(array)/len(array)
- return res
- #tv_rate_new = copy.deepcopy(tv_rate)
- for tv_name in tv_rate:
- year = tv_rate[tv_name].keys()[0]
- tv_sr = []
- for channel,value in tv_rate[tv_name][year].items():
- tv_date = value[0]
- tv_avg_rating = sum(value[1])/len(value[1])
- tv_sr.append(tv_avg_rating/avg_rate(tv_date,tv_station,channel))
- tv_avg_sr[tv_name] = (year,sum(tv_sr)/len(tv_sr))
- people_sr = [{},{},{},{},{}] #每个维度中所有变量的值
- for id_name in tv_data:
- tv_id = id_name[0]
- tv_name = id_name[1]
- people_arr = tv_data[id_name]
- if tv_avg_sr.get(tv_name):
- year,avg_sr = tv_avg_sr[tv_name]
- for i in range(len(people_arr)):
- if people_arr[i]:
- p_arr = people_arr[i].split(u' ')
- for peo in p_arr:
- if peo:
- people_sr[i].setdefault(peo,{})
- people_sr[i][peo].setdefault(year,[])
- people_sr[i][peo][year].append((tv_id,avg_sr))
- people_sr_new = copy.deepcopy(people_sr)
- people_sr_new2 = copy.deepcopy(people_sr)
- for i in range(len(people_sr)):
- for peo in people_sr[i]:
- peo_obj = people_sr[i][peo]
- for year,value in people_sr[i][peo].items():
- people_sr_new[i][peo][year] = []
- people_sr_new2[i][peo][year] = []
- for j in range(2010,year+1):
- if peo_obj.has_key(j):
- tmp_arr = [jj[1] for jj in peo_obj[j]]
- people_sr_new[i][peo][year].extend(tmp_arr)
- people_sr_new2[i][peo][year].extend(peo_obj[j])
- result_sr = [] #每个电视剧的收视指数信息
- for id_name in tv_data:
- tv_id = id_name[0]
- tv_name = id_name[1]
- people_arr = tv_data[id_name]
- if tv_avg_sr.get(tv_name):
- year,avg_sr = tv_avg_sr[tv_name]
- peo_arr = [tv_id,tv_name,avg_sr]
- for i in range(len(people_arr)):
- tmp_str = ''
- p_arr = people_arr[i].split(u' ')
- for peo in p_arr:
- if people_sr_new[i].has_key(peo):
- var = sum(people_sr_new[i][peo][year])/len(people_sr_new[i][peo][year])
- tmp_str += peo + ":" + str(var) + ';'
- peo_arr.append(tmp_str)
- peo_arr.append(year)
- result_sr.append(tuple(peo_arr))
-
- result = []
- for i in range(len(people_sr_new2)):
- people_obj = people_sr_new2[i]
- for peo in people_obj:
- for year,value in people_obj[peo].items():
- str1 = str([arr[0] for arr in value])
- str2 = str([arr[1] for arr in value])
- result.append((peo,str1,str2,year,i+1))
- delete = 'delete from tmp.ad_tv_sr_pre_var'
- Mysql.execute(delete, conn=conn)
- sql = 'insert into tmp.ad_tv_sr_pre_var values(%s,%s,%s,%s,%s)'
- for i in range(int(len(result)/1000)+1):
- tmp = result[i*1000:(i+1)*1000]
- Mysql.insertMany(sql, tmp, conn=conn)
- delete = 'delete from tmp.ad_tv_sr'
- Mysql.execute(delete, conn=conn)
- sql_sr = 'insert into tmp.ad_tv_sr values(%s,%s,%s,%s,%s,%s,%s,%s,%s)'
- for i in range(int(len(result_sr)/1000)+1):
- tmp = result_sr[i*1000:(i+1)*1000]
- Mysql.insertMany(sql_sr, tmp, conn=conn)
- Mysql.close(conn)
|