#encoding=utf-8 #author:wdw110 #功能:统计计算电视剧收视率的基础数据 from __future__ import division import re import math import time import copy import datetime import numpy as np from fty_util.common import Mysql tv_data = {} tv_rate = {} tv_avg_sr = {} tv_station = {} tv_play = {} conn = Mysql.createOfflineConn() sql = "select tv_name,channel,audience_rating,tv_date from odl.ad_television where theater_attribute='黄金剧场'" data = Mysql.selectAll(sql, conn=conn) sql_tv = "select tv_id,tv_name,director,scriptwriter,main_actors,filmer,first_type,show_time from odl.ad_tv_lib where is_use=1" tmp_data = Mysql.selectAll(sql_tv, conn=conn) for i in range(len(tmp_data)): tv_id = int(tmp_data[i][0]) tv_name = tmp_data[i][1] director = tmp_data[i][2] if tmp_data[i][2] else '' scriptwriter = tmp_data[i][3] if tmp_data[i][3] else '' actors = tmp_data[i][4] if tmp_data[i][4] else '' filmer = tmp_data[i][5] if tmp_data[i][5] else '' type1 = tmp_data[i][6] if tmp_data[i][6] else '' tv_data[(tv_id,tv_name)] = [director,scriptwriter,actors,filmer,type1] tv_play[tv_name] = tmp_data[i][7] for i in range(len(data)): tv_name = data[i][0] channel = data[i][1] aud_rating = float(data[i][2]) tv_date = datetime.datetime.strftime(data[i][3],'%Y-%m') year = data[i][3].year if aud_rating and tv_play.has_key(tv_name): #判断电视剧是在ad_tv_lib表中 show_time = tv_play[tv_name] if tv_play[tv_name] else str(year) if str(year) in show_time: tv_station.setdefault(channel,{}) tv_station[channel].setdefault(tv_date,[]) tv_station[channel][tv_date].append(aud_rating) tv_rate.setdefault(tv_name,{}) if not tv_rate.get(tv_name): tv_rate[tv_name].setdefault(year,{}) else: yy = tv_rate[tv_name].keys()[0] if year < yy: del tv_rate[tv_name][yy] tv_rate[tv_name][year] = {} if tv_rate[tv_name].has_key(year): tv_rate[tv_name][year].setdefault(channel,['9999',[]]) dd = tv_rate[tv_name][year][channel][0] if tv_date < dd: tv_rate[tv_name][year][channel][0] = tv_date tv_rate[tv_name][year][channel][1].append(aud_rating) for channel,value in tv_station.items(): for tv_date in value: tmp_arr = value[tv_date] avg_rating = sum(tmp_arr)/len(tmp_arr) tv_station[channel][tv_date] = avg_rating def avg_rate(Date,obj,channel): #Date:'2014-05',obj:tv_station,channel:电视台 '''电视台近一年的平均收视率''' array = [] tmp = Date.split('-') if int(tmp[1])==1: month = '12' elif 1