lianghua
/
py_script


			
				
					
						
						
							1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859
#!/usr/bin/env python
#coding=utf-8

import random
import sys
import time
from selenium import webdriver
from urllib import quote
from fty_util.common import Mysql

# Python 2 only: switch the implicit str <-> unicode codec to UTF-8 so that
# str(tv_name) and the tv_name == title comparison below work for non-ASCII
# names.
reload(sys)
sys.setdefaultencoding('utf8')

SEARCH_URL = 'http://search.kankan.com/search.php?keyword='

UPDATE_SQL = """
    update scrapy.wangju_url set url_kankan = '%s' where id = %s
"""


def _save_kankan_url(conn, row_id, url_kankan):
    """Store ``url_kankan`` on the ``scrapy.wangju_url`` row ``row_id``."""
    # NOTE(review): the value is spliced into the SQL text and ``url_kankan``
    # comes from a scraped page. If Mysql.execute supports bind parameters,
    # switch to them -- TODO confirm against fty_util.common.Mysql.
    Mysql.execute(UPDATE_SQL % (url_kankan, row_id), conn=conn)


def _find_kankan_url(driver, tv_name):
    """Search kankan.com for ``tv_name`` and return the matching page URL.

    Only the first page of search results is inspected. A result matches when
    its title equals ``tv_name``, its type text contains the TV-series label
    and its source list contains the Kankan label. When several results
    match, the last one wins. Returns ``None`` when nothing matches.
    """
    try:
        driver.get(SEARCH_URL + quote(str(tv_name)))
    except Exception:
        # The load failed or ran past the page-load timeout: stop loading and
        # parse whatever has been rendered so far.
        driver.execute_script('window.stop()')

    matched_href = None
    # Parse the first page of results.
    for div in driver.find_elements_by_xpath('//div[@class="searchmain"]/div'):
        try:
            # The leading "." keeps each lookup inside this result block; a
            # bare "//" would search the whole document and always return the
            # first result on the page.
            title = div.find_element_by_xpath(
                './/div[@class="reuslt_tt"]/h2/a').get_attribute('title')
            href = div.find_element_by_xpath('./div/a').get_attribute('href')
            _type = div.find_element_by_xpath(
                './div/div[2]').get_attribute('textContent')
            sources = div.find_element_by_xpath(
                './/ul[@class="sitelist"]').get_attribute('textContent')
        except Exception:
            # Best effort: blocks without the expected structure are skipped.
            continue
        if (tv_name == title
                and u'电视剧' in (_type or u'')
                and u'响巢看看' in (sources or u'')):
            matched_href = href
    return matched_href


conn = Mysql.createOfflineConn()

sql = """
    select id, tv_name from scrapy.wangju_url where url_kankan is null order by id asc
"""

rows = Mysql.getAll(sql, conn=conn)

driver = webdriver.Firefox()
try:
    driver.set_page_load_timeout(10)
    # ``rows or []`` guards against a falsy result when no row needs work.
    for row in rows or []:
        url_kankan = _find_kankan_url(driver, row['tv_name'])
        # Store '' when nothing matched so the row no longer satisfies
        # "url_kankan is null" and is not searched again on the next run.
        _save_kankan_url(conn, row['id'], url_kankan or '')
finally:
    # Always shut the browser down, even if scraping or the database fails.
    driver.quit()