scrapy_all.py 2.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100
#!/usr/bin/env python
# coding=utf-8
import random
import sys
import time
from decimal import Decimal
from urllib import quote

from selenium import webdriver

from fty_util.common import Mysql
# Python 2-only idiom: sys.setdefaultencoding is not reachable on the sys
# module as first imported, so the module is reloaded before the call.  The
# call makes UTF-8 the codec used for implicit str/unicode conversions.
reload(sys)
sys.setdefaultencoding('utf8')
  11. def parse_playtimes():
  12. conn = Mysql.createOfflineConn()
  13. sql = """
  14. select id, tv_name, url, playtimes from scrapy.wangju_all_url order by id asc
  15. """
  16. rows = Mysql.getAll(sql, conn=conn)
  17. for row in rows:
  18. _id = row['id']
  19. tv_name = row['tv_name']
  20. url = row['url']
  21. playtimes = row['playtimes']
  22. if playtimes is not None and len(playtimes.split('*')) == 2:
  23. first_num, second_num = playtimes.split('*')
  24. first_num = float(first_num)
  25. second_num = int(second_num)
  26. playtimes_new = first_num * second_num
  27. sql = """
  28. update scrapy.wangju_all_url set playtimes = '%s' where url = '%s'
  29. """
  30. sql = sql % (str(int(playtimes_new)), url)
  31. Mysql.execute(sql, conn=conn)
  32. def update_fields():
  33. conn = Mysql.createOfflineConn()
  34. sql = """
  35. select id, tv_name, score, playtimes, source from scrapy.wangju_all_url order by id asc
  36. """
  37. rows = Mysql.getAll(sql, conn=conn)
  38. for row in rows:
  39. _id = row['id']
  40. tv_name = row['tv_name']
  41. score = row['score']
  42. playtimes = row['playtimes']
  43. source = row['source']
  44. if 'pptv' == source:
  45. sql = """
  46. update scrapy.wangju_url set pptv_score = '%s', pptv_playtimes = '%s' where id = %s
  47. """
  48. if 'youku' == source:
  49. sql = """
  50. update scrapy.wangju_url set youku_score = '%s', youku_playtimes = '%s' where id = %s
  51. """
  52. if 'sohu' == source:
  53. sql = """
  54. update scrapy.wangju_url set sohu_score = '%s', sohu_playtimes = '%s' where id = %s
  55. """
  56. if 'leshi' == source:
  57. sql = """
  58. update scrapy.wangju_url set leshi_score = '%s', leshi_playtimes = '%s' where id = %s
  59. """
  60. if 'huashutv' == source:
  61. sql = """
  62. update scrapy.wangju_url set huashutv_score = '%s', huashutv_playtimes = '%s' where id = %s
  63. """
  64. if 'iqiyi' == source:
  65. sql = """
  66. update scrapy.wangju_url set iqiyi_score = '%s', iqiyi_playtimes = '%s' where id = %s
  67. """
  68. if 'tengxun' == source:
  69. sql = """
  70. update scrapy.wangju_url set tengxun_score = '%s', tengxun_playtimes = '%s' where id = %s
  71. """
  72. sql = sql % (score, playtimes, _id)
  73. Mysql.execute(sql, conn=conn)
  74. if __name__ == '__main__':
  75. # parse_playtimes()
  76. update_fields()