# dnn_predict_dmi.py (6.6 KB)
  1. # -*- encoding:utf-8 -*-
  2. import numpy as np
  3. from keras.models import load_model
  4. import joblib
  5. def read_data(path):
  6. lines = []
  7. with open(path) as f:
  8. for line in f.readlines()[:]:
  9. line = eval(line.strip())
  10. if line[-2][0].startswith('0') or line[-2][0].startswith('3'):
  11. lines.append(line)
  12. size = len(lines[0])
  13. train_x=[s[:size - 2] for s in lines]
  14. train_y=[s[size-1] for s in lines]
  15. return np.array(train_x),np.array(train_y),lines
  16. def _score(fact, line):
  17. with open('dnn_predict_dmi_18d.txt', 'a') as f:
  18. f.write(str([line[-2], line[-1]]) + "\n")
  19. up_right = 0
  20. up_error = 0
  21. if fact[0] == 1:
  22. up_right = up_right + 1.12
  23. elif fact[1] == 1:
  24. up_right = up_right + 1.06
  25. elif fact[2] == 1:
  26. up_right = up_right + 1
  27. up_error = up_error + 0.5
  28. elif fact[3] == 1:
  29. up_error = up_error + 1
  30. up_right = up_right + 0.94
  31. else:
  32. up_error = up_error + 1
  33. up_right = up_right + 0.88
  34. return up_right,up_error
  35. def predict(file_path='', model_path='15min_dnn_seq.h5', idx=-1):
  36. test_x,test_y,lines=read_data(file_path)
  37. model=load_model(model_path)
  38. score = model.evaluate(test_x, test_y)
  39. print('DNN', score)
  40. up_num = 0
  41. up_error = 0
  42. up_right = 0
  43. down_num = 0
  44. down_error = 0
  45. down_right = 0
  46. i = 0
  47. result=model.predict(test_x)
  48. win_dnn = []
  49. for r in result:
  50. fact = test_y[i]
  51. if idx in [-2]:
  52. if r[0] > 0.5 or r[1] > 0.5:
  53. pass
  54. else:
  55. if r[0] > 0.6 or r[1] > 0.6:
  56. tmp_right,tmp_error = _score(fact, lines[i])
  57. up_right = tmp_right + up_right
  58. up_error = tmp_error + up_error
  59. up_num = up_num + 1
  60. elif r[3] > 0.5 or r[4] > 0.5:
  61. if fact[0] == 1:
  62. down_error = down_error + 1
  63. down_right = down_right + 1.12
  64. elif fact[1] == 1:
  65. down_error = down_error + 1
  66. down_right = down_right + 1.06
  67. elif fact[2] == 1:
  68. down_right = down_right + 1
  69. elif fact[3] == 1:
  70. down_right = down_right + 0.94
  71. else:
  72. down_right = down_right + 0.88
  73. down_num = down_num + 1
  74. i = i + 1
  75. if up_num == 0:
  76. up_num = 1
  77. if down_num == 0:
  78. down_num = 1
  79. print('DNN', up_right, up_num, up_right/up_num, up_error/up_num, down_right/down_num, down_error/down_num)
  80. return win_dnn,up_right/up_num,down_right/down_num
  81. def multi_predict(model='14_18d'):
  82. r = 0;
  83. p = 0
  84. for x in range(0, 12): # 0,2,3,4,6,8,9,10,11
  85. # for x in [5,9,11,0,3,4,8]: #10_18,0没数据需要重新计算 [0,2,3,4,5,9,10,11]
  86. # for x in [0,1,10]:
  87. # for x in [2,4,7,10]: # 2表现最好 优秀的 0,8正确的反向指标,(9错误的反向指标 样本量太少)
  88. print(x)
  89. # for x in [0,2,5,6,7]: # 5表现最好
  90. win_dnn, up_ratio,down_ratio = predict(file_path='D:\\data\\quantization\\kmeans\\stock' + model + '_test_' + str(x) + '.log',
  91. model_path=model + '_dnn_seq_' + str(x) + '.h5', idx=x)
  92. r = r + up_ratio
  93. p = p + down_ratio
  94. print(r, p)
  95. import pymongo
  96. from util.mongodb import get_mongo_table_instance
  97. code_table = get_mongo_table_instance('tushare_code')
  98. k_table = get_mongo_table_instance('stock_day_k')
  99. industry = ['家用电器', '元器件', 'IT设备', '汽车服务',
  100. '汽车配件', '软件服务',
  101. '互联网', '纺织',
  102. '塑料', '半导体',]
  103. def predict_today(day, model='10_18d'):
  104. lines = []
  105. with open('D:\\data\\quantization\\stock' + model + '_' + str(day) +'.log') as f:
  106. for line in f.readlines()[:]:
  107. line = eval(line.strip())
  108. # if line[-1][0].startswith('0') or line[-1][0].startswith('3'):
  109. lines.append(line)
  110. size = len(lines[0])
  111. train_x=[s[:size - 1] for s in lines]
  112. np.array(train_x)
  113. estimator = joblib.load('km_dmi_18.pkl')
  114. models = []
  115. for x in range(0, 12):
  116. models.append(load_model(model + '_dnn_seq_' + str(x) + '.h5'))
  117. x = 24 # 每条数据项数
  118. k = 18 # 周期
  119. for line in lines:
  120. v = line[1:x*k + 1]
  121. v = np.array(v)
  122. v = v.reshape(k, x)
  123. v = v[:,4:8]
  124. v = v.reshape(1, 4*k)
  125. # print(v)
  126. r = estimator.predict(v)
  127. # if r[0] in [1,6,10]:
  128. # train_x = np.array([line[:size - 1]])
  129. #
  130. # result = models[r[0]].predict(train_x)
  131. # if result[0][3] > 0.5 or result[0][4] > 0.5:
  132. # stock = code_table.find_one({'ts_code':line[-1][0]})
  133. # if stock['name'].startswith('ST') or stock['name'].startswith('N') or stock['name'].startswith('*'):
  134. # continue
  135. # if line[0] > 80:
  136. # continue
  137. # if stock['industry'] in industry:
  138. # pass
  139. # # print(line[-1], stock['name'], stock['industry'], 'sell')
  140. if r[0] in [2,5,9,10,11]:
  141. train_x = np.array([line[:size - 1]])
  142. result = models[r[0]].predict(train_x)
  143. # print(result, line[-1])
  144. if result[0][0] > 0.6 or result[0][1] > 0.6:
  145. if line[-1][0].startswith('688'):
  146. continue
  147. # 去掉ST
  148. stock = code_table.find_one({'ts_code':line[-1][0]})
  149. if stock['name'].startswith('ST') or stock['name'].startswith('N') or stock['name'].startswith('*'):
  150. continue
  151. # 跌的
  152. k_table_list = list(k_table.find({'code':line[-1][0], 'tradeDate':{'$lte':day}}).sort("tradeDate", pymongo.DESCENDING).limit(5))
  153. if k_table_list[0]['close'] > k_table_list[-1]['close']*1.20:
  154. continue
  155. if k_table_list[0]['close'] < k_table_list[-1]['close']*0.90:
  156. continue
  157. if k_table_list[-1]['close'] > 80:
  158. continue
  159. # 指定某几个行业
  160. # if stock['industry'] in industry:
  161. print(line[-1], stock['name'], stock['industry'], 'buy')
  162. if __name__ == '__main__':
  163. # predict(file_path='D:\\data\\quantization\\stock16_18d_test.log', model_path='16_18d_cnn_seq.h5')
  164. # predict(file_path='D:\\data\\quantization\\stock6_test.log', model_path='15m_dnn_seq.h5')
  165. multi_predict(model='16_18d')
  166. # predict_today(20200229, model='11_18d')