# dnn_train.py (5.0 KB)
  1. import keras
  2. # -*- encoding:utf-8 -*-
  3. import numpy as np
  4. from keras.models import Sequential
  5. from keras.layers import Dense,Dropout
  6. import random
  7. from keras import regularizers
  8. from keras.callbacks import EarlyStopping
  9. from imblearn.over_sampling import RandomOverSampler
  10. import joblib
  11. import tensorflow
  12. early_stopping = EarlyStopping(monitor='accuracy', patience=5, verbose=2)
  13. def read_data(path):
  14. lines = []
  15. with open(path) as f:
  16. for line in f.readlines()[:]:
  17. lines.append(eval(line.strip()))
  18. with open("D:\\data\\quantization\\stock578N_12d_train3.log") as f:
  19. for line in f.readlines()[:]:
  20. lines.append(eval(line.strip()))
  21. random.shuffle(lines)
  22. print('读取数据完毕')
  23. d=int(0.95*len(lines))
  24. size = len(lines[0])
  25. train_x=[s[:size - 2] for s in lines[0:d]]
  26. train_y=[s[size-1] for s in lines[0:d]]
  27. test_x=[s[:size - 2] for s in lines[d:]]
  28. test_y=[s[size-1] for s in lines[d:]]
  29. print('转换数据完毕')
  30. ros = RandomOverSampler(random_state=0)
  31. X_resampled, y_resampled = ros.fit_sample(np.array(train_x), np.array(train_y))
  32. print('数据重采样完毕')
  33. return X_resampled,y_resampled,np.array(test_x),np.array(test_y)
  34. def resample(path):
  35. lines = []
  36. with open(path) as f:
  37. for x in range(330000):
  38. lines.append(eval(f.readline().strip()))
  39. estimator = joblib.load('km.pkl')
  40. x = 17
  41. for line in lines:
  42. v = line[1:x*10 + 1]
  43. v = np.array(v)
  44. v = v.reshape(10, x)
  45. v = v[:,0:4]
  46. v = v.reshape(1, 40)
  47. # print(v)
  48. r = estimator.predict(v)
  49. with open('D:\\data\\quantization\\kmeans\\stock8_14_train_' + str(r[0]) + '.log', 'a') as f:
  50. f.write(str(line) + '\n')
  51. def mul_train():
  52. # for x in range(0, 16):
  53. for x in [0]:
  54. train(input_dim=176, result_class=5, file_path="D:\\data\\quantization\\kmeans\\stock2_10_" + str(x) + ".log",
  55. model_name='5d_dnn_seq_' + str(x) + '.h5')
  56. def train(result_class=3, file_path="D:\\data\\quantization\\stock6.log", model_name=''):
  57. train_x,train_y,test_x,test_y=read_data(file_path)
  58. input_dim = train_x.shape[1]
  59. model = Sequential()
  60. model.add(Dense(units=320+input_dim, input_dim=input_dim, activation='relu'))
  61. model.add(Dense(units=320+input_dim, activation='relu',kernel_regularizer=regularizers.l1(0.001)))
  62. model.add(Dense(units=320+input_dim, activation='relu'))
  63. model.add(Dropout(0.1))
  64. # model.add(Dense(units=220 + input_dim, activation='relu'))
  65. # model.add(Dropout(0.1))
  66. model.add(Dense(units=320+input_dim, activation='selu'))
  67. model.add(Dropout(0.1))
  68. model.add(Dense(units=320+input_dim, activation='selu'))
  69. # model.add(Dropout(0.1))
  70. model.add(Dense(units=512, activation='relu'))
  71. model.add(Dense(units=result_class, activation='softmax'))
  72. model.compile(loss='categorical_crossentropy', optimizer="adam",metrics=['accuracy'])
  73. print("Starting training ")
  74. # model.fit(train_x, train_y, batch_size=1024, epochs=400 + 4*int(len(train_x)/1000), shuffle=True)
  75. model.fit(train_x, train_y, batch_size=4096*2, epochs=128, shuffle=True, callbacks=[early_stopping])
  76. score = model.evaluate(test_x, test_y)
  77. print(score)
  78. print('Test score:', score[0])
  79. print('Test accuracy:', score[1])
  80. model.save(model_name)
  81. # model=None
  82. # model=load_model(model_name)
  83. # result=model.predict(test_x)
  84. # print(result)
  85. # print(test_y)
  86. '''
  87. 559 input_dim=53 24,101,47
  88. 560 input_dim=63 23,101,44
  89. 561 6d input_dim=63 21,100,50
  90. 562 3d input_dim=35 23,100,48
  91. 563 6d input_dim=62 22,101,46
  92. 564 6d input_dim=26 29,100,47
  93. 565 6d input_dim=26
  94. 566 6d input_dim=26 42,98,57 随机44,97,60
  95. 567 2d input_dim=70 >0.9 30,100,51 随机44,97,60
  96. 568 3d-3 input_dim= >0.7 35,97,64
  97. 569 4d-3 input_dim= >0.7 33,100,52 >0.9 33,100.3,50
  98. 571 6d-3 >0.7 37,100,48
  99. 572 5d-3 + macd+roc 37,101,46
  100. 573 +index 32,101,44
  101. 574 modify 35,101.9,43
  102. 575 modify 34,101,45
  103. 576 去掉index 35,101,47
  104. 577 加上r0_ratio 34,100,48
  105. 578 570修正 30,101,47
  106. 578A 38,101.7 44
  107. 578B 4class
  108. 578C 加上指数 38,99,52
  109. 578D 指数涨幅 34,100,50
  110. 578E 涨停数 39,100,51
  111. 578F 加上是否最高最低 34,,100.9,48
  112. 579G 换成dmi 32 100.9 47
  113. 579H 回退 去掉roc等 39,101.5,45
  114. 579I 加上是否最高最低 30,100,50
  115. 579J 加上DMI 34,101,47
  116. 574A >0.8 34,98,59 36,1.86,25
  117. 570 5d-3 input_dim= >0.9 32,102,42 随机45,99,56
  118. 578K 用上日收盘价 39,101,46
  119. 578L 仅ROC 41,101.6,43 41,2.036,29
  120. 578M 仅macd 41,101.7,43 41,2.035,28
  121. 578N 仅DMI 42,2.079,33|全数据50,2.67,72 随机45,1.82,20
  122. 578O 都去掉 41,2.013,29
  123. 580 去掉Low,High
  124. '''
  125. if __name__ == '__main__':
  126. # train(input_dim=176, result_class=5, file_path="D:\\data\\quantization\\stock6_5.log", model_name='5d_dnn_seq.h5')
  127. train(result_class=3, file_path="D:\\data\\quantization\\stock580_12d_train2.log", model_name='5d_580_dnn_seq.h5')
  128. # resample('D:\\data\\quantization\\stock8_14.log')
  129. # mul_train()