# dnn_train.py (3.5 KB)
# -*- encoding:utf-8 -*-
import itertools
import random

import joblib
import keras
import numpy as np
from imblearn.over_sampling import RandomOverSampler
from keras import regularizers
from keras.layers import Dense, Dropout
from keras.models import Sequential
from keras.models import load_model
  11. def read_data(path):
  12. lines = []
  13. with open(path) as f:
  14. # for x in range(30000):
  15. # lines.append(eval(f.readline().strip()))
  16. for line in f.readlines()[:]:
  17. lines.append(eval(line.strip()))
  18. random.shuffle(lines)
  19. print('读取数据完毕')
  20. d=int(0.95*len(lines))
  21. size = len(lines[0])
  22. train_x=[s[:size - 2] for s in lines[0:d]]
  23. train_y=[s[size-1] for s in lines[0:d]]
  24. test_x=[s[:size - 2] for s in lines[d:]]
  25. test_y=[s[size-1] for s in lines[d:]]
  26. print('转换数据完毕')
  27. ros = RandomOverSampler(random_state=0)
  28. X_resampled, y_resampled = ros.fit_sample(np.array(train_x), np.array(train_y))
  29. print('数据重采样完毕')
  30. return X_resampled,y_resampled,np.array(test_x),np.array(test_y)
  31. def resample(path):
  32. lines = []
  33. with open(path) as f:
  34. for x in range(160000):
  35. lines.append(eval(f.readline().strip()))
  36. estimator = joblib.load('km.pkl')
  37. x = 17
  38. for line in lines:
  39. v = line[1:x*10 + 1]
  40. v = np.array(v)
  41. v = v.reshape(10, x)
  42. v = v[:,0:4]
  43. v = v.reshape(1, 40)
  44. # print(v)
  45. r = estimator.predict(v)
  46. with open('D:\\data\\quantization\\kmeans\\stock2_10_' + str(r[0]) + '.log', 'a') as f:
  47. f.write(str(line) + '\n')
  48. def mul_train():
  49. # for x in range(0, 16):
  50. for x in [7,9,12,14,15]:
  51. train(input_dim=176, result_class=5, file_path="D:\\data\\quantization\\kmeans\\stock2_10_" + str(x) + ".log",
  52. model_name='5d_dnn_seq_' + str(x) + '.h5')
  53. def train(input_dim=400, result_class=3, file_path="D:\\data\\quantization\\stock6.log", model_name=''):
  54. train_x,train_y,test_x,test_y=read_data(file_path)
  55. model = Sequential()
  56. model.add(Dense(units=120+input_dim, input_dim=input_dim, activation='relu'))
  57. model.add(Dense(units=120+input_dim, activation='relu',kernel_regularizer=regularizers.l1(0.001)))
  58. model.add(Dense(units=120+input_dim, activation='relu'))
  59. model.add(Dense(units=120 + input_dim, activation='relu'))
  60. model.add(Dropout(0.1))
  61. model.add(Dense(units=60+input_dim, activation='selu'))
  62. # model.add(Dropout(0.2))
  63. # model.add(Dense(units=60+input_dim, activation='selu'))
  64. # model.add(Dropout(0.2))
  65. model.add(Dense(units=512, activation='relu'))
  66. model.add(Dense(units=result_class, activation='softmax'))
  67. model.compile(loss='categorical_crossentropy', optimizer="adam",metrics=['accuracy'])
  68. print("Starting training ")
  69. model.fit(train_x, train_y, batch_size=32, epochs=117 + int(len(train_x)/900), shuffle=True)
  70. score = model.evaluate(test_x, test_y)
  71. print(score)
  72. print('Test score:', score[0])
  73. print('Test accuracy:', score[1])
  74. model.save(model_name)
  75. # model=None
  76. # model=load_model(model_name)
  77. # result=model.predict(test_x)
  78. # print(result)
  79. # print(test_y)
  80. if __name__ == '__main__':
  81. # train(input_dim=86, result_class=5, file_path="D:\\data\\quantization\\stock6_5.log", model_name='5d_dnn_seq.h5')
  82. # train(input_dim=400, result_class=3, file_path="D:\\data\\quantization\\stock6.log", model_name='15m_dnn_seq.h5')
  83. # resample('D:\\data\\quantization\\stock6_5.log')
  84. mul_train()