dnn_train_dmi.py 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119
  1. # -*- encoding:utf-8 -*-
  2. import numpy as np
  3. from keras.models import Sequential
  4. from keras.layers import Dense,Dropout
  5. import random
  6. from keras import regularizers
  7. from imblearn.over_sampling import RandomOverSampler
  8. import joblib
  9. def read_data(path):
  10. lines = []
  11. with open(path) as f:
  12. # for x in range(30000):
  13. # lines.append(eval(f.readline().strip()))
  14. for line in f.readlines()[:]:
  15. lines.append(eval(line.strip()))
  16. random.shuffle(lines)
  17. print('读取数据完毕')
  18. d=int(0.81*len(lines))
  19. size = len(lines[0])
  20. train_x=[s[:size - 2] for s in lines[0:d]]
  21. train_y=[s[size-1] for s in lines[0:d]]
  22. test_x=[s[:size - 2] for s in lines[d:]]
  23. test_y=[s[size-1] for s in lines[d:]]
  24. print('转换数据完毕')
  25. ros = RandomOverSampler(random_state=0)
  26. X_resampled, y_resampled = ros.fit_sample(np.array(train_x), np.array(train_y))
  27. print('数据重采样完毕')
  28. return X_resampled,y_resampled,np.array(test_x),np.array(test_y)
# Base directory (Windows path, trailing separator included) that resample()
# and mul_train() prepend to the data and result file names they build.
data_dir = 'D:\\data\\quantization\\'
  30. def resample(path, suffix='test'):
  31. lines = []
  32. with open(data_dir + path + '.log') as f:
  33. i = 0
  34. for x in range(64000): # 56万 11万 6.4万
  35. # print(i)
  36. lines.append(eval(f.readline().strip()))
  37. i = i + 1
  38. estimator = joblib.load('km_dmi_18.pkl')
  39. file_list = []
  40. for x in range(0, 12):
  41. file_list.append(open(data_dir + 'kmeans\\' + path[:17] + '_' + str(x) + '.log', 'a')) # stock14_18d_train
  42. x = 16 # 每条数据项数
  43. k = 18 # 周期
  44. for line in lines:
  45. v = line[0:x*k]
  46. v = np.array(v)
  47. v = v.reshape(k, x)
  48. v = v[:,6:10]
  49. v = v.reshape(1, 4*k)
  50. # print(v)
  51. r = estimator.predict(v)
  52. file_list[r[0]].write(str(line) + '\n')
  53. def mul_train(name="10_18d"):
  54. # for x in range(8, 12):
  55. for x in [8,5]:
  56. score = train(input_dim=480, result_class=5, file_path=data_dir + "kmeans\\stock"+ name + "_train_" + str(x) + ".log",
  57. model_name=name + '_dnn_seq_' + str(x) + '.h5')
  58. with open(data_dir + 'stock' + name + '_dmi.log', 'a') as f:
  59. f.write(str(x) + ':' + str(score[1]) + '\n')
  60. def train(input_dim=400, result_class=3, file_path="D:\\data\\quantization\\stock6.log", model_name=''):
  61. train_x,train_y,test_x,test_y=read_data(file_path)
  62. model = Sequential()
  63. model.add(Dense(units=320+input_dim, input_dim=input_dim, activation='relu', kernel_regularizer=regularizers.l1(0.003)))
  64. model.add(Dense(units=300+input_dim, activation='relu'))
  65. model.add(Dropout(0.2))
  66. model.add(Dense(units=220+input_dim, activation='relu'))
  67. model.add(Dense(units=220+input_dim, activation='relu'))
  68. model.add(Dense(units=220+input_dim, activation='relu',kernel_regularizer=regularizers.l1(0.002)))
  69. model.add(Dropout(0.2))
  70. model.add(Dense(units=320 + input_dim, activation='relu'))
  71. # model.add(Dropout(0.2))
  72. # model.add(Dense(units=120+input_dim, activation='selu'))
  73. # model.add(Dense(units=120+input_dim, activation='selu'))
  74. model.add(Dense(units=666, activation='relu'))
  75. model.add(Dense(units=result_class, activation='softmax'))
  76. model.compile(loss='categorical_crossentropy', optimizer="adam",metrics=['accuracy'])
  77. print("Starting training ")
  78. model.fit(train_x, train_y, batch_size=4096*4, epochs=400 + 4*int(len(train_x)/1600), shuffle=True)
  79. score = model.evaluate(test_x, test_y)
  80. print(score)
  81. print('Test score:', score[0])
  82. print('Test accuracy:', score[1])
  83. model.save(model_name)
  84. return score
  85. # model=None
  86. # model=load_model(model_name)
  87. # result=model.predict(test_x)
  88. # print(result)
  89. # print(test_y)
  90. if __name__ == '__main__':
  91. # resample('stock19_18d_test', suffix='test')
  92. mul_train('19_18d')