mix_train_300.py 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229
  1. import keras
  2. # -*- encoding:utf-8 -*-
  3. import numpy as np
  4. from keras.models import Sequential
  5. # 优化方法选用Adam(其实可选项有很多,如SGD)
  6. from keras.optimizers import Adam
  7. import random
  8. from keras.models import load_model
  9. from imblearn.over_sampling import RandomOverSampler
  10. from keras.utils import np_utils
  11. # 用于模型初始化,Conv2D模型初始化、Activation激活函数,MaxPooling2D是池化层
  12. # Flatten作用是将多位输入进行一维化
  13. # Dense是全连接层
  14. from keras.layers import Conv2D, Activation, MaxPool2D, Flatten, Dense,Dropout,Input,MaxPooling2D,BatchNormalization,concatenate
  15. from keras import regularizers
  16. from keras.models import Model
  17. from keras.callbacks import EarlyStopping
  # ---- Training configuration ----
  # Stop training once the monitored 'accuracy' metric has failed to improve
  # for 5 consecutive epochs.
  early_stopping = EarlyStopping(
      monitor='accuracy',
      patience=5,
      verbose=2,
  )
  epochs = 60  # upper bound on the number of training epochs
  size = 444000  # lines read from the main training log (about 680k in total)

  # ---- Data and model locations ----
  file_path = 'D:\\data\\quantization\\stock327_28d_train2.log'
  file_path1 = 'D:\\data\\quantization\\stock327_28d_test.log'
  file_path2 = 'D:\\data\\quantization\\stock327_28d_train1.log'
  model_path = '327_28d_mix_5D_ma5_s_seq.h5'

  # ---- Feature layout ----
  # The first row*col values of every sample are reshaped into a row x col
  # grid for the CNN branch; the remaining values feed the MLP branch.
  row, col = 28, 20
  27. '''
  28. 30d+ma5+流通市值>40
  29. 0 ROC 30*18 38,100,17
  30. 1 DMI 30*20 39,101,13
  31. 2 MACD 30*19 34,100,19
  32. 3 RSI 30*17
  33. 28d+ma5+流通市值>5
  34. 10 ROC 28*18 36,100,18
  35. 11 DMI 28*20 37,101,16
  36. 12 MACD 28*19
  37. 28d+ma5+5+流通市值>10
  38. 21 DMI 28*20 43,102,9 非常好 46,102,8
  39. 22 MACD 28*19 46,102,9
  40. 25 DMI ma5+涨幅int 28*18 40,102,8/14
  41. 26 DMI 向量化 28*22 41,101,14,14
  42. 27 DMI 向量化 修正 28*20 41,101,14,14
  43. 1d close
  44. 23 DMI 28*20 34,97,36
  45. 3d close 去掉ma的两个字段
  46. 24 DMI 28*18 41,96,42-13
  47. 30d+close
  48. 4 ROC 30*18
  49. 5 DMI 30*20
  50. 6 MACD 30*19 32,96,44
  51. 7 RSI 30*17 31,96,42
  52. 24d+close
  53. 14 ROC 24*18 31,95,52
  54. '''
  def _read_records(path, limit):
      """Parse at most ``limit`` lines of ``path`` into Python objects.

      Reading stops quietly at end of file and blank lines are skipped, so a
      log that is shorter than ``limit`` (or ends with an empty line) does not
      crash on ``eval('')``.
      """
      from itertools import islice  # local import keeps the block self-contained

      records = []
      with open(path) as f:
          for raw in islice(f, limit):
              text = raw.strip()
              if not text:
                  continue
              # SECURITY NOTE(review): eval() executes whatever expression the
              # log line contains. It is kept because the record format is not
              # visible here; if every line is a plain literal, switch to
              # ast.literal_eval.
              records.append(eval(text))
      return records


  def read_data(path, path1=file_path1):
      """Load, shuffle, split and oversample the training records.

      Up to ``size`` lines (module-level constant) are read from ``path`` and
      up to 30000 lines from ``path1``. The combined records are shuffled and
      split 85% / 15% into a training and a test part. Only the training part
      is balanced with ``RandomOverSampler``.

      Args:
          path: main log file; each line is a Python expression that evaluates
              to one record (a sequence of values).
          path1: second log file whose records are mixed into the same pool.

      Returns:
          A tuple ``(train_x, train_y, test_x, test_y)`` of numpy arrays, where
          the training arrays have been oversampled.

      Raises:
          ValueError: if neither file yields any record.
      """
      lines = _read_records(path, size)
      lines.extend(_read_records(path1, 30000))
      # A third source (file_path2, 60000 lines) used to be read here and is
      # currently disabled.
      if not lines:
          raise ValueError('no records could be read from %r or %r' % (path, path1))

      random.shuffle(lines)
      print('读取数据完毕')

      split_at = int(0.85 * len(lines))
      n_fields = len(lines[0])
      train_rows, test_rows = lines[:split_at], lines[split_at:]

      # Features are every field except the last two; the label is the last
      # field. NOTE(review): the second-to-last field is dropped entirely --
      # presumably intentional, confirm against the log format.
      train_x = [rec[:n_fields - 2] for rec in train_rows]
      train_y = [rec[-1] for rec in train_rows]
      test_x = [rec[:n_fields - 2] for rec in test_rows]
      test_y = [rec[-1] for rec in test_rows]
      print('转换数据完毕')

      ros = RandomOverSampler(random_state=0)
      # Newer imbalanced-learn releases expose `fit_resample` and no longer
      # provide `fit_sample`; fall back to the old name on older releases.
      resample = getattr(ros, 'fit_resample', None) or ros.fit_sample
      X_resampled, y_resampled = resample(np.array(train_x), np.array(train_y))
      print('数据重采样完毕')

      return X_resampled, y_resampled, np.array(test_x), np.array(test_y)
  # Load the shuffled, oversampled training set and the held-out test set.
  train_x, train_y, test_x, test_y = read_data(file_path)

  # CNN branch input: the leading row*col columns of every sample, viewed as
  # a (row, col, 1) single-channel grid.
  train_x_a = train_x[:, :row * col].reshape(train_x.shape[0], row, col, 1)
  # A second grid input was tried earlier and is disabled:
  # train_x_b = train_x[:, 9*26:18*26]
  # train_x_b = train_x_b.reshape(train_x.shape[0], 9, 26, 1)

  # MLP branch input: every column after the grid.
  train_x_c = train_x[:, row * col:]
  def create_mlp(dim, regress=False):
      """Build the fully connected branch of the mixed-input network.

      Args:
          dim: number of input features per sample.
          regress: when true, a single linear output unit is appended.

      Returns:
          An uncompiled ``Sequential`` model: 256 -> Dropout(0.2) -> 256 ->
          256 -> 128 ReLU units, plus the optional linear unit.
      """
      stack = [
          Dense(256, input_dim=dim, activation="relu"),
          Dropout(0.2),
          Dense(256, activation="relu"),
          Dense(256, activation="relu"),
          Dense(128, activation="relu"),
      ]
      if regress:
          # Optional single-value output on top of the hidden stack.
          stack.append(Dense(1, activation="linear"))
      return Sequential(stack)
  def create_cnn(width, height, depth, size=48, kernel_size=(5, 6), regress=False, output=24):
      """Build the convolutional branch of the mixed-input network.

      Layout: Conv2D(stride 2, "same" padding) -> ReLU -> BatchNorm -> Flatten
      -> Dense -> ReLU -> BatchNorm -> Dropout(0.2) -> Dense -> ReLU, with an
      optional single linear unit on top.

      Args:
          width: size of the first input axis (the caller in this file passes
              ``row``).
          height: size of the second input axis (the caller passes ``col``).
          depth: number of input channels. Previously this argument was ignored
              and the channel count was hard-coded to 1; it is now honoured.
          size: number of convolution filters.
          kernel_size: convolution kernel shape.
          regress: when true, a single linear output unit is appended.
          output: number of units in each of the two dense layers.

      Returns:
          An uncompiled functional ``Model`` mapping the input grid to the
          branch output.
      """
      # Channels-last ordering: the channel axis is the last one.
      chan_dim = -1
      inputs = Input(shape=(width, height, depth))

      # CONV => RELU => BN
      x = Conv2D(size, kernel_size, strides=2, padding="same")(inputs)
      x = Activation("relu")(x)
      x = BatchNormalization(axis=chan_dim)(x)

      # Flatten the volume, then FC => RELU => BN => DROPOUT
      x = Flatten()(x)
      x = Dense(output)(x)
      x = Activation("relu")(x)
      x = BatchNormalization(axis=chan_dim)(x)
      x = Dropout(0.2)(x)

      # Second FC layer producing the branch's final feature vector.
      x = Dense(output)(x)
      x = Activation("relu")(x)

      if regress:
          x = Dense(1, activation="linear")(x)

      return Model(inputs, x)
  # Build the two branches: the MLP consumes the flat trailing features and
  # the CNN consumes the row x col grid.
  mlp = create_mlp(train_x_c.shape[1], regress=False)
  # Earlier CNN configurations (trailing numbers are the author's recorded results):
  # cnn_0 = create_cnn(18, 20, 1, kernel_size=(3, 3), size=90, regress=False, output=96) # 31 97 46
  # cnn_0 = create_cnn(18, 20, 1, kernel_size=(9, 9), size=90, regress=False, output=96) # 28 97 53
  # cnn_0 = create_cnn(18, 20, 1, kernel_size=(3, 20), size=90, regress=False, output=96)
  # cnn_1 = create_cnn(18, 20, 1, kernel_size=(18, 10), size=80, regress=False, output=96)
  # cnn_1 = create_cnn(9, 26, 1, kernel_size=(2, 14), size=36, regress=False, output=64)
  cnn_0 = create_cnn(
      row, col, 1,
      kernel_size=(6, col), size=96, regress=False, output=96,
  )  # 29 98 47

  # Join the outputs of both branches into one feature vector.
  merged = concatenate([mlp.output, cnn_0.output])

  # Classification head: an L1-regularised dense layer with dropout, two more
  # dense layers, then a 4-way softmax.
  head = Dense(1024, activation="relu", kernel_regularizer=regularizers.l1(0.003))(merged)
  head = Dropout(0.2)(head)
  for _ in range(2):
      head = Dense(1024, activation="relu")(head)
  head = Dense(4, activation="softmax")(head)

  # The final model takes [MLP features, CNN grid] and outputs the four
  # class probabilities.
  model = Model(inputs=[mlp.input, cnn_0.input], outputs=head)
  print("Starting training ")

  # Adam with a small learning-rate decay; categorical cross-entropy matches
  # the 4-way softmax output.
  # NOTE(review): this loss expects one-hot targets -- confirm that the labels
  # stored in the logs are already one-hot vectors.
  optimizer = Adam(lr=1e-3, decay=1e-3 / 200)
  model.compile(
      loss="categorical_crossentropy",
      optimizer=optimizer,
      metrics=['accuracy'],
  )

  print("[INFO] training model...")
  # No validation data is supplied, so early stopping watches the training
  # accuracy.
  model.fit(
      x=[train_x_c, train_x_a],
      y=train_y,
      epochs=epochs,
      batch_size=2048,
      shuffle=True,
      callbacks=[early_stopping],
  )

  # Persist the trained network.
  model.save(model_path)
  # Split the held-out samples the same way as the training data: the leading
  # row*col columns become the CNN grid, the rest feed the MLP branch.
  test_x_a = test_x[:, :row * col].reshape(test_x.shape[0], row, col, 1)
  # Disabled second grid input, kept for reference:
  # test_x_b = test_x[:, 9*26:9*26+9*26]
  # test_x_b = test_x_b.reshape(test_x.shape[0], 9, 26, 1)
  test_x_c = test_x[:, row * col:]

  # Evaluate on the 15% of records that read_data held out (not oversampled).
  # The previous message ("predicting house prices") was left over from a
  # regression template and did not describe this classifier.
  print("[INFO] evaluating model on test data...")
  score = model.evaluate([test_x_c, test_x_a], test_y)
  print(score)
  # With metrics=['accuracy'], evaluate returns [loss, accuracy].
  print('Test score:', score[0])
  print('Test accuracy:', score[1])