mix_train_200.py 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218
  1. import keras
  2. # -*- encoding:utf-8 -*-
  3. import numpy as np
  4. from keras.models import Sequential
  5. # Optimizer: Adam is used here (many alternatives exist, e.g. SGD)
  6. from keras.optimizers import Adam
  7. import random
  8. from keras.models import load_model
  9. from imblearn.over_sampling import RandomOverSampler
  10. from keras.utils import np_utils
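  11. # Layer building blocks: Conv2D (convolution), Activation (activation function), MaxPooling2D (pooling layer)
  12. # Flatten collapses a multi-dimensional input into a one-dimensional vector
  13. # Dense is a fully connected layer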
  14. from keras.layers import Conv2D, Activation, MaxPool2D, Flatten, Dense,Dropout,Input,MaxPooling2D,BatchNormalization,concatenate
  15. from keras import regularizers
  16. from keras.models import Model
  17. from keras.callbacks import EarlyStopping
# Run configuration for the training script.

# Stop training when the monitored metric has not improved for 5 epochs.
# NOTE(review): older Keras releases log this metric as 'acc' rather than
# 'accuracy' — confirm the key matches the installed version.
early_stopping = EarlyStopping(monitor='accuracy', patience=5, verbose=2)
# Maximum number of training epochs passed to model.fit.
epochs= 42
# Number of lines read_data takes from the primary training log.
size = 420000 #18W 60W  (other values tried; presumably 180k / 600k — confirm)
# Primary training log; one record per line.
file_path = 'D:\\data\\quantization\\stock217_18d_train2.log'
# Destination passed to model.save once training finishes.
model_path = '218_18d_mix_5D_ma5_s_seq.h5'
# Second log; read_data mixes up to 50000 of its lines into the same pool
# before shuffling and splitting.
file_path1='D:\\data\\quantization\\stock217_18d_test.log'
  24. '''
  25. ROC
  26. 0大盘涨停 涨停/跌停/创业板涨停跌停 42,97,45 43,97,36
  27. 1大盘涨停 涨停/跌停 48,98,38
  28. 2beta函数修正 46,98,39 47,97,27 <---2
  29. 3用两个kernel 42,98,42
  30. 4窗口大小修改
  31. 3*3 45,98,43 45,97,34 <---4
  32. 6*6 46,98,41
  33. 6*20 45,98,38 47,97,28 <---5
  34. 9*9 42,97,45 42,97,36 <---6
  35. 9*20 45,98,39
  36. 3*20 42,98,40 44,97,30 <---7
  37. 5 修改神经元
  38. mlp -> 4+3层 42,98,41 51,97,31 <---9
  39. 外层 -> 3+4层 52,97,37 <--11
  40. beta+4+3 49,96,37
  41. beta+2+5 52,96,38
  42. 15 流通市值>30 54,97,33
  43. 16 流通市值>30 + 5d 59,97,53
  44. 17 roc放后面, 18*18 58,97,31
  45. 18 roc放外面,18*18,两个cnn 59,97,28
  46. 19 窗口数180
  47. '''
  48. def read_data(path, path1=file_path1):
  49. lines = []
  50. with open(path) as f:
  51. for x in range(size): #610000
  52. line = eval(f.readline().strip())
  53. lines.append(line)
  54. with open(path1) as f:
  55. for x in range(50000):
  56. line = eval(f.readline().strip())
  57. lines.append(line)
  58. random.shuffle(lines)
  59. print('读取数据完毕')
  60. d=int(0.85*len(lines))
  61. length = len(lines[0])
  62. train_x=[s[:length - 2] for s in lines[0:d]]
  63. train_y=[s[-1] for s in lines[0:d]]
  64. test_x=[s[:length - 2] for s in lines[d:]]
  65. test_y=[s[-1] for s in lines[d:]]
  66. print('转换数据完毕')
  67. ros = RandomOverSampler(random_state=0)
  68. X_resampled, y_resampled = ros.fit_sample(np.array(train_x), np.array(train_y))
  69. print('数据重采样完毕')
  70. return X_resampled,y_resampled,np.array(test_x),np.array(test_y)
  71. train_x,train_y,test_x,test_y=read_data(file_path)
  72. train_x_a = train_x[:,:18*18]
  73. train_x_a = train_x_a.reshape(train_x.shape[0], 18, 18, 1)
  74. train_x_b = train_x[:, 18*18:18*18 + 2*18]
  75. train_x_b = train_x_b.reshape(train_x.shape[0], 18, 2, 1)
  76. train_x_c = train_x[:,18*18 + 2*18:]
  77. def create_mlp(dim, regress=False):
  78. # define our MLP network
  79. model = Sequential()
  80. model.add(Dense(128, input_dim=dim, activation="relu"))
  81. model.add(Dropout(0.2))
  82. model.add(Dense(128, activation="relu"))
  83. model.add(Dense(128, activation="relu"))
  84. # model.add(Dense(128, activation="relu"))
  85. # check to see if the regression node should be added
  86. if regress:
  87. model.add(Dense(1, activation="linear"))
  88. # return our model
  89. return model
  90. def create_cnn(width, height, depth, size=48, kernel_size=(5, 6), regress=False, output=24):
  91. # initialize the input shape and channel dimension, assuming
  92. # TensorFlow/channels-last ordering
  93. inputShape = (width, height, 1)
  94. chanDim = -1
  95. # define the model input
  96. inputs = Input(shape=inputShape)
  97. # x = inputs
  98. # CONV => RELU => BN => POOL
  99. x = Conv2D(size, kernel_size, strides=2, padding="same")(inputs)
  100. x = Activation("relu")(x)
  101. x = BatchNormalization(axis=chanDim)(x)
  102. # y = Conv2D(24, (2, 8), strides=2, padding="same")(inputs)
  103. # y = Activation("relu")(y)
  104. # y = BatchNormalization(axis=chanDim)(y)
  105. # flatten the volume, then FC => RELU => BN => DROPOUT
  106. x = Flatten()(x)
  107. x = Dense(output)(x)
  108. x = Activation("relu")(x)
  109. x = BatchNormalization(axis=chanDim)(x)
  110. x = Dropout(0.2)(x)
  111. # apply another FC layer, this one to match the number of nodes
  112. # coming out of the MLP
  113. x = Dense(output)(x)
  114. x = Activation("relu")(x)
  115. # check to see if the regression node should be added
  116. if regress:
  117. x = Dense(1, activation="linear")(x)
  118. # construct the CNN
  119. model = Model(inputs, x)
  120. # return the CNN
  121. return model
# Build the three feature branches: an MLP over the flat columns and one CNN
# per reshaped grid (18x18 and 18x2).
mlp = create_mlp(train_x_c.shape[1], regress=False)
# Commented-out calls below are alternative settings tried earlier; the
# trailing numbers are presumably the author's recorded results per run.
# cnn_0 = create_cnn(18, 20, 1, kernel_size=(3, 3), size=90, regress=False, output=96) # 31 97 46
cnn_0 = create_cnn(18, 18, 1, kernel_size=(6, 20), size=96, regress=False, output=99) # 29 98 47
# cnn_0 = create_cnn(18, 20, 1, kernel_size=(9, 9), size=90, regress=False, output=96) # 28 97 53
# cnn_0 = create_cnn(18, 20, 1, kernel_size=(3, 20), size=90, regress=False, output=96)
# cnn_1 = create_cnn(18, 20, 1, kernel_size=(18, 10), size=80, regress=False, output=96)
cnn_1 = create_cnn(18, 2, 1, kernel_size=(6, 2), size=24, regress=False, output=24)
# Concatenate the outputs of the MLP and both CNN branches; this is the
# input to the shared classification head.
combinedInput = concatenate([mlp.output, cnn_0.output, cnn_1.output])
# Classification head: four 1024-unit ReLU layers (L1 weight penalty on the
# first one, dropout after the first two) followed by a softmax layer.
x = Dense(1024, activation="relu", kernel_regularizer=regularizers.l1(0.003))(combinedInput)
x = Dropout(0.2)(x)
x = Dense(1024, activation="relu")(x)
x = Dropout(0.2)(x)
x = Dense(1024, activation="relu")(x)
x = Dense(1024, activation="relu")(x)
# x = Dense(512, activation="relu")(x)
# Add one more layer: the 3-way softmax output.
x = Dense(3, activation="softmax")(x)
# The final model takes its inputs in the order [MLP, cnn_0, cnn_1] and
# outputs a probability distribution over 3 classes.
model = Model(inputs=[mlp.input, cnn_0.input, cnn_1.input], outputs=x)
print("Starting training ")
# h = model.fit(train_x, train_y, batch_size=4096*2, epochs=500, shuffle=True)
# Compile with categorical cross-entropy loss and track accuracy.
# Adam is configured with lr=1e-3 and decay=1e-3 / 200.
# NOTE(review): categorical_crossentropy expects one-hot targets, while
# train_y comes straight from read_data — confirm the labels stored in the
# log are already one-hot encoded.
opt = Adam(lr=1e-3, decay=1e-3 / 200)
model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=['accuracy'])
# Train the model. Inputs are ordered [flat, 18x18 grid, 18x2 grid] to match
# the model's [MLP, cnn_0, cnn_1] inputs. No validation data is passed, so
# the early-stopping callback watches the training metric.
print("[INFO] training model...")
model.fit(
    [train_x_c, train_x_a, train_x_b], train_y,
    # validation_data=([testAttrX, testImagesX], testY),
    # epochs=int(3*train_x_a.shape[0]/1300),
    epochs=epochs,
    batch_size=2048, shuffle=True,
    callbacks=[early_stopping]
)
# Persist the trained model to model_path.
model.save(model_path)
  166. test_x_a = test_x[:,:18*18]
  167. test_x_a = test_x_a.reshape(test_x.shape[0], 18, 18, 1)
  168. test_x_b = test_x[:, 18*18:18*18+18*2]
  169. test_x_b = test_x_b.reshape(test_x.shape[0], 18, 2, 1)
  170. test_x_c = test_x[:,18*18 + 18*2:]
  171. # make predictions on the testing data
  172. print("[INFO] predicting house prices...")
  173. score = model.evaluate([test_x_c, test_x_a, test_x_b], test_y)
  174. print(score)
  175. print('Test score:', score[0])
  176. print('Test accuracy:', score[1])