# week_train_100.py (8.0 KB)
# -*- encoding:utf-8 -*-
import random

import keras
import numpy as np
from imblearn.over_sampling import RandomOverSampler
from keras import regularizers
from keras.callbacks import EarlyStopping
# Layers used to build the model: Conv2D is the convolution layer, Activation
# applies the activation function, MaxPooling2D is the pooling layer,
# Flatten turns a multi-dimensional input into a 1-D vector and
# Dense is the fully connected layer.
from keras.layers import Conv2D, Activation, MaxPool2D, Flatten, Dense, Dropout, Input, MaxPooling2D, BatchNormalization, concatenate
from keras.models import Model
from keras.models import Sequential
# The optimizer is Adam (many alternatives exist, e.g. SGD).
from keras.optimizers import Adam
  16. early_stopping = EarlyStopping(monitor='accuracy', patience=5, verbose=2)
  17. epochs= 77
  18. # size = 24000 #共68W
  19. file_path = 'D:\\data\\quantization\\week120_18d_train1.log'
  20. model_path = '120_18d_mix_3W_s_seqA.h5'
  21. row = 18
  22. col = 9
  23. col1 = 13
# Experiment notes, kept verbatim from the original author.
'''
0 18-3 18*11 25,102,47-29
1 18W预测3周后最高价+pe 18*11 37,101,44-22
2 18*11 + 11*16 33,101,41-30
3 stripe=1,win=4-3 18*11 + 11*16 31,108,19-34 ----- 随机25,100,51-26
4 stripe=1,win=3 18*11 + 11*16 34,103,41-26
5 stripe=1,win=3 18*11
6 用ma来衡量
7 简化模型
8 ma5-大盘相关+alpha_6 18*11 + 11*16 ------25,96,69
9 ma5-大盘相关+alpha_44+alpha_2 51,96,68-07
10 ma5-大盘相关+alpha_53+alpha_18 48,97,61-06
11 high-大盘相关+alpha_53+alpha_18 35,103,39-37
12 high-大盘相关+alpha_53+alpha_18(每日) 18*11 + 11*17 33,101,46-30
13 high-大盘相关+alpha_53+alpha_18-dmi 18*6 + 11*16 37,105,33-32
14 high-大盘相关+alpha_53+alpha_18-dmi+result修改(自己对比) 18*6 + 11*16 17,97,59
15 high-大盘相关+alpha_53+alpha_18+result修改-波动-ma+dmi 18*9 + 11*14 26,99,53-22 !!!
16 high-pettm 18*9 + 11*13 29,99,54-26
17 high+大盘相关 18*9 + 11*13 26,98,54-27
18 high-大盘相关+alpha_53+alpha_18+result修改-波动-ma+dmi 8周后 18*9 + 11*14 19,111,8,55 16 ----- 随机24,100,49
19 high-大盘相关+alpha_53+alpha_18+result修改-波动-ma+dmi 4周后 18*9 + 11*14 26,113,2,22 73条数据
20 high-大盘相关+alpha_53+alpha_18+result修改-波动-ma+双dmi 4周后 18*9 + 11*13 32,110,11,26 大盘超好的时候可以用这种
'''
  47. def read_data(path):
  48. lines = []
  49. with open(path) as f:
  50. for x in f.readlines()[:]: #680000
  51. line = eval(x.strip())
  52. lines.append(line)
  53. # with open(path1) as f:
  54. # for x in f.readlines()[:]: #680000
  55. # line = eval(x.strip())
  56. # lines.append(line)
  57. random.shuffle(lines)
  58. print('读取数据完毕')
  59. d=int(0.95*len(lines))
  60. length = len(lines[0])
  61. train_x=[s[:length - 2] for s in lines[0:d]]
  62. train_y=[s[-1] for s in lines[0:d]]
  63. test_x=[s[:length - 2] for s in lines[d:]]
  64. test_y=[s[-1] for s in lines[d:]]
  65. print('转换数据完毕')
  66. ros = RandomOverSampler(random_state=0)
  67. X_resampled, y_resampled = ros.fit_sample(np.array(train_x, dtype=np.float32), np.array(train_y, dtype=np.float32))
  68. print('数据重采样完毕')
  69. return X_resampled,y_resampled,np.array(test_x, dtype=np.float32),np.array(test_y, dtype=np.float32)
  70. train_x,train_y,test_x,test_y=read_data(file_path)
  71. train_x_a = train_x[:,:row*col]
  72. train_x_a = train_x_a.reshape(train_x.shape[0], row, col, 1)
  73. train_x_b = train_x[:, row*col:row*col + 11*col1]
  74. train_x_b = train_x_b.reshape(train_x.shape[0], 11, col1, 1)
  75. train_x_c = train_x[:,row*col + 11*col1:]
  76. def create_mlp(dim, regress=False):
  77. # define our MLP network
  78. model = Sequential()
  79. model.add(Dense(256, input_dim=dim, activation="relu"))
  80. model.add(Dropout(0.2))
  81. model.add(Dense(256, activation="relu"))
  82. # model.add(Dense(256, activation="relu"))
  83. model.add(Dense(128, activation="relu"))
  84. # check to see if the regression node should be added
  85. if regress:
  86. model.add(Dense(1, activation="linear"))
  87. # return our model
  88. return model
  89. def create_cnn(width, height, depth, size=48, kernel_size=(5, 6), regress=False, output=24, strides=2):
  90. # initialize the input shape and channel dimension, assuming
  91. # TensorFlow/channels-last ordering
  92. inputShape = (width, height, 1)
  93. chanDim = -1
  94. # define the model input
  95. inputs = Input(shape=inputShape)
  96. # x = inputs
  97. # CONV => RELU => BN => POOL
  98. x = Conv2D(size, kernel_size, strides=strides, padding="same")(inputs)
  99. x = Activation("relu")(x)
  100. x = BatchNormalization(axis=chanDim)(x)
  101. # y = Conv2D(24, (2, 8), strides=2, padding="same")(inputs)
  102. # y = Activation("relu")(y)
  103. # y = BatchNormalization(axis=chanDim)(y)
  104. # flatten the volume, then FC => RELU => BN => DROPOUT
  105. x = Flatten()(x)
  106. x = Dense(output)(x)
  107. x = Activation("relu")(x)
  108. x = BatchNormalization(axis=chanDim)(x)
  109. x = Dropout(0.2)(x)
  110. # apply another FC layer, this one to match the number of nodes
  111. # coming out of the MLP
  112. x = Dense(output)(x)
  113. x = Activation("relu")(x)
  114. # check to see if the regression node should be added
  115. if regress:
  116. x = Dense(1, activation="linear")(x)
  117. # construct the CNN
  118. model = Model(inputs, x)
  119. # return the CNN
  120. return model
  121. # create the MLP and CNN models
  122. mlp = create_mlp(train_x_c.shape[1], regress=False)
  123. # cnn_0 = create_cnn(18, 20, 1, kernel_size=(3, 3), size=90, regress=False, output=96) # 31 97 46
  124. cnn_0 = create_cnn(row, col, 1, kernel_size=(4, col), size=66, regress=False, output=66) # 29 98 47
  125. # cnn_0 = create_cnn(18, 20, 1, kernel_size=(9, 9), size=90, regress=False, output=96) # 28 97 53
  126. # cnn_0 = create_cnn(18, 20, 1, kernel_size=(3, 20), size=90, regress=False, output=96)
  127. cnn_1 = create_cnn(11, col1, 1, kernel_size=(3, col1), size=66, regress=False, output=66, strides=1)
  128. # cnn_1 = create_cnn(9, 26, 1, kernel_size=(2, 14), size=36, regress=False, output=64)
  129. # create the input to our final set of layers as the *output* of both
  130. # the MLP and CNN
  131. combinedInput = concatenate([mlp.output, cnn_0.output, cnn_1.output])
  132. # our final FC layer head will have two dense layers, the final one
  133. # being our regression head
  134. x = Dense(1024, activation="relu", kernel_regularizer=regularizers.l1(0.003))(combinedInput)
  135. x = Dropout(0.2)(x)
  136. x = Dense(1024, activation="relu")(x)
  137. x = Dense(1024, activation="relu")(x)
  138. # 在建设一层
  139. x = Dense(4, activation="softmax")(x)
  140. # our final model will accept categorical/numerical data on the MLP
  141. # input and images on the CNN input, outputting a single value (the
  142. # predicted price of the house)
  143. model = Model(inputs=[mlp.input, cnn_0.input, cnn_1.input], outputs=x)
  144. print("Starting training ")
  145. # h = model.fit(train_x, train_y, batch_size=4096*2, epochs=500, shuffle=True)
  146. # compile the model using mean absolute percentage error as our loss,
  147. # implying that we seek to minimize the absolute percentage difference
  148. # between our price *predictions* and the *actual prices*
  149. opt = Adam(lr=1e-3, decay=1e-3 / 200)
  150. model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=['accuracy'])
  151. # train the model
  152. print("[INFO] training model...")
  153. model.fit(
  154. [train_x_c, train_x_a, train_x_b], train_y,
  155. # validation_data=([testAttrX, testImagesX], testY),
  156. # epochs=int(3*train_x_a.shape[0]/1300),
  157. epochs=epochs,
  158. batch_size=2048, shuffle=True,
  159. callbacks=[early_stopping]
  160. )
  161. model.save(model_path)
  162. test_x_a = test_x[:,:row*col]
  163. test_x_a = test_x_a.reshape(test_x.shape[0], row, col, 1)
  164. test_x_b = test_x[:, row*col:row*col + 11*col1]
  165. test_x_b = test_x_b.reshape(test_x.shape[0],11, col1, 1)
  166. test_x_c = test_x[:,row*col + 11*col1:]
  167. # make predictions on the testing data
  168. print("[INFO] predicting house prices...")
  169. score = model.evaluate([test_x_c, test_x_a, test_x_b], test_y)
  170. print(score)
  171. print('Test score:', score[0])
  172. print('Test accuracy:', score[1])