# mix_train_518.py
# -*- encoding:utf-8 -*-
import random

import numpy as np
from imblearn.over_sampling import RandomOverSampler
from sklearn.preprocessing import MinMaxScaler

import keras
from keras.models import Sequential, Model, load_model
# optimizer: Adam (many alternatives exist, e.g. SGD)
from keras.optimizers import Adam
from keras.utils import np_utils
# layer building blocks: Conv2D for convolutions, Activation for activation
# functions, MaxPooling2D for pooling, Flatten to collapse a multi-dimensional
# tensor to 1D, Dense for fully connected layers
from keras.layers import Conv2D, Activation, MaxPool2D, Flatten, Dense, Dropout, Input, MaxPooling2D, BatchNormalization, concatenate
from keras import regularizers
from keras.callbacks import EarlyStopping
from keras import backend as K
K.set_image_data_format('channels_first')

# with no validation data passed to fit(), this monitors *training* accuracy
early_stopping = EarlyStopping(monitor='accuracy', patience=5, verbose=2)

epochs = 155
size = 380000
file_path = 'D:\\data\\quantization\\stock571_12d_train2.log'
model_path = '570_5d_mix_5D_ma5_s_seq.h5'
file_path1 = 'D:\\data\\quantization\\stock563_12d_test.log'
row = 5   # height of the per-sample feature grid
col = 31  # width of the per-sample feature grid
'''
Experiment log:

0   dmi 28*20 38,95,72/25  predicts declines very accurately  54,95,74
1   macd 28*19 41,98,53/8
2   dmi vs. the broad market 28*20 35,99,67/32 >>
3   5d dmi vs. the broad market 28*20 42,99,39/10
4   3d dmi vs. the broad market 28*20 40,99,39/07
5   3d beta1 55,99,52/07  currently in use
6   3d ma20 40,99,41/07
7   3d macd 28*19 55,99,40/07
8   3d, market cap > 30 28*20 57,99,56/40 >> high price, use this! 43,98,59
9   3d, market cap > 30 28*20 57,99,31/08 closing high
10  5d, market cap > 30 28*20 closing high
11  5d, market cap > 30 28*20 ma5
12  5d minimal 28*16 with ma5, ma20 46,102,16/26 test it
13  3d high price 28*16 57,101,39,16
14  5d minimal + high price 28*16 40,101,47-56 test it (no good) >> 33,100,49
15  5d + dmi + high price 28*20 40,101,48-56 test it (no good)
16  same as 12/14, params 11,10 28*16 38,101,47-57 test it (no good)
17  same as above, params 11,6 28*16 39,101,47-57 !
35  index features vectorized 28*14 43,101,47-57
36  index features removed 28*14 40,101,47-57
37  index vectorized, result rebased to the stock itself 28*17 37,101,47-57 24,101,47
38  index vectorized++, result rebased to itself + win=5 28*17 39,101,46-57
39  index vectorized++, result rebased to itself + win=8 28*17 37,101,47-57
40  index vectorized++, result rebased to itself + win=11 28*17 44,101,45-56
41  bigquantor win=11,21 28*21 28,99,53-59
42  bigquantor win=7,21 28*21 31,102,42-57
43  bigquantor win=6,6 18*21 26,101,44-56  # 400k samples total
44  bigquantor win=4,5 18*21 24,101,45-57
45  bigquantor win=4,4 12*21 27,101,45-57
46  bigquantor win=3,6 12*21 43,101,44-54
47  bigquantor win=3,8 12*21 34,101,43-57
49  bigquantor win=5,8, stride=1 12*21 33,101,45-57
50  bigquantor(open) win=4,4 12*21 34,100,49-56  # 24,100,49
51  bigquantor(open) win=5,5 12*21 34,100,49-55
52  bigquantor(open) win=3,16 12*21 33,100,49-56
53  bigquantor(open) win=4,21 12*21 40,101,46-55
53A bigquantor(open) win=4,21, one conv layer 12*21 36,101,46-55
53B bigquantor(open) win=4*21+4*4, two conv layers 12*21
54  index vectorized++(open) 16*17
55  index vectorized++(open) + ohlc 16*17
56  index vectorized++ bigquantor win=3*25+4*4, two conv layers 12*25 29,100,49-55
56A index vectorized++ bigquantor win=3*25, one conv layer 12*25
57  index vectorized++ bigquantor win=3*25, one conv layer 12*25 28,100,48-56
57A index vectorized++ bigquantor win=6*12, one conv layer 12*25 27,101,46-53
58  index vectorized++ bigquantor win=6*12, one conv layer 6*25  transposed, works surprisingly well...
58A index vectorized++ bigquantor win=3*25, one conv layer 6*25
61  simplified 5*11 420k samples 15,101,45-50
62  simplified 3*9 420k samples
63  win=3*9 6*9 14,100,0
63A win=2*9 30,101,46
63B win=1*9 34,101,45
63C win=2*2, two conv layers 31,101,46
63D win=3*3, two conv layers 25,100,47
18  split into two branches, stride=2, win=5 40,102,30-36
19  split into two short branches, 15-5-2 / 15-3-1 38,102,31
20  split into one long and one short branch, 28-7-2 / 10-3-1 34,102,29-36
21  ln applied to turnover rate and to price change 28*12 34,100,48,36
22  Alpha#101: 28*12 44,100,48-36 ? ------ 34,100,48-36
23  further simplified (turnover rate, limit-up/down patterns, volatility vs. the Shanghai board, big cyclical moves) 28*9 36,102,47,100,36 ---- 25,100,48,101-36
23A win=4 35,101,46,100,57
24  Alpha#44 + rank_33 28*10 34,101,47,100,36
25  pure dmi 101 22*9 29,101,46,100,36
26  macd 101 22*8 28,101,46,101,36
27  alpha2 + alpha44 22*10 38,101,46,100,36
28  rank_2_a + rank_2_b + alpha#51 22*10 37,101,47,101,57
29  Alpha#53, price removed 18*6 30,101,47,101,58
30  with price, 101+54 18*13 33,101,47,101,57
31  without price, 54, win=4 18*8 40,101,48,100,56
32  same as 23, further simplified (turnover rate, limit-up/down patterns, volatility vs. the Shanghai board, big cyclical moves) 18*9 38,101,47,100,57
33  modified test values 18*9 36,98,61,36 25,97,62
34  + turnover rate + dmi correction + alpha53 + 18 18*17 36,98,60,37
24  Alpha#6:
25  Alpha#9:
27  Alpha#12:
29  Alpha#23:
31  Alpha#51:
33  Alpha#54:
34  Alpha#2:
35  Alpha#9:
36  Alpha#12:
37  Alpha#18:
38
39
40
'''
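# The slicing in read_data below implies that each log line evals to one flat
# list: the first row*col values form the 5x31 grid fed to the CNN branch,
# everything after that except the last two elements feeds the MLP branch, and
# the final element is the class label. check_record is a hypothetical helper
# (not part of the original pipeline) sketching that assumed layout; the 0/1/2
# label range is inferred from the 3-way softmax head, not from the data files.
def check_record(record):
    assert len(record) > row * col + 2, 'record too short for grid + tail fields'
    assert record[-1] in (0, 1, 2), 'label outside the assumed 3-class range'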
def read_data(path, path1=file_path1):
    lines = []
    with open(path) as f:
        for line in f.readlines():  # ~680,000 lines
            # each line is a Python-literal list; eval assumes a trusted file
            # (ast.literal_eval would be a safer drop-in)
            x = eval(line.strip())
            lines.append(x)
    # with open(path1) as f:
    #     for x in range(30000):  # 60k
    #         line = eval(f.readline().strip())
    #         lines.append(line)
    random.shuffle(lines)
    print('finished reading data')
    d = int(0.85 * len(lines))  # 85/15 train/test split
    length = len(lines[0])
    train_x = [s[:length - 2] for s in lines[0:d]]
    train_y = [s[-1] for s in lines[0:d]]
    test_x = [s[:length - 2] for s in lines[d:]]
    test_y = [s[-1] for s in lines[d:]]
    print('finished converting data')
    # balance the classes in the training set by random oversampling
    ros = RandomOverSampler(random_state=0)
    X_resampled, y_resampled = ros.fit_resample(
        np.array(train_x, dtype=np.float32), np.array(train_y, dtype=np.float32))
    # mm_scalar = MinMaxScaler()
    # X_resampled = mm_scalar.fit_transform(X_resampled)
    print('finished resampling data')
    return X_resampled, y_resampled, np.array(test_x, dtype=np.float32), np.array(test_y, dtype=np.float32)
train_x, train_y, test_x, test_y = read_data(file_path)

# categorical_crossentropy below expects one-hot targets; the raw labels are
# assumed to be integer classes 0-2 (implied by the 3-way softmax head)
train_y = np_utils.to_categorical(train_y, 3)
test_y = np_utils.to_categorical(test_y, 3)

# split each flat feature row into the row*col grid for the CNN branch
# and the remaining scalar features for the MLP branch
train_x_a = train_x[:, :row * col]
train_x_a = train_x_a.reshape(train_x.shape[0], 1, row, col)
# train_x_b = train_x[:, 18*col:row*col]
# train_x_b = train_x_b.reshape(train_x.shape[0], 10, col, 1)
train_x_c = train_x[:, row * col:]
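# sanity check on the assumed layout: the CNN branch should see (N, 1, row, col)
# under channels_first, the MLP branch the leftover flat features
print('cnn branch shape:', train_x_a.shape, '| mlp branch shape:', train_x_c.shape)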
def create_mlp(dim, regress=False):
    # define our MLP network
    model = Sequential()
    model.add(Dense(44, input_dim=dim, activation="relu"))
    model.add(Dropout(0.2))
    model.add(Dense(44, activation="relu"))
    # model.add(Dense(96, activation="relu"))
    # model.add(Dense(128, activation="relu"))
    # check to see if the regression node should be added
    if regress:
        model.add(Dense(1, activation="linear"))
    # return our model
    return model
def create_cnn(width, height, depth, size=48, kernel_size=(5, 6), regress=False, output=24, strides=1):
    # initialize the input shape and channel dimension; the backend is set to
    # channels_first above, so the channel axis (and the BatchNormalization
    # axis) is 1
    inputShape = (1, width, height)
    chanDim = 1
    # define the model input
    inputs = Input(shape=inputShape)
    # x = inputs
    # CONV => RELU => BN => POOL
    x = Conv2D(size, kernel_size, strides=strides, padding="same")(inputs)
    x = Activation("relu")(x)
    x = BatchNormalization(axis=chanDim)(x)
    # x = MaxPooling2D(pool_size=(2,2))(x)
    if width > 2:
        x = Conv2D(32, (2, 2), padding="same", strides=1)(x)
        x = Activation("relu")(x)
        x = BatchNormalization(axis=chanDim)(x)
    # y = Activation("relu")(y)
    # y = BatchNormalization(axis=chanDim)(y)
    # flatten the volume, then FC => RELU => BN => DROPOUT
    x = Flatten()(x)
    x = Dense(output)(x)
    x = Activation("relu")(x)
    x = BatchNormalization(axis=chanDim)(x)
    x = Dropout(0.2)(x)
    # apply another FC layer, this one to match the number of nodes
    # coming out of the MLP
    x = Dense(output)(x)
    x = Activation("relu")(x)
    # check to see if the regression node should be added
    if regress:
        x = Dense(1, activation="linear")(x)
    # construct the CNN
    model = Model(inputs, x)
    # return the CNN
    return model
# create the MLP and CNN branches
mlp = create_mlp(train_x_c.shape[1], regress=False)
# cnn_0 = create_cnn(18, 20, 1, kernel_size=(3, 3), size=90, regress=False, output=96) # 31 97 46
cnn_0 = create_cnn(row, col, 1, kernel_size=(2, 2), size=36, regress=False, output=88, strides=1)  # 29 98 47
# cnn_0 = create_cnn(18, 20, 1, kernel_size=(9, 9), size=90, regress=False, output=96) # 28 97 53
# cnn_0 = create_cnn(18, 20, 1, kernel_size=(3, 20), size=90, regress=False, output=96)
# cnn_1 = create_cnn(10, col, 1, kernel_size=(3, col), size=66, regress=False, output=66, strides=1)
# cnn_1 = create_cnn(9, 26, 1, kernel_size=(2, 14), size=36, regress=False, output=64)

# feed the concatenated *outputs* of the MLP and CNN branches into the final head
combinedInput = concatenate([mlp.output, cnn_0.output])

# the final FC head: three dense layers feeding a 3-class softmax classifier
x = Dense(1024, activation="relu", kernel_regularizer=regularizers.l1(0.003))(combinedInput)
x = Dropout(0.2)(x)
x = Dense(1024, activation="relu")(x)
x = Dense(1024, activation="relu")(x)
# one more layer: the classification output
x = Dense(3, activation="softmax")(x)

# the final model takes the scalar features on the MLP input and the
# row*col feature grid on the CNN input, and outputs 3 class probabilities
model = Model(inputs=[mlp.input, cnn_0.input], outputs=x)
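# print the combined two-branch architecture for inspection before training
model.summary()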
  218. print("Starting training ")
  219. # h = model.fit(train_x, train_y, batch_size=4096*2, epochs=500, shuffle=True)
  220. # compile the model using mean absolute percentage error as our loss,
  221. # implying that we seek to minimize the absolute percentage difference
  222. # between our price *predictions* and the *actual prices*
  223. opt = Adam(lr=1e-3, decay=1e-3 / 200)
  224. model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=['accuracy'])
  225. # train the model
  226. print("[INFO] training model...")
  227. model.fit(
  228. [train_x_c, train_x_a,], train_y,
  229. # validation_data=([testAttrX, testImagesX], testY),
  230. # epochs=int(3*train_x_a.shape[0]/1300),
  231. epochs=epochs,
  232. batch_size=4096, shuffle=True,
  233. callbacks=[early_stopping]
  234. )
  235. model.save(model_path)
# carve the test set into the same CNN/MLP branches as the training data
test_x_a = test_x[:, :row * col]
test_x_a = test_x_a.reshape(test_x.shape[0], 1, row, col)
# test_x_b = test_x[:, 18*col:row*col]
# test_x_b = test_x_b.reshape(test_x.shape[0], 10, col, 1)
test_x_c = test_x[:, row * col:]

# evaluate on the held-out data
print("[INFO] evaluating model...")
score = model.evaluate([test_x_c, test_x_a], test_y)
print(score)
print('Test score:', score[0])
print('Test accuracy:', score[1])
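# Minimal inference sketch (the reason load_model is imported above): reload
# the saved model and predict class probabilities on the held-out inputs. The
# argmax-to-class mapping rests on the assumed 0/1/2 integer labels.
reloaded = load_model(model_path)
probs = reloaded.predict([test_x_c, test_x_a])
print('predicted class distribution:', np.bincount(probs.argmax(axis=1), minlength=3))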