# mix_kmeans_train_1.py
  1. import keras
  2. # -*- encoding:utf-8 -*-
  3. import numpy as np
  4. from keras.models import Sequential
  5. # 优化方法选用Adam(其实可选项有很多,如SGD)
  6. from keras.optimizers import Adam
  7. import random
  8. from keras.models import load_model
  9. from imblearn.over_sampling import RandomOverSampler
  10. from keras.utils import np_utils
  11. # 用于模型初始化,Conv2D模型初始化、Activation激活函数,MaxPooling2D是池化层
  12. # Flatten作用是将多位输入进行一维化
  13. # Dense是全连接层
  14. from keras.layers import Conv2D, Activation, MaxPool2D, Flatten, Dense,Dropout,Input,MaxPooling2D,BatchNormalization,concatenate
  15. from keras import regularizers
  16. from keras.models import Model
# Training configuration. Paths are Windows-style; adjust for the local data layout.
epochs = 50
size = 580000  # NOTE(review): unused in this file — presumably the expected sample count; confirm
file_path = 'D:\\data\\quantization\\stock160_18d_10D_train.log'  # NOTE(review): unused here; mul_train builds its own per-cluster paths
model_path = '160_18d_mix_5D_ma5_s_seq.h5'  # base name; train() saves one '<stem>_<idx>.h5' per cluster
data_dir = 'D:\\data\\quantization\\'
  22. def read_data(path):
  23. lines = []
  24. with open(path) as f:
  25. i = 0
  26. for line in f.readlines()[:]:
  27. lines.append(eval(line.strip()))
  28. random.shuffle(lines)
  29. print('读取数据完毕')
  30. d=int(0.7*len(lines))
  31. train_x=[s[:-2] for s in lines[0:d]]
  32. train_y=[s[-1] for s in lines[0:d]]
  33. test_x=[s[:-2] for s in lines[d:]]
  34. test_y=[s[-1] for s in lines[d:]]
  35. print('转换数据完毕')
  36. ros = RandomOverSampler(random_state=0)
  37. X_resampled, y_resampled = ros.fit_sample(np.array(train_x), np.array(train_y))
  38. print('数据重采样完毕')
  39. return X_resampled,y_resampled,np.array(test_x),np.array(test_y)
  40. def mul_train(name="10_18d"):
  41. for x in range(0, 8):
  42. score = train(data_dir + 'kmeans\\' + name + "_trai_" + str(x) + ".log", x) # stock160_18d_trai_0
  43. with open(data_dir + name + '_mix.log', 'a') as f:
  44. f.write(str(x) + ':' + str(score[1]) + '\n')
def train(file_path_name, idx):
    """Train the two-branch (MLP + CNN) classifier on one cluster file.

    file_path_name: per-cluster sample file passed to read_data().
    idx: cluster index, used only to suffix the saved model file name.
    Returns the [loss, accuracy] list from model.evaluate on the test split.
    """
    train_x, train_y, test_x, test_y = read_data(file_path_name)
    # The first 18*24 feature columns are reshaped into an 18x24 single-channel
    # "image" for the CNN branch; the remaining columns feed the MLP branch.
    train_x_a = train_x[:, :18*24]
    train_x_a = train_x_a.reshape(train_x.shape[0], 18, 24, 1)
    # train_x_b = train_x[:, 18*18:18*18+2*18]
    # train_x_b = train_x_b.reshape(train_x.shape[0], 18, 2, 1)
    train_x_c = train_x[:, 18*24:]
    # Build the two branches and merge their outputs.
    mlp = create_mlp(train_x_c.shape[1], regress=False)
    cnn_0 = create_cnn(18, 24, 1, kernel_size=(6, 6), regress=False, output=256)
    # cnn_1 = create_cnn(18, 2, 1, kernel_size=(6,2), regress=False, output=36)
    combinedInput = concatenate([mlp.output, cnn_0.output])
    # Classification head: three 512-unit ReLU layers (the first L1-regularized
    # with dropout), ending in a 5-way softmax.
    x = Dense(512, activation="relu", kernel_regularizer=regularizers.l1(0.003))(combinedInput)
    x = Dropout(0.2)(x)
    x = Dense(512, activation="relu")(x)
    x = Dense(512, activation="relu")(x)
    x = Dense(5, activation="softmax")(x)
    model = Model(inputs=[mlp.input, cnn_0.input], outputs=x)
    print("Starting training ")
    opt = Adam(lr=1e-3, decay=1e-3 / 200)
    # NOTE(review): categorical_crossentropy expects one-hot labels. If the
    # data files store integer class ids in s[-1], this needs
    # np_utils.to_categorical (imported at the top but never used) or
    # sparse_categorical_crossentropy — confirm against the label format.
    model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=['accuracy'])
    print("[INFO] training model...")
    model.fit(
        [train_x_c, train_x_a], train_y,
        # validation_data=([testAttrX, testImagesX], testY),
        epochs=epochs,
        batch_size=2048, shuffle=True)
    # Slice the test split exactly the same way as the training data.
    test_x_a = test_x[:, :18*24]
    test_x_a = test_x_a.reshape(test_x.shape[0], 18, 24, 1)
    test_x_c = test_x[:, 18*24:]
    # Message is a leftover from the house-price tutorial this code is based on.
    print("[INFO] predicting house prices...")
    score = model.evaluate([test_x_c, test_x_a], test_y)
    print(score)
    print('Test score:', score[0])
    print('Test accuracy:', score[1])
    # Persist one model per cluster: '<model_path stem>_<idx>.h5'.
    model.save(model_path.split('.')[0] + '_' + str(idx) + '.h5')
    return score
  99. def create_mlp(dim, regress=False):
  100. # define our MLP network
  101. model = Sequential()
  102. model.add(Dense(64, input_dim=dim, activation="relu"))
  103. model.add(Dense(64, activation="relu"))
  104. # check to see if the regression node should be added
  105. if regress:
  106. model.add(Dense(1, activation="linear"))
  107. # return our model
  108. return model
  109. def create_cnn(width, height, depth, filters=(4, 6), kernel_size=(5, 6), regress=False, output=24):
  110. # initialize the input shape and channel dimension, assuming
  111. # TensorFlow/channels-last ordering
  112. inputShape = (width, height, 1)
  113. chanDim = -1
  114. # define the model input
  115. inputs = Input(shape=inputShape)
  116. x = inputs
  117. # CONV => RELU => BN => POOL
  118. x = Conv2D(32, kernel_size, strides=2, padding="same")(x)
  119. x = Activation("relu")(x)
  120. x = BatchNormalization(axis=chanDim)(x)
  121. # x = MaxPooling2D(pool_size=(2, 2))(x)
  122. # if width > 2:
  123. # x = Conv2D(32, (10, 6), padding="same")(x)
  124. # x = Activation("relu")(x)
  125. # x = BatchNormalization(axis=chanDim)(x)
  126. # flatten the volume, then FC => RELU => BN => DROPOUT
  127. x = Flatten()(x)
  128. x = Dense(output)(x)
  129. x = Activation("relu")(x)
  130. x = BatchNormalization(axis=chanDim)(x)
  131. x = Dropout(0.2)(x)
  132. # apply another FC layer, this one to match the number of nodes
  133. # coming out of the MLP
  134. x = Dense(output)(x)
  135. x = Activation("relu")(x)
  136. # check to see if the regression node should be added
  137. if regress:
  138. x = Dense(1, activation="linear")(x)
  139. # construct the CNN
  140. model = Model(inputs, x)
  141. # return the CNN
  142. return model
if __name__ == '__main__':
    # Entry point: train one model per k-means cluster of the stock160 data.
    mul_train('stock160_18d')