# -*- encoding:utf-8 -*-
"""Train a mixed MLP + CNN classifier on pre-extracted stock indicator windows.

Each line of the input .log files is a Python-literal list: the first
row*col values form a 28x18 indicator "image" for the CNN branch, the
remaining values (except the last two elements) are flat features for the
MLP branch, and the last element is the 4-class label.
NOTE(review): loss is categorical_crossentropy with a 4-way softmax, so the
label element is presumably already one-hot encoded -- confirm against the
data-generation script.
"""
import random

import numpy as np
import keras
from keras import regularizers
from keras.callbacks import EarlyStopping
from keras.layers import (Activation, BatchNormalization, Conv2D, Dense,
                          Dropout, Flatten, Input, MaxPool2D, MaxPooling2D,
                          concatenate)
from keras.models import Model, Sequential, load_model
from keras.optimizers import Adam
from keras.utils import np_utils
from imblearn.over_sampling import RandomOverSampler

# Stop once training accuracy has not improved for 5 consecutive epochs.
early_stopping = EarlyStopping(monitor='accuracy', patience=5, verbose=2)

epochs = 77
size = 440000      # number of training lines to read (~680K available in total)
file_path = 'D:\\data\\quantization\\stock324_28d_train2.log'
model_path = '324_28d_mix_5D_ma5_s_seq.h5'
file_path1 = 'D:\\data\\quantization\\stock324_28d_test.log'
file_path2 = 'D:\\data\\quantization\\stock324_28d_train1.log'
row = 28           # indicator window height (days)
col = 18           # indicator window width (features)

# Experiment log, kept verbatim from the author's notes (Chinese phrases
# translated): "流通市值" = free-float market cap, "非常好" = very good.
'''
30d+ma5+free-float market cap>40
0 ROC      30*18           38,100,17
1 DMI      30*20           39,101,13
2 MACD     30*19           34,100,19
3 RSI      30*17

28d+ma5+free-float market cap>5
10 ROC     28*18           36,100,18
11 DMI     28*20           37,101,16
12 MACD    28*19

28d+ma5+5+free-float market cap>10
21 DMI     28*20           43,102,9  very good   46,102,8
22 MACD    28*19           46,102,9                     1d close
23 DMI     28*20           34,97,36                     3d close  drop the two ma fields
24 DMI     28*18           41,96,42-13

30d+close
4 ROC      30*18
5 DMI      30*20
6 MACD     30*19           32,96,44
7 RSI      30*17           31,96,42

24d+close
14 ROC     24*18           31,95,52
'''


def read_data(path, path1=file_path1):
    """Read train + test log files, shuffle, split 85/15 and oversample.

    path  -- main training log (first `size` lines are read)
    path1 -- secondary log mixed into the pool (first 50000 lines)

    Returns (train_x, train_y, test_x, test_y) as numpy arrays, with the
    training portion class-rebalanced by random oversampling.

    NOTE(review): eval() executes arbitrary code; these are locally
    generated logs, but switch to ast.literal_eval() if the files could
    ever come from an untrusted source.
    """
    lines = []
    with open(path) as f:
        for _ in range(size):
            lines.append(eval(f.readline().strip()))
    with open(path1) as f:
        for _ in range(50000):
            lines.append(eval(f.readline().strip()))
    random.shuffle(lines)
    print('读取数据完毕')

    d = int(0.85 * len(lines))  # 85/15 train/test split point
    length = len(lines[0])
    # Features are everything except the last two elements; the label is
    # the last element (the second-to-last element is deliberately unused).
    train_x = [s[:length - 2] for s in lines[0:d]]
    train_y = [s[-1] for s in lines[0:d]]
    test_x = [s[:length - 2] for s in lines[d:]]
    test_y = [s[-1] for s in lines[d:]]
    print('转换数据完毕')

    # Rebalance classes in the training split only.
    # NOTE(review): fit_sample() was renamed fit_resample() in newer
    # imbalanced-learn releases -- update this call if the library is upgraded.
    ros = RandomOverSampler(random_state=0)
    X_resampled, y_resampled = ros.fit_sample(np.array(train_x), np.array(train_y))
    print('数据重采样完毕')
    return X_resampled, y_resampled, np.array(test_x), np.array(test_y)


train_x, train_y, test_x, test_y = read_data(file_path)

# Split every training sample into the CNN "image" part (first row*col
# values, reshaped to row x col x 1) and the flat MLP part (the remainder).
train_x_a = train_x[:, :row * col]
train_x_a = train_x_a.reshape(train_x.shape[0], row, col, 1)
train_x_c = train_x[:, row * col:]


def create_mlp(dim, regress=False):
    """Build the MLP branch for the flat features.

    dim     -- number of input features
    regress -- when True, append a single linear output node
    """
    model = Sequential()
    model.add(Dense(256, input_dim=dim, activation="relu"))
    model.add(Dropout(0.2))
    model.add(Dense(256, activation="relu"))
    model.add(Dense(256, activation="relu"))
    model.add(Dense(128, activation="relu"))
    if regress:
        model.add(Dense(1, activation="linear"))
    return model


def create_cnn(width, height, depth, size=48, kernel_size=(5, 6), regress=False, output=24):
    """Build the CNN branch for the indicator "image".

    width, height, depth -- input shape (TensorFlow/channels-last ordering)
    size        -- number of convolution filters
    kernel_size -- convolution kernel shape
    output      -- width of the two fully-connected layers
    regress     -- when True, append a single linear output node
    """
    # BUG FIX: the original hard-coded a channel depth of 1 and ignored the
    # `depth` argument; every existing caller passes depth=1, so behaviour
    # is unchanged for them.
    inputShape = (width, height, depth)
    chanDim = -1  # channels axis for channels-last data

    inputs = Input(shape=inputShape)

    # CONV => RELU => BN
    x = Conv2D(size, kernel_size, strides=2, padding="same")(inputs)
    x = Activation("relu")(x)
    x = BatchNormalization(axis=chanDim)(x)

    # Flatten the volume, then FC => RELU => BN => DROPOUT
    x = Flatten()(x)
    x = Dense(output)(x)
    x = Activation("relu")(x)
    x = BatchNormalization(axis=chanDim)(x)
    x = Dropout(0.2)(x)

    # Second FC layer so the branch width matches the MLP branch output.
    x = Dense(output)(x)
    x = Activation("relu")(x)

    if regress:
        x = Dense(1, activation="linear")(x)

    return Model(inputs, x)


# Build both branches and merge their outputs.
mlp = create_mlp(train_x_c.shape[1], regress=False)
cnn_0 = create_cnn(row, col, 1, kernel_size=(6, col), size=96, regress=False, output=96)

combinedInput = concatenate([mlp.output, cnn_0.output])

# Classification head: 4-way softmax over the merged representation; the
# first dense layer carries L1 regularization to curb overfitting.
x = Dense(1024, activation="relu", kernel_regularizer=regularizers.l1(0.003))(combinedInput)
x = Dropout(0.2)(x)
x = Dense(1024, activation="relu")(x)
x = Dense(1024, activation="relu")(x)
x = Dense(4, activation="softmax")(x)

# Final model: flat features feed the MLP input, indicator images feed the
# CNN input, output is the 4-class softmax.
model = Model(inputs=[mlp.input, cnn_0.input], outputs=x)

print("Starting training ")

opt = Adam(lr=1e-3, decay=1e-3 / 200)
model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=['accuracy'])

print("[INFO] training model...")
model.fit(
    [train_x_c, train_x_a],
    train_y,
    epochs=epochs,
    batch_size=2048,
    shuffle=True,
    callbacks=[early_stopping],
)

model.save(model_path)

# Apply the same image/flat split to the held-out test set.
test_x_a = test_x[:, :row * col]
test_x_a = test_x_a.reshape(test_x.shape[0], row, col, 1)
test_x_c = test_x[:, row * col:]

print("[INFO] evaluating model...")
score = model.evaluate([test_x_c, test_x_a], test_y)
print(score)
print('Test score:', score[0])
print('Test accuracy:', score[1])