123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185 |
- import keras
- # -*- encoding:utf-8 -*-
- import numpy as np
- from keras.models import Sequential
- # 优化方法选用Adam(其实可选项有很多,如SGD)
- from keras.optimizers import Adam
- import random
- from keras.models import load_model
- from imblearn.over_sampling import RandomOverSampler
- from keras.utils import np_utils
- # 用于模型初始化,Conv2D模型初始化、Activation激活函数,MaxPooling2D是池化层
- # Flatten作用是将多位输入进行一维化
- # Dense是全连接层
- from keras.layers import Conv2D, Activation, MaxPool2D, Flatten, Dense,Dropout,Input,MaxPooling2D,BatchNormalization,concatenate
- from keras import regularizers
- from keras.models import Model
- from keras.callbacks import EarlyStopping
# --- Run configuration ------------------------------------------------------
# Upper bound on the number of training epochs.
epochs = 108
# Number of records read from the primary training log by read_data().
# (Author's note "18W 60W" -- presumably 180k / 600k were other sizes tried.)
size = 450000
# Primary training log, and the file the trained model is written to.
file_path = 'D:\\data\\quantization\\stock175_18d_train2.log'
model_path = '175_18d_mix_6D_ma5_s_seq_2.h5'
# Stop training when the monitored 'accuracy' value has not improved for
# 5 consecutive epochs.
early_stopping = EarlyStopping(monitor='accuracy', patience=5, verbose=2)
def _read_records(path, limit):
    """Return up to ``limit`` parsed records from the text file at ``path``.

    Reading stops early at end of file, and blank lines are skipped, so a
    short or sparsely formatted file no longer fails on an empty string.
    """
    records = []
    with open(path) as handle:
        for _ in range(limit):
            raw = handle.readline()
            if not raw:
                # End of file before ``limit`` lines were read.
                break
            text = raw.strip()
            if not text:
                continue
            # NOTE(security): eval() executes whatever expression the line
            # contains. Only use this on trusted, locally generated logs;
            # if every line is a plain literal, ast.literal_eval is a safer
            # drop-in -- confirm the log format before switching.
            records.append(eval(text))
    return records


def read_data(path, path1="D:\\data\\quantization\\stock175_18d_train1.log"):
    """Load records from two log files and return an oversampled train/test split.

    Up to ``size`` records (module-level setting) are read from ``path`` and
    up to 50000 from ``path1``. The combined records are shuffled and split
    85% / 15% into train and test. For every record the label is its last
    element and the features are everything except the last two elements.
    Only the training part is passed through ``RandomOverSampler``.

    Args:
        path: Primary log file, one record per line.
        path1: Secondary log file, one record per line.

    Returns:
        A tuple ``(train_x, train_y, test_x, test_y)``. The training pair is
        the sampler's resampled output; the test pair are NumPy arrays.

    Raises:
        ValueError: If neither file yields any record.
    """
    lines = _read_records(path, size)
    lines.extend(_read_records(path1, 50000))
    if not lines:
        raise ValueError(
            "no records could be read from %r or %r" % (path, path1)
        )
    random.shuffle(lines)
    print('读取数据完毕')

    d = int(0.85 * len(lines))
    # The feature width is taken from the first record.
    # NOTE(review): the second-to-last field is excluded from the features
    # as well as the label -- confirm that this is intended.
    length = len(lines[0])
    train_part, test_part = lines[:d], lines[d:]
    train_x = [s[:length - 2] for s in train_part]
    train_y = [s[-1] for s in train_part]
    test_x = [s[:length - 2] for s in test_part]
    test_y = [s[-1] for s in test_part]
    print('转换数据完毕')

    ros = RandomOverSampler(random_state=0)
    # Newer imbalanced-learn releases expose ``fit_resample``; older ones
    # only have ``fit_sample``. Prefer the new name, fall back to the old.
    resample = getattr(ros, "fit_resample", None)
    if resample is None:
        resample = ros.fit_sample
    X_resampled, y_resampled = resample(np.array(train_x), np.array(train_y))
    print('数据重采样完毕')
    return X_resampled, y_resampled, np.array(test_x), np.array(test_y)
# Load everything once; read_data() oversamples the training split only.
train_x, train_y, test_x, test_y = read_data(file_path)

# The first 18*19 columns of each row are reshaped into an 18x19
# single-channel grid for the CNN branch.
train_x_a = train_x[:, :18 * 19].reshape(train_x.shape[0], 18, 19, 1)
# A second grid input (columns 9*26:18*26 reshaped to 9x26x1, "train_x_b")
# is currently disabled.
# All remaining columns are the flat features for the MLP branch.
train_x_c = train_x[:, 18 * 19:]
def create_mlp(dim, regress=False):
    """Build the dense branch of the network.

    Args:
        dim: Number of input features.
        regress: When true, append a single linear output unit.

    Returns:
        An uncompiled ``Sequential`` model made of two 96-unit ReLU layers,
        plus the optional linear unit.
    """
    model = Sequential([
        Dense(96, input_dim=dim, activation="relu"),
        Dense(96, activation="relu"),
    ])
    if regress:
        # Optional regression node.
        model.add(Dense(1, activation="linear"))
    return model
def create_cnn(width, height, depth, size=48, kernel_size=(5, 6), regress=False, output=24):
    """Build the convolutional branch of the network (channels-last input).

    Layer stack: CONV => RELU => BN => FLATTEN => FC => RELU => BN =>
    DROPOUT => FC => RELU, optionally followed by one linear unit.

    Args:
        width: First spatial dimension of the input.
        height: Second spatial dimension of the input.
        depth: Number of input channels. Previously this argument was
            ignored and a single channel was always used; it is now honoured.
        size: Number of convolution filters. Inside this function the name
            shadows the module-level ``size`` setting.
        kernel_size: Convolution kernel size.
        regress: When true, append a single linear output unit.
        output: Width of the two fully connected layers.

    Returns:
        An uncompiled functional ``Model`` mapping the input to the
        branch's feature vector.
    """
    # Channels-last ordering: the channel axis is the last one.
    chan_dim = -1
    inputs = Input(shape=(width, height, depth))

    # CONV => RELU => BN (stride 2 with "same" padding).
    x = Conv2D(size, kernel_size, strides=2, padding="same")(inputs)
    x = Activation("relu")(x)
    x = BatchNormalization(axis=chan_dim)(x)

    # Flatten the volume, then FC => RELU => BN => DROPOUT.
    x = Flatten()(x)
    x = Dense(output)(x)
    x = Activation("relu")(x)
    x = BatchNormalization(axis=chan_dim)(x)
    x = Dropout(0.2)(x)

    # Second fully connected layer of the same width.
    x = Dense(output)(x)
    x = Activation("relu")(x)

    if regress:
        # Optional regression node.
        x = Dense(1, activation="linear")(x)

    return Model(inputs, x)
# Build the two branches: the MLP takes the flat feature columns
# (train_x_c) and the CNN takes the 18x19x1 grid (train_x_a).
mlp = create_mlp(train_x_c.shape[1], regress=False)
# Alternative CNN settings, kept with the author's trailing result notes:
# cnn_0 = create_cnn(18, 21, 1, kernel_size=(3, 3), size=64, regress=False, output=128) # 31 97 46
# cnn_0 = create_cnn(18, 21, 1, kernel_size=(6, 6), size=64, regress=False, output=128) # 29 98 47
# cnn_0 = create_cnn(18, 21, 1, kernel_size=(9, 9), size=64, regress=False, output=128) # 28 97 53
cnn_0 = create_cnn(
    18, 19, 1,
    kernel_size=(3, 19),
    size=96,
    regress=False,
    output=128,
)  #A 28 98 40 #B 38 97 51 #A 24 98 41 #b 33 97 48 #B
# cnn_1 = create_cnn(9, 26, 1, kernel_size=(2, 14), size=36, regress=False, output=64)

# The head is fed with the concatenated outputs of both branches.
combinedInput = concatenate([mlp.output, cnn_0.output])

# Head: one L1-regularised 1024-unit layer with dropout, two more 1024-unit
# ReLU layers, then a 5-way softmax output.
x = Dense(
    1024,
    activation="relu",
    kernel_regularizer=regularizers.l1(0.003),
)(combinedInput)
x = Dropout(0.2)(x)
for _ in range(2):
    x = Dense(1024, activation="relu")(x)
x = Dense(5, activation="softmax")(x)

# The final model takes [MLP input, CNN input] and outputs the 5 softmax
# probabilities.
model = Model(inputs=[mlp.input, cnn_0.input], outputs=x)
print("Starting training ")
# Compile with categorical cross-entropy and report accuracy; Adam starts
# at a learning rate of 1e-3 with a decay of 1e-3 / 200.
opt = Adam(lr=1e-3, decay=1e-3 / 200)
model.compile(
    optimizer=opt,
    loss="categorical_crossentropy",
    metrics=['accuracy'],
)

# Train on [flat features, grid] pairs. No validation data is passed, so
# the early-stopping callback sees only training metrics.
print("[INFO] training model...")
model.fit(
    [train_x_c, train_x_a],
    train_y,
    batch_size=2048,
    epochs=epochs,
    shuffle=True,
    callbacks=[early_stopping],
)

# Persist the trained model.
model.save(model_path)
# Split the held-out rows the same way as the training rows: the first
# 18*19 columns become the 18x19x1 CNN grid, the rest feed the MLP.
test_x_a = test_x[:, :18 * 19].reshape(test_x.shape[0], 18, 19, 1)
test_x_c = test_x[:, 18 * 19:]

# Evaluate on the test split. The previous message ("predicting house
# prices") did not describe what this step does.
print("[INFO] evaluating model on the test split...")
score = model.evaluate([test_x_c, test_x_a], test_y)
print(score)
# score[0] is the loss; score[1] is the accuracy metric set at compile time.
print('Test score:', score[0])
print('Test accuracy:', score[1])
|