# -*- encoding:utf-8 -*-
"""Train and evaluate a mixed MLP + CNN three-class classifier.

The script reads one record per line from ``file_path``, shuffles the
records, keeps 85% for training (randomly over-sampled to balance classes)
and 15% for testing.  The first ``row * col`` feature columns feed a small
CNN as a ``(1, row, col)`` grid; the remaining feature columns feed an MLP.
Both branches are concatenated into a dense softmax head.  The trained model
is saved to ``model_path`` and then evaluated on the held-out records.
"""
import random

import keras
import numpy as np
from imblearn.over_sampling import RandomOverSampler
from keras import backend as K
from keras import regularizers
from keras.callbacks import EarlyStopping
# Layers used to build the model: Conv2D is the convolution layer,
# Activation applies an activation function, MaxPooling2D is a pooling
# layer, Flatten collapses multi-dimensional input to 1-D and Dense is a
# fully connected layer.
from keras.layers import (
    Activation,
    BatchNormalization,
    Conv2D,
    Dense,
    Dropout,
    Flatten,
    Input,
    MaxPool2D,
    MaxPooling2D,
    concatenate,
)
from keras.models import Model, Sequential, load_model
# Adam is used as the optimizer (many alternatives exist, e.g. SGD).
from keras.optimizers import Adam
from keras.utils import np_utils
from sklearn.preprocessing import MinMaxScaler

# Every 4-D tensor in this script is laid out as (batch, channels, rows, cols).
K.set_image_data_format('channels_first')

# Stop once the *training* accuracy has not improved for 5 epochs (model.fit
# below is given no validation data).
# NOTE(review): older Keras releases log this metric as 'acc' - confirm the
# key matches the installed version.
early_stopping = EarlyStopping(monitor='accuracy', patience=5, verbose=2)
epochs = 155
size = 380000  # not referenced anywhere else in this script
file_path = 'D:\\data\\quantization\\stock571_12d_train2.log'
model_path = '570_5d_mix_5D_ma5_s_seq.h5'
file_path1 = 'D:\\data\\quantization\\stock563_12d_test.log'
# Shape of the grid fed to the CNN branch: the first row * col feature
# columns of every record.
row = 5
col = 31

# The bare string below is the author's experiment log (numbered feature /
# window configurations followed by result figures).  It is evaluated and
# discarded, so it has no runtime effect; its text is kept unchanged.
'''
0 dmi 28*20 38,95,72/25 下跌预判非常准 54,95,74
1 macd 28*19 41,98,53/8
2 dmi-对大盘对比 28*20 35,99,67/32>>
3 5d-dmi-对大盘对比 28*20 42,99,39/10
4 3d-dmi-对大盘对比 28*20 40,99,39/07
5 3d-beta1 55,99,52/07 当前用这个
6 3d-ma20 40,99,41/07
7 3d-macd 28*19 55,99,40/07
8 3d-市值>30 28*20 57,99,56/40>> 最高价 用这个! 43,98,59
9 3d-市值>30 28*20 57,99,31/08 收盘最高价
10 5d-市值>30 28*20 收盘最高价
11 5d-市值>30 28*20 ma5
12 5d-极简 28*16 有ma5,ma20 46,102,16/26 test it
13 3d-最高价 28*16 57,101,39,16
14 5d-极简-最高价 28*16 40,101,47-56 test it 不行 >> 33,100,49
15 5d+dmi+最高价 28*20 40,101,48-56 test it 不行
16 同12,14,参数11,10 28*16 38,101,47-57 test it 不行
17 同上参数11,6 28*16 39,101,47-57 !
35 指数向量化 28*14 43,101,47-57
36 去掉指数 28*14 40,101,47-57
37 指数向量化 修改result已自己为基点 28*17 37,101,47-57 24,101,47
38 指数向量化++ 修改result已自己为基点+win=5 28*17 39,101,46-57
39 指数向量化++ 修改result已自己为基点+win=8 28*17 37,101,47-57
40 指数向量化++ 修改result已自己为基点+win=11 28*17 44,101,45-56
41 bigquantor win=11,21 28*21 28,99,53-59
42 bigquantor win=7,21 28*21 31,102,42-57
43 bigquantor win=6,6 18*21 26,101,44-56 #共40W
44 bigquantor win=4,5 18*21 24,101,45-57
45 bigquantor win=4,4 12*21 27,101,45-57
46 bigquantor win=3,6 12*21 43,101,44-54
47 bigquantor win=3,8 12*21 34,101,43-57
49 bigquantor win=5,8,stride=1 12*21 33,101,45-57
50 bigquantor(open) win=4,4 12*21 34,100,49-56 # 24,100,49
51 bigquantor(open) win=5,5 12*21 34,100,49-55
52 bigquantor(open) win=3,16 12*21 33,100,49-56
53 bigquantor(open) win=4,21 12*21 40,101,46-55
53A bigquantor(open) win=4,21 用1层 12*21 36,101,46-55
53B bigquantor(open) win=4*21+4*4 用2层 12*21
54 指数向量化++(open) 16*17
55 指数向量化++(open)+olhc 16*17
56 指数向量化++bigquantor win=3*25+4*4 用2层 12*25 29,100,49-55
56A 指数向量化++bigquantor win=3*25 用1层 12*25
57 指数向量化++bigquantor win=3*25 用1层 12*25 28,100,48-56
57A 指数向量化++bigquantor win=6*12 用1层 12*25 27,101,46-53
58 指数向量化++bigquantor win=6*12 用1层 6*25 倒过来很吊...
58A 指数向量化++bigquantor win=3*25 用1层 6*25
61 简化 5*11 42w 15,101,45-50
62 简化 3*9 42w
63 win=3*9 6*9 14,100,0
63A win=2*9 30,101,46
63B win=1*9 34,101,45
63C win=2*2 用两层 31,101,46
63D win=3*3 用两层 25,100,47
18 拆成两个,stripe=2,win=5 40,102,30-36
19 拆成两个短,15-5-2/15-3-1 38,102,31
20 拆成1长1短 28-7-2/10-3-1 34,102,29-36
21 换手率用ln函数 涨幅使用ln函数 28*12 34,100,48,36
22 Alpha#101: 28*12 44,100,48-36 ?
------ 34,100,48-36
23 在简化(换手率-涨跌停形态-与hu板的波动比较-周期大涨大跌-) 28*9 36,102,47,100,36----25,100,48,101-36
23A win=4 35,101,46,100,57
24 Alpha#44: + rank_33 28*10 34,101,47,100,36
25 纯dmi 101 22*9 29,101,46,100,36
26 macd 101 22*8 28,101,46,101,36
27 alpha2+alpha44 22*10 38,101,46,100,36
28 rank_2_a+rank_2_b+alpha#51 22*10 37,101,47,101,57
29 Alpha#53 去掉价格 18*6 30,101,47,101,58
30 有价格 101+54 18*13 33,101,47,101,57
31 没价格 54 win=4 18*8 40,101,48,100,56
32 同23在简化(换手率-涨跌停形态-与hu板的波动比较-周期大涨大跌-) 18*9 38,101,47,100,57
33 修改test值 18*9 36,98,61,36 25,97,62
34 +换手率+dmi修正+alpha53+18 18*17 36,98,60,37
24 Alpha#6:
25 Alpha#9:
27 Alpha#12:
29 Alpha#23:
31 Alpha#51:
33 Alpha#54:
34 Alpha#2:
35 Alpha#9:
36 Alpha#12:
37 Alpha#18:
38
39
40
'''


def read_data(path, path1=file_path1):
    """Load records, shuffle, split 85/15 and over-sample the training part.

    Each non-blank line of ``path`` is evaluated as a Python expression that
    must yield a sequence.  For every record the features are all elements
    except the last two, and the label is the last element (the
    second-to-last element is not used).

    Args:
        path: Path of the record file, one record per line.
        path1: Unused; kept for the disabled code that mixed in records from
            a second file.

    Returns:
        A tuple ``(train_x, train_y, test_x, test_y)``.  The training arrays
        are the output of ``RandomOverSampler``; the test arrays are
        ``float32`` NumPy arrays built from the held-out 15% of records.

    Raises:
        ValueError: If ``path`` contains no records.
    """
    records = []
    with open(path) as f:
        # Stream the file instead of materialising every line up front.
        for line in f:
            text = line.strip()
            if not text:
                # A blank (e.g. trailing) line is not a record.
                continue
            # SECURITY(review): eval() executes whatever the file contains.
            # Only use files you produced yourself; if every record is a
            # plain literal, ast.literal_eval would be a safer parser.
            records.append(eval(text))
    # with open(path1) as f:
    #     for x in range(30000):
    #         line = eval(f.readline().strip())
    #         lines.append(line)
    if not records:
        raise ValueError('no records found in %r' % (path,))

    random.shuffle(records)
    print('读取数据完毕')

    split_at = int(0.85 * len(records))
    # All records are cut at the width of the first one.
    n_features = len(records[0]) - 2
    train_x = [rec[:n_features] for rec in records[:split_at]]
    train_y = [rec[-1] for rec in records[:split_at]]
    test_x = [rec[:n_features] for rec in records[split_at:]]
    test_y = [rec[-1] for rec in records[split_at:]]
    print('转换数据完毕')

    sampler = RandomOverSampler(random_state=0)
    # Newer imbalanced-learn releases expose fit_resample() in place of
    # fit_sample(); prefer the new name and fall back to the old one so the
    # script runs against either.
    resample = getattr(sampler, 'fit_resample', None)
    if resample is None:
        resample = sampler.fit_sample
    X_resampled, y_resampled = resample(
        np.array(train_x, dtype=np.float32),
        np.array(train_y, dtype=np.float32),
    )
    # mm_scalar = MinMaxScaler()
    # X_resampled = mm_scalar.fit_transform(X_resampled)
    print('数据重采样完毕')
    return (
        X_resampled,
        y_resampled,
        np.array(test_x, dtype=np.float32),
        np.array(test_y, dtype=np.float32),
    )


def _split_features(features):
    """Split a 2-D feature matrix into the CNN grid and the MLP attributes.

    The first ``row * col`` columns are reshaped to ``(n, 1, row, col)``;
    the remaining columns are returned as they are.
    """
    grid = features[:, :row * col].reshape(features.shape[0], 1, row, col)
    attrs = features[:, row * col:]
    return grid, attrs


train_x, train_y, test_x, test_y = read_data(file_path)
train_x_a, train_x_c = _split_features(train_x)
# train_x_b = train_x[:, 18*col:row*col]
# train_x_b = train_x_b.reshape(train_x.shape[0], 10, col, 1)


def create_mlp(dim, regress=False):
    """Build the MLP branch: Dense(44) -> Dropout(0.2) -> Dense(44).

    Args:
        dim: Number of input features.
        regress: If true, append a single linear output unit.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    model = Sequential()
    model.add(Dense(44, input_dim=dim, activation="relu"))
    model.add(Dropout(0.2))
    model.add(Dense(44, activation="relu"))
    # model.add(Dense(96, activation="relu"))
    # model.add(Dense(128, activation="relu"))
    if regress:
        model.add(Dense(1, activation="linear"))
    return model


def create_cnn(width, height, depth, size=48, kernel_size=(5, 6), regress=False, output=24, strides=1):
    """Build the CNN branch for a channels-first ``(depth, width, height)`` input.

    Layout: Conv -> ReLU -> BN, an optional second Conv(32, 2x2) -> ReLU -> BN
    stage when ``width > 2``, then Flatten -> Dense -> ReLU -> BN -> Dropout
    -> Dense -> ReLU.

    Args:
        width: First spatial dimension of the input (this script passes
            ``row``).
        height: Second spatial dimension of the input (this script passes
            ``col``).
        depth: Number of input channels.
        size: Number of filters of the first convolution.
        kernel_size: Kernel size of the first convolution.
        regress: If true, append a single linear output unit.
        output: Width of the two dense layers.
        strides: Stride of the first convolution.

    Returns:
        An uncompiled functional ``Model``.
    """
    # The script selects 'channels_first' globally, so feature maps are
    # (batch, channels, rows, cols) and the channel axis is 1.  Normalising
    # over axis -1 there would treat the last spatial axis as the channels.
    inputShape = (depth, width, height)
    chanDim = 1

    inputs = Input(shape=inputShape)
    # CONV => RELU => BN
    x = Conv2D(size, kernel_size, strides=strides, padding="same")(inputs)
    x = Activation("relu")(x)
    x = BatchNormalization(axis=chanDim)(x)
    # x = MaxPooling2D(pool_size=(2,2))(x)
    if width > 2:
        # Second convolution stage, skipped for very short inputs.
        x = Conv2D(32, (2, 2), padding="same", strides=1)(x)
        x = Activation("relu")(x)
        x = BatchNormalization(axis=chanDim)(x)

    # Flatten the volume, then FC => RELU => BN => DROPOUT.
    x = Flatten()(x)
    x = Dense(output)(x)
    x = Activation("relu")(x)
    # After Dense the tensor is (batch, features): normalise the feature axis.
    x = BatchNormalization(axis=-1)(x)
    x = Dropout(0.2)(x)
    # One more FC layer before the branch is merged with the MLP.
    x = Dense(output)(x)
    x = Activation("relu")(x)
    if regress:
        x = Dense(1, activation="linear")(x)
    return Model(inputs, x)


# Create the MLP and CNN branches.  The trailing figures on the alternatives
# are the author's recorded results for that configuration.
mlp = create_mlp(train_x_c.shape[1], regress=False)
# cnn_0 = create_cnn(18, 20, 1, kernel_size=(3, 3), size=90, regress=False, output=96) # 31 97 46
cnn_0 = create_cnn(row, col, 1, kernel_size=(2, 2), size=36, regress=False, output=88, strides=1)  # 29 98 47
# cnn_0 = create_cnn(18, 20, 1, kernel_size=(9, 9), size=90, regress=False, output=96) # 28 97 53
# cnn_0 = create_cnn(18, 20, 1, kernel_size=(3, 20), size=90, regress=False, output=96)
# cnn_1 = create_cnn(10, col, 1, kernel_size=(3, col), size=66, regress=False, output=66, strides=1)
# cnn_1 = create_cnn(9, 26, 1, kernel_size=(2, 14), size=36, regress=False, output=64)

# The head consumes the concatenated outputs of both branches.
combinedInput = concatenate([mlp.output, cnn_0.output, ])

# Dense head: three 1024-unit ReLU layers (the first one L1-regularised)
# followed by a 3-way softmax classifier.
x = Dense(1024, activation="relu", kernel_regularizer=regularizers.l1(0.003))(combinedInput)
x = Dropout(0.2)(x)
x = Dense(1024, activation="relu")(x)
x = Dense(1024, activation="relu")(x)  # one more layer
x = Dense(3, activation="softmax")(x)

# The final model takes the attribute columns on the MLP input and the
# (1, row, col) grid on the CNN input and outputs three class probabilities.
model = Model(inputs=[mlp.input, cnn_0.input, ], outputs=x)
print("Starting training ")
# h = model.fit(train_x, train_y, batch_size=4096*2, epochs=500, shuffle=True)

# Compile with categorical cross-entropy and track accuracy.
# NOTE(review): categorical_crossentropy expects one-hot targets of width 3;
# this presumes the last element of every record is already such a vector -
# confirm against the data file.
opt = Adam(lr=1e-3, decay=1e-3 / 200)
model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=['accuracy'])

# Train the model.
print("[INFO] training model...")
model.fit(
    [train_x_c, train_x_a, ], train_y,
    # validation_data=([testAttrX, testImagesX], testY),
    # epochs=int(3*train_x_a.shape[0]/1300),
    epochs=epochs, batch_size=4096, shuffle=True, callbacks=[early_stopping]
)
model.save(model_path)

test_x_a, test_x_c = _split_features(test_x)
# test_x_b = test_x[:, 18*col:row*col]
# test_x_b = test_x_b.reshape(test_x.shape[0], 10, col, 1)

# Evaluate on the held-out records.  The message text below is a leftover
# from a house-price example; the model predicts class probabilities.
print("[INFO] predicting house prices...")
score = model.evaluate([test_x_c, test_x_a, ], test_y)
print(score)
print('Test score:', score[0])
print('Test accuracy:', score[1])