123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303 |
- import keras
- # -*- encoding:utf-8 -*-
- import numpy as np
- from keras.models import Sequential
- # 优化方法选用Adam(其实可选项有很多,如SGD)
- from keras.optimizers import Adam
- import random
- from keras.models import load_model
- from imblearn.over_sampling import RandomOverSampler
- from sklearn.preprocessing import MinMaxScaler
- from keras.utils import np_utils
- # 用于模型初始化,Conv2D模型初始化、Activation激活函数,MaxPooling2D是池化层
- # Flatten作用是将多维输入进行一维化
- # Dense是全连接层
- from keras.layers import Conv2D, Activation, MaxPool2D, Flatten, Dense,Dropout,Input,MaxPooling2D,BatchNormalization,concatenate
- from keras import regularizers
- from keras.models import Model
- from keras.callbacks import EarlyStopping
- from keras import backend as K
# Every image tensor in this script is laid out as (channels, rows, cols).
K.set_image_data_format('channels_first')

# Input and output locations.
file_path = 'D:\\data\\quantization\\stock571_12d_train2.log'
file_path1 = 'D:\\data\\quantization\\stock563_12d_test.log'
model_path = '570_5d_mix_5D_ma5_s_seq.h5'

# Geometry of the 2-D feature grid fed to the CNN branch: the first
# row * col columns of every sample are reshaped to (1, row, col).
row, col = 5, 31

# Training schedule.
epochs = 155
size = 380000  # not referenced anywhere else in the visible code

# Stop once the monitored training metric has not improved for 5 epochs.
early_stopping = EarlyStopping(
    monitor='accuracy',
    patience=5,
    verbose=2,
)
- '''
- 0 dmi 28*20 38,95,72/25 下跌预判非常准 54,95,74
- 1 macd 28*19 41,98,53/8
- 2 dmi-对大盘对比 28*20 35,99,67/32>>
- 3 5d-dmi-对大盘对比 28*20 42,99,39/10
- 4 3d-dmi-对大盘对比 28*20 40,99,39/07
- 5 3d-beta1 55,99,52/07 当前用这个
- 6 3d-ma20 40,99,41/07
- 7 3d-macd 28*19 55,99,40/07
- 8 3d-市值>30 28*20 57,99,56/40>> 最高价 用这个! 43,98,59
- 9 3d-市值>30 28*20 57,99,31/08 收盘最高价
- 10 5d-市值>30 28*20 收盘最高价
- 11 5d-市值>30 28*20 ma5
- 12 5d-极简 28*16 有ma5,ma20 46,102,16/26 test it
- 13 3d-最高价 28*16 57,101,39,16
- 14 5d-极简-最高价 28*16 40,101,47-56 test it 不行 >> 33,100,49
- 15 5d+dmi+最高价 28*20 40,101,48-56 test it 不行
- 16 同12,14,参数11,10 28*16 38,101,47-57 test it 不行
- 17 同上参数11,6 28*16 39,101,47-57 !
- 35 指数向量化 28*14 43,101,47-57
- 36 去掉指数 28*14 40,101,47-57
- 37 指数向量化 修改result以自己为基点 28*17 37,101,47-57 24,101,47
- 38 指数向量化++ 修改result以自己为基点+win=5 28*17 39,101,46-57
- 39 指数向量化++ 修改result以自己为基点+win=8 28*17 37,101,47-57
- 40 指数向量化++ 修改result以自己为基点+win=11 28*17 44,101,45-56
- 41 bigquantor win=11,21 28*21 28,99,53-59
- 42 bigquantor win=7,21 28*21 31,102,42-57
- 43 bigquantor win=6,6 18*21 26,101,44-56 #共40W
- 44 bigquantor win=4,5 18*21 24,101,45-57
- 45 bigquantor win=4,4 12*21 27,101,45-57
- 46 bigquantor win=3,6 12*21 43,101,44-54
- 47 bigquantor win=3,8 12*21 34,101,43-57
- 49 bigquantor win=5,8,stride=1 12*21 33,101,45-57
- 50 bigquantor(open) win=4,4 12*21 34,100,49-56 # 24,100,49
- 51 bigquantor(open) win=5,5 12*21 34,100,49-55
- 52 bigquantor(open) win=3,16 12*21 33,100,49-56
- 53 bigquantor(open) win=4,21 12*21 40,101,46-55
- 53A bigquantor(open) win=4,21 用1层 12*21 36,101,46-55
- 53B bigquantor(open) win=4*21+4*4 用2层 12*21
- 54 指数向量化++(open) 16*17
- 55 指数向量化++(open)+olhc 16*17
- 56 指数向量化++bigquantor win=3*25+4*4 用2层 12*25 29,100,49-55
- 56A 指数向量化++bigquantor win=3*25 用1层 12*25
- 57 指数向量化++bigquantor win=3*25 用1层 12*25 28,100,48-56
- 57A 指数向量化++bigquantor win=6*12 用1层 12*25 27,101,46-53
- 58 指数向量化++bigquantor win=6*12 用1层 6*25 倒过来很吊...
- 58A 指数向量化++bigquantor win=3*25 用1层 6*25
- 61 简化 5*11 42w 15,101,45-50
- 62 简化 3*9 42w
- 63 win=3*9 6*9 14,100,0
- 63A win=2*9 30,101,46
- 63B win=1*9 34,101,45
- 63C win=2*2 用两层 31,101,46
- 63D win=3*3 用两层 25,100,47
- 18 拆成两个,stripe=2,win=5 40,102,30-36
- 19 拆成两个短,15-5-2/15-3-1 38,102,31
- 20 拆成1长1短 28-7-2/10-3-1 34,102,29-36
- 21 换手率用ln函数 涨幅使用ln函数 28*12 34,100,48,36
- 22 Alpha#101: 28*12 44,100,48-36 ? ------ 34,100,48-36
- 23 在简化(换手率-涨跌停形态-与hu板的波动比较-周期大涨大跌-) 28*9 36,102,47,100,36----25,100,48,101-36
- 23A win=4 35,101,46,100,57
- 24 Alpha#44: + rank_33 28*10 34,101,47,100,36
- 25 纯dmi 101 22*9 29,101,46,100,36
- 26 macd 101 22*8 28,101,46,101,36
- 27 alpha2+alpha44 22*10 38,101,46,100,36
- 28 rank_2_a+rank_2_b+alpha#51 22*10 37,101,47,101,57
- 29 Alpha#53 去掉价格 18*6 30,101,47,101,58
- 30 有价格 101+54 18*13 33,101,47,101,57
- 31 没价格 54 win=4 18*8 40,101,48,100,56
- 32 同23在简化(换手率-涨跌停形态-与hu板的波动比较-周期大涨大跌-) 18*9 38,101,47,100,57
- 33 修改test值 18*9 36,98,61,36 25,97,62
- 34 +换手率+dmi修正+alpha53+18 18*17 36,98,60,37
-
- 24 Alpha#6:
- 25 Alpha#9:
- 27 Alpha#12:
- 29 Alpha#23:
- 31 Alpha#51:
- 33 Alpha#54:
- 34 Alpha#2:
- 35 Alpha#9:
- 36 Alpha#12:
- 37 Alpha#18:
- 38
- 39
- 40
- '''
def read_data(path, path1=file_path1):
    """Load samples, split them 85/15 and oversample the training split.

    Every non-blank line of ``path`` must hold one Python expression that
    evaluates to a sequence: feature values first, the label in the last
    position. The second-to-last element is excluded from the features and
    never used (presumably auxiliary data -- TODO confirm against whatever
    writes the file). The number of feature columns is taken from the first
    sample after shuffling and applied to every sample.

    Args:
        path: Text file with one sample per line.
        path1: Secondary sample file. Kept for backward compatibility only;
            it is not read.

    Returns:
        A tuple ``(train_x, train_y, test_x, test_y)``. The training pair is
        the output of ``RandomOverSampler(random_state=0)`` applied to the
        first 85 % of the shuffled samples; the test pair is the remaining
        15 %, converted to ``float32`` arrays without resampling.

    Raises:
        ValueError: If ``path`` contains no samples.
    """
    samples = []
    with open(path) as f:
        # Stream the file line by line instead of loading it with readlines().
        for line in f:
            text = line.strip()
            if not text:
                # A blank (e.g. trailing) line would make eval('') fail.
                continue
            # SECURITY: eval() executes whatever code the file contains.
            # Only use this on files you produced yourself; if the lines are
            # plain literals, ast.literal_eval is the safe replacement.
            samples.append(eval(text))
    if not samples:
        raise ValueError('no samples found in %r' % (path,))

    random.shuffle(samples)
    print('读取数据完毕')

    cut = int(0.85 * len(samples))
    feature_end = len(samples[0]) - 2
    train_part, test_part = samples[:cut], samples[cut:]
    train_x = [s[:feature_end] for s in train_part]
    train_y = [s[-1] for s in train_part]
    test_x = [s[:feature_end] for s in test_part]
    test_y = [s[-1] for s in test_part]
    print('转换数据完毕')

    # Only the training split is balanced; the test split keeps its natural
    # class distribution.
    ros = RandomOverSampler(random_state=0)
    # imbalanced-learn renamed fit_sample() to fit_resample() in 0.4 and
    # removed the old name in 0.8; prefer the new one, fall back for old
    # installations.
    resample = getattr(ros, 'fit_resample', None)
    if resample is None:
        resample = ros.fit_sample
    X_resampled, y_resampled = resample(
        np.array(train_x, dtype=np.float32),
        np.array(train_y, dtype=np.float32),
    )
    print('数据重采样完毕')

    return (
        X_resampled,
        y_resampled,
        np.array(test_x, dtype=np.float32),
        np.array(test_y, dtype=np.float32),
    )
# Load everything once, then split each training row into the two model inputs.
train_x, train_y, test_x, test_y = read_data(file_path)

# Leading row * col columns -> single-channel (1, row, col) grid for the CNN.
# (A second grid input, train_x_b, was tried earlier and is disabled.)
train_x_a = train_x[:, :row * col].reshape(train_x.shape[0], 1, row, col)

# All remaining columns -> flat feature vector for the MLP.
train_x_c = train_x[:, row * col:]
def create_mlp(dim, regress=False):
    """Build the fully connected branch of the network.

    The stack is Dense(44, relu) -> Dropout(0.2) -> Dense(44, relu), with an
    optional single linear output unit on top.

    Args:
        dim: Number of input features per sample.
        regress: When true, append a ``Dense(1, activation="linear")`` layer.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    layers = [
        Dense(44, input_dim=dim, activation="relu"),
        Dropout(0.2),
        Dense(44, activation="relu"),
    ]
    if regress:
        layers.append(Dense(1, activation="linear"))
    return Sequential(layers)
def create_cnn(width, height, depth, size=48, kernel_size=(5, 6), regress=False, output=24, strides=1):
    """Build the convolutional branch as a functional Keras ``Model``.

    Layout: Conv2D(size, kernel_size) -> ReLU -> BN, optionally followed by
    Conv2D(32, (2, 2)) -> ReLU -> BN when ``width > 2``, then Flatten ->
    Dense(output) -> ReLU -> BN -> Dropout(0.2) -> Dense(output) -> ReLU and
    an optional single linear unit.

    The input tensor is declared as ``(depth, width, height)``, i.e. with the
    channel axis first, which is how this script reshapes its data.

    Args:
        width: First spatial dimension of the input grid.
        height: Second spatial dimension of the input grid.
        depth: Number of input channels.
        size: Number of filters in the first convolution.
        kernel_size: Kernel size of the first convolution.
        regress: When true, append a ``Dense(1, activation="linear")`` layer.
        output: Width of the two dense layers after flattening.
        strides: Stride of the first convolution.

    Returns:
        An uncompiled ``Model`` mapping the grid input to the branch output.
    """
    # Honour ``depth`` rather than hard-coding one channel; every existing
    # call passes depth=1, so those calls build the same input as before.
    inputShape = (depth, width, height)

    # BatchNormalization after a convolution must normalise per channel.
    # The previous code always used axis=-1, which under the
    # 'channels_first' setting used by this script is the last *spatial*
    # axis, not the channel axis.
    # NOTE: this changes the shape of the conv BN parameters, so weights
    # saved by the old version will not load into this architecture.
    chanDim = 1 if K.image_data_format() == 'channels_first' else -1

    inputs = Input(shape=inputShape)

    # CONV => RELU => BN
    x = Conv2D(size, kernel_size, strides=strides, padding="same")(inputs)
    x = Activation("relu")(x)
    x = BatchNormalization(axis=chanDim)(x)

    # Second convolution only when the grid has more than two rows.
    if width > 2:
        x = Conv2D(32, (2, 2), padding="same", strides=1)(x)
        x = Activation("relu")(x)
        x = BatchNormalization(axis=chanDim)(x)

    # Flatten the volume, then FC => RELU => BN => DROPOUT. After Flatten the
    # tensor is (batch, features), so the feature axis is the last one.
    x = Flatten()(x)
    x = Dense(output)(x)
    x = Activation("relu")(x)
    x = BatchNormalization(axis=-1)(x)
    x = Dropout(0.2)(x)

    # Second FC layer producing the branch output that gets concatenated
    # with the MLP branch.
    x = Dense(output)(x)
    x = Activation("relu")(x)

    if regress:
        x = Dense(1, activation="linear")(x)

    return Model(inputs, x)
# Build the two branches: a dense one for the flat features and a
# convolutional one for the (1, row, col) grid.
mlp = create_mlp(train_x_c.shape[1], regress=False)
cnn_0 = create_cnn(
    row, col, 1,
    kernel_size=(2, 2),
    size=36,
    regress=False,
    output=88,
    strides=1,
)

# The classification head consumes the concatenated outputs of both branches.
combinedInput = concatenate([mlp.output, cnn_0.output])

# Head: one L1-regularised dense layer with dropout, two more dense layers,
# then a 3-way softmax.
x = Dense(1024, activation="relu", kernel_regularizer=regularizers.l1(0.003))(combinedInput)
x = Dropout(0.2)(x)
for _ in range(2):
    x = Dense(1024, activation="relu")(x)
x = Dense(3, activation="softmax")(x)

# The final model takes the flat features on the MLP input and the grid on
# the CNN input (in that order) and outputs three class probabilities.
model = Model(inputs=[mlp.input, cnn_0.input], outputs=x)
print("Starting training ")

# Compile with categorical cross-entropy, matching the softmax head, and
# track accuracy. Adam starts at 1e-3 with a small per-update decay.
# NOTE(review): categorical_crossentropy expects one-hot targets; whether
# train_y already has that form depends on the label format in the data
# file -- confirm.
opt = Adam(lr=1e-3, decay=1e-3 / 200)
model.compile(
    loss="categorical_crossentropy",
    optimizer=opt,
    metrics=['accuracy'],
)

# Train. No validation data is supplied, so early stopping watches the
# training metric. Input order must match Model(inputs=[mlp, cnn]).
print("[INFO] training model...")
model.fit(
    [train_x_c, train_x_a],
    train_y,
    epochs=epochs,
    batch_size=4096,
    shuffle=True,
    callbacks=[early_stopping],
)

# Persist the trained model.
model.save(model_path)
# Split the held-out rows exactly like the training rows: leading
# row * col columns -> (1, row, col) grid, the rest -> flat features.
test_x_a = test_x[:, :row * col].reshape(test_x.shape[0], 1, row, col)
test_x_c = test_x[:, row * col:]

# Evaluate on the held-out split. The log text below is inherited and kept
# unchanged; the model is a 3-class classifier.
print("[INFO] predicting house prices...")
score = model.evaluate([test_x_c, test_x_a], test_y)
print(score)
# score holds the loss followed by the compiled metric (accuracy).
for label, value in zip(('Test score:', 'Test accuracy:'), score):
    print(label, value)
|