# Source repository: yufeng / machine_learn
							import keras
# -*- encoding:utf-8 -*-
import numpy as np
from keras.models import Sequential
# 优化方法选用Adam(其实可选项有很多，如SGD)
from keras.optimizers import Adam
import random
from keras.models import load_model
from imblearn.over_sampling import RandomOverSampler
from sklearn.preprocessing import MinMaxScaler
from keras.utils import np_utils
# 用于模型初始化，Conv2D模型初始化、Activation激活函数，MaxPooling2D是池化层
# Flatten作用是将多位输入进行一维化
# Dense是全连接层
from keras.layers import Conv2D, Activation, MaxPool2D, Flatten, Dense,Dropout,Input,MaxPooling2D,BatchNormalization,concatenate
from keras import regularizers
from keras.models import Model
from keras.callbacks import EarlyStopping
from keras import backend as K
# Make Keras treat axis 1 as the channel axis; the script reshapes its CNN
# inputs to (samples, 1, row, col) further down.
K.set_image_data_format('channels_first')

# Stop training once the monitored metric has not improved for 5 epochs.
# NOTE(review): model.fit() below is called without validation data, so this
# watches the *training* accuracy.
early_stopping = EarlyStopping(monitor='accuracy', patience=5, verbose=2)

# Upper bound on the number of training epochs passed to model.fit().
epochs= 155
# NOTE(review): not referenced anywhere in the visible code.
size = 380000
# Training log read by read_data(); one Python-literal record per line.
file_path = 'D:\\data\\quantization\\stock571_12d_train2.log'
# Destination of model.save() after training.
model_path = '570_5d_mix_5D_ma5_s_seq.h5'
# Default for read_data()'s ``path1`` argument; only commented-out code reads it.
file_path1='D:\\data\\quantization\\stock563_12d_test.log'
# The first row*col features of every sample are reshaped to a (1, row, col)
# image for the CNN branch; the remaining features feed the MLP branch.
row = 5
col = 31
'''
0    dmi            28*20      38,95,72/25     下跌预判非常准      54,95,74                              
1    macd           28*19      41,98,53/8  
2    dmi-对大盘对比 28*20      35,99,67/32>>                         
3    5d-dmi-对大盘对比 28*20   42,99,39/10
4    3d-dmi-对大盘对比 28*20   40,99,39/07                          
5    3d-beta1                  55,99,52/07    当前用这个             
6    3d-ma20                   40,99,41/07
7    3d-macd   28*19           55,99,40/07
8    3d-市值>30  28*20         57,99,56/40>> 最高价  用这个！        43,98,59
9    3d-市值>30  28*20         57,99,31/08   收盘最高价 
10   5d-市值>30  28*20   收盘最高价  
11   5d-市值>30  28*20   ma5


12   5d-极简     28*16 有ma5,ma20  46,102,16/26   test it 
13   3d-最高价   28*16         57,101,39,16

14   5d-极简-最高价     28*16   40,101,47-56  test it    不行    >>   33,100,49
15   5d+dmi+最高价      28*20   40,101,48-56  test it  不行
16   同12,14,参数11,10  28*16   38,101,47-57  test it 不行
17   同上参数11,6       28*16   39,101,47-57  !
35   指数向量化         28*14   43,101,47-57
36   去掉指数           28*14   40,101,47-57
37   指数向量化 修改result已自己为基点   28*17   37,101,47-57    24,101,47
38   指数向量化++ 修改result已自己为基点+win=5    28*17  39,101,46-57 
39   指数向量化++ 修改result已自己为基点+win=8    28*17  37,101,47-57
40   指数向量化++ 修改result已自己为基点+win=11   28*17  44,101,45-56
41   bigquantor      win=11,21                    28*21  28,99,53-59
42   bigquantor      win=7,21                     28*21  31,102,42-57
43   bigquantor      win=6,6                      18*21  26,101,44-56  #共40W
44   bigquantor      win=4,5                      18*21  24,101,45-57

45   bigquantor      win=4,4                      12*21  27,101,45-57  
46   bigquantor      win=3,6                      12*21  43,101,44-54
47   bigquantor      win=3,8                      12*21  34,101,43-57
49   bigquantor      win=5,8,stride=1             12*21  33,101,45-57

50   bigquantor(open) win=4,4                      12*21  34,100,49-56  # 24,100,49
51   bigquantor(open) win=5,5                      12*21  34,100,49-55
52   bigquantor(open) win=3,16                     12*21  33,100,49-56
53   bigquantor(open) win=4,21                     12*21  40,101,46-55
53A   bigquantor(open) win=4,21 用1层              12*21  36,101,46-55
53B   bigquantor(open) win=4*21+4*4 用2层          12*21   

54   指数向量化++(open)          16*17
55   指数向量化++(open)+olhc     16*17

56   指数向量化++bigquantor   win=3*25+4*4 用2层    12*25    29,100,49-55
56A   指数向量化++bigquantor   win=3*25    用1层    12*25  

57   指数向量化++bigquantor   win=3*25     用1层    12*25    28,100,48-56
57A   指数向量化++bigquantor   win=6*12     用1层    12*25   27,101,46-53

58   指数向量化++bigquantor   win=6*12     用1层    6*25 倒过来很吊... 
58A   指数向量化++bigquantor   win=3*25     用1层    6*25 

61 简化 5*11  42w     15,101,45-50
62 简化 3*9   42w  

63  win=3*9 6*9       14,100,0
63A win=2*9           30,101,46
63B win=1*9           34,101,45  
63C win=2*2 用两层    31,101,46
63D win=3*3 用两层    25,100,47

18   拆成两个,stripe=2,win=5          40,102,30-36
19   拆成两个短,15-5-2/15-3-1   38,102,31        
20   拆成1长1短 28-7-2/10-3-1   34,102,29-36
21   换手率用ln函数 涨幅使用ln函数   28*12  34,100,48,36   
22   Alpha#101:    28*12  44,100,48-36 ? ------ 34,100,48-36
23   在简化(换手率-涨跌停形态-与hu板的波动比较-周期大涨大跌-)       28*9            36,102,47,100,36----25,100,48,101-36
23A  win=4                                                                          35,101,46,100,57       
24   Alpha#44: + rank_33                                            28*10           34,101,47,100,36
25   纯dmi  101                                                        22*9            29,101,46,100,36
26   macd   101                                                        22*8            28,101,46,101,36
27   alpha2+alpha44           22*10         38,101,46,100,36
28   rank_2_a+rank_2_b+alpha#51     22*10   37,101,47,101,57
29   Alpha#53 去掉价格                      18*6    30,101,47,101,58
30   有价格 101+54                          18*13   33,101,47,101,57
31   没价格 54  win=4                       18*8    40,101,48,100,56

32   同23在简化(换手率-涨跌停形态-与hu板的波动比较-周期大涨大跌-)   18*9         38,101,47,100,57
33   修改test值                        18*9   36,98,61,36         25,97,62    
34   +换手率+dmi修正+alpha53+18        18*17  36,98,60,37

    
24   Alpha#6:                               
25   Alpha#9: 
27   Alpha#12: 
29   Alpha#23: 
31   Alpha#51: 
33  Alpha#54: 
34  Alpha#2: 
35  Alpha#9: 
36  Alpha#12:  
37  Alpha#18: 
38  
39  
40  
'''

def read_data(path, path1=file_path1):
    """Load samples, shuffle them, split 85/15 and oversample the training part.

    Every non-blank line of ``path`` is evaluated as a Python expression and
    must yield a sequence. For a record of length ``n`` the features are the
    first ``n - 2`` items and the label is the last item; the second-to-last
    item is dropped (presumably an auxiliary field -- TODO confirm). ``n`` is
    taken from the first record after shuffling and applied to all records.

    Args:
        path: path of the text file holding one record per line.
        path1: currently unused; kept for backward compatibility (it was only
            read by code that has since been disabled).

    Returns:
        A tuple ``(train_x, train_y, test_x, test_y)``. The training pair is
        the output of ``RandomOverSampler(random_state=0)``; the test pair is
        the untouched 15% hold-out converted to ``float32`` arrays.

    Raises:
        ValueError: if ``path`` contains no records.
    """
    records = []
    with open(path) as f:
        # Iterate lazily instead of materialising the file with readlines().
        for raw in f:
            text = raw.strip()
            if not text:
                # eval('') raises SyntaxError, so skip blank/trailing lines.
                continue
            # SECURITY: eval() executes arbitrary code found in the file. Only
            # feed this function files you produced yourself; consider
            # ast.literal_eval() if the records are plain literals.
            records.append(eval(text))

    if not records:
        raise ValueError('no samples found in %r' % (path,))

    random.shuffle(records)
    print('读取数据完毕')

    split = int(0.85 * len(records))
    feature_end = len(records[0]) - 2
    train_part, test_part = records[:split], records[split:]

    train_x = [s[:feature_end] for s in train_part]
    train_y = [s[-1] for s in train_part]
    test_x = [s[:feature_end] for s in test_part]
    test_y = [s[-1] for s in test_part]

    print('转换数据完毕')

    ros = RandomOverSampler(random_state=0)
    # ``fit_sample`` is the legacy name and is missing from newer
    # imbalanced-learn releases; prefer ``fit_resample`` and fall back only
    # when running against an old version that lacks it.
    resample = getattr(ros, 'fit_resample', None) or ros.fit_sample
    X_resampled, y_resampled = resample(
        np.array(train_x, dtype=np.float32),
        np.array(train_y, dtype=np.float32),
    )
    # Feature scaling (MinMaxScaler) is intentionally not applied here.

    print('数据重采样完毕')

    return (
        X_resampled,
        y_resampled,
        np.array(test_x, dtype=np.float32),
        np.array(test_y, dtype=np.float32),
    )


# Load the oversampled training set and the held-out test split.
train_x, train_y, test_x, test_y = read_data(file_path)

# The first row*col features become a single-channel (1, row, col) image for
# the CNN branch.
train_x_a = train_x[:, :row * col].reshape(train_x.shape[0], 1, row, col)
# Input for a second CNN branch, currently disabled:
# train_x_b = train_x[:, 18*col:row*col]
# train_x_b = train_x_b.reshape(train_x.shape[0], 10, col, 1)
# Everything after the image features goes to the MLP branch.
train_x_c = train_x[:, row * col:]


def create_mlp(dim, regress=False):
    """Build the dense branch: Dense(44) -> Dropout(0.2) -> Dense(44).

    Args:
        dim: number of input features.
        regress: when true, append a single linear output unit.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    stack = [
        Dense(44, input_dim=dim, activation="relu"),
        Dropout(0.2),
        Dense(44, activation="relu"),
    ]

    # Optional regression head.
    if regress:
        stack.append(Dense(1, activation="linear"))

    return Sequential(stack)


def create_cnn(width, height, depth, size=48, kernel_size=(5, 6), regress=False, output=24, strides=1):
    """Build the convolutional branch as an uncompiled functional ``Model``.

    Layout: Conv2D(size, kernel_size) -> ReLU -> BN, then (only when
    ``width > 2``) Conv2D(32, (2, 2)) -> ReLU -> BN, followed by
    Flatten -> Dense(output) -> ReLU -> BN -> Dropout(0.2) -> Dense(output)
    -> ReLU, and optionally one linear unit.

    Args:
        width: first spatial dimension of the input.
        height: second spatial dimension of the input.
        depth: number of input channels (previously ignored and fixed to 1).
        size: number of filters of the first convolution.
        kernel_size: kernel shape of the first convolution.
        regress: when true, append ``Dense(1, activation="linear")``.
        output: number of units in each of the two dense layers.
        strides: stride of the first convolution.

    Returns:
        A ``Model`` whose input has shape ``(depth, width, height)``.
    """
    # The input is laid out channels-first, matching how the data is reshaped
    # and the backend setting chosen at the top of the file.
    input_shape = (depth, width, height)

    # BatchNormalization has to normalise the axis Conv2D treats as channels:
    # axis 1 under channels_first, the last axis otherwise. Using -1 with
    # channels_first would normalise along a spatial axis instead.
    conv_bn_axis = 1 if K.image_data_format() == "channels_first" else -1

    inputs = Input(shape=input_shape)

    # CONV => RELU => BN
    x = Conv2D(size, kernel_size, strides=strides, padding="same")(inputs)
    x = Activation("relu")(x)
    x = BatchNormalization(axis=conv_bn_axis)(x)

    # Second convolution, skipped for very short inputs.
    if width > 2:
        x = Conv2D(32, (2, 2), padding="same", strides=1)(x)
        x = Activation("relu")(x)
        x = BatchNormalization(axis=conv_bn_axis)(x)

    # Flatten, then FC => RELU => BN => DROPOUT. After Flatten the tensor is
    # 2-D, so the feature axis is the last one.
    x = Flatten()(x)
    x = Dense(output)(x)
    x = Activation("relu")(x)
    x = BatchNormalization(axis=-1)(x)
    x = Dropout(0.2)(x)

    # Second FC layer producing the branch's feature vector.
    x = Dense(output)(x)
    x = Activation("relu")(x)

    # Optional regression head.
    if regress:
        x = Dense(1, activation="linear")(x)

    return Model(inputs, x)


# Build the two feature extractors: an MLP over the tabular features and a
# CNN over the (1, row, col) image.
mlp = create_mlp(train_x_c.shape[1], regress=False)
# Earlier CNN configurations, kept with the author's recorded numbers:
# cnn_0 = create_cnn(18, 20, 1, kernel_size=(3, 3), size=90, regress=False, output=96)       # 31 97 46
# cnn_0 = create_cnn(18, 20, 1, kernel_size=(9, 9), size=90, regress=False, output=96)         # 28 97 53
# cnn_0 = create_cnn(18, 20, 1, kernel_size=(3, 20), size=90, regress=False, output=96)
# cnn_1 = create_cnn(10, col, 1, kernel_size=(3, col), size=66, regress=False, output=66, strides=1)
# cnn_1 = create_cnn(9, 26, 1, kernel_size=(2, 14), size=36, regress=False, output=64)
cnn_0 = create_cnn(
    row, col, 1,
    kernel_size=(2, 2), size=36, regress=False, output=88, strides=1,
)  # author's recorded numbers: 29 98 47

# Join the outputs of both branches into one feature vector.
combinedInput = concatenate([mlp.output, cnn_0.output])

# Classification head: an L1-regularised Dense(1024) with dropout, two more
# Dense(1024) layers, and a 3-way softmax output.
x = Dense(
    1024,
    activation="relu",
    kernel_regularizer=regularizers.l1(0.003),
)(combinedInput)
x = Dropout(0.2)(x)
x = Dense(1024, activation="relu")(x)
x = Dense(1024, activation="relu")(x)
x = Dense(3, activation="softmax")(x)

# The final model takes the MLP features first and the CNN image second and
# outputs a probability for each of the three classes.
model = Model(inputs=[mlp.input, cnn_0.input], outputs=x)


print("Starting training ")
# Earlier single-input training call, kept for reference:
# h = model.fit(train_x, train_y, batch_size=4096*2, epochs=500, shuffle=True)

# Compile with categorical cross-entropy (matching the softmax head) and
# report accuracy; Adam starts at 1e-3 with a small per-update decay.
opt = Adam(lr=1e-3, decay=1e-3 / 200)
model.compile(
    loss="categorical_crossentropy",
    optimizer=opt,
    metrics=['accuracy'],
)

# Fit on the training data. Input order must match Model(inputs=...):
# MLP features first, CNN image second. No validation data is supplied, so
# early stopping reacts to the training accuracy.
print("[INFO] training model...")
model.fit(
    x=[train_x_c, train_x_a],
    y=train_y,
    batch_size=4096,
    epochs=epochs,
    shuffle=True,
    callbacks=[early_stopping],
)

# Persist the trained model to ``model_path``.
model.save(model_path)

# Split the held-out samples exactly like the training data: the first
# row*col features form the (1, row, col) CNN image, the rest feed the MLP.
test_x_a = test_x[:, :row * col]
test_x_a = test_x_a.reshape(test_x.shape[0], 1, row, col)
# Input for a second CNN branch, currently disabled:
# test_x_b = test_x[:, 18*col:row*col]
# test_x_b = test_x_b.reshape(test_x.shape[0], 10, col, 1)
test_x_c = test_x[:, row * col:]

# Evaluate on the held-out data. The previous message talked about
# "predicting house prices", which does not describe this call:
# model.evaluate() returns the loss followed by the compiled metrics.
print("[INFO] evaluating model on held-out data...")
score = model.evaluate([test_x_c, test_x_a], test_y)

print(score)
# score[0] is the loss, score[1] the accuracy metric set in model.compile().
print('Test score:', score[0])
print('Test accuracy:', score[1])