# Source repository: yufeng / machine_learn
							import keras
# -*- encoding:utf-8 -*-
import numpy as np
from keras.models import Sequential
# Adam is used as the optimizer (many alternatives exist, e.g. SGD)
from keras.optimizers import Adam
import random
from imblearn.over_sampling import RandomOverSampler
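# Building blocks for the model: Conv2D is the convolution layer, Activation
# applies an activation function, MaxPooling2D is the pooling layer
# Flatten collapses a multi-dimensional input into one dimension
# Dense is a fully connected layer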
from keras.layers import Conv2D, Activation, MaxPool2D, Flatten, Dense,Dropout,Input,MaxPooling2D,BatchNormalization,concatenate
from keras import regularizers
from keras.models import Model
from keras.callbacks import EarlyStopping

# Callback that stops training once the monitored 'accuracy' metric has not
# improved for `patience` (5) consecutive epochs.
# NOTE(review): the metric key must match what the installed Keras version
# logs ('accuracy' vs. 'acc') -- confirm.
early_stopping = EarlyStopping(monitor='accuracy', patience=5, verbose=2)

# Upper bound on the number of training epochs passed to model.fit().
epochs= 77
# size = 24000  # 680k samples in total
# Training data file read by read_data(): one sample per line.
file_path = 'D:\\data\\quantization\\week120_18d_train1.log'
# File the trained model is saved to.
model_path = '120_18d_mix_3W_s_seqA.h5'
# The first row*col features of a sample are reshaped to (row, col, 1).
row = 18
col = 9
# The next 11*col1 features of a sample are reshaped to (11, col1, 1).
col1 = 13
'''
0   18-3                    18*11           25,102,47-29
1   18W预测3周后最高价+pe   18*11           37,101,44-22
2                           18*11 + 11*16   33,101,41-30
3   stripe=1,win=4-3        18*11 + 11*16   31,108,19-34  ----- 随机25,100,51-26            
4   stripe=1,win=3          18*11 + 11*16   34,103,41-26
5   stripe=1,win=3          18*11           
6   用ma来衡量    
7   简化模型                
8   ma5-大盘相关+alpha_6       18*11 + 11*16         ------25,96,69
9   ma5-大盘相关+alpha_44+alpha_2            51,96,68-07    
10  ma5-大盘相关+alpha_53+alpha_18           48,97,61-06

11  high-大盘相关+alpha_53+alpha_18                             35,103,39-37
12  high-大盘相关+alpha_53+alpha_18(每日)    18*11 + 11*17      33,101,46-30
13  high-大盘相关+alpha_53+alpha_18-dmi      18*6 + 11*16       37,105,33-32

14  high-大盘相关+alpha_53+alpha_18-dmi+result修改(自己对比)    18*6 + 11*16    17,97,59
15  high-大盘相关+alpha_53+alpha_18+result修改-波动-ma+dmi      18*9 + 11*14    26,99,53-22   !!!
16  high-pettm                                                  18*9 + 11*13    29,99,54-26
17  high+大盘相关                                               18*9 + 11*13    26,98,54-27

18  high-大盘相关+alpha_53+alpha_18+result修改-波动-ma+dmi 8周后     18*9 + 11*14     19,111,8,55   16    ----- 随机24,100,49      
19  high-大盘相关+alpha_53+alpha_18+result修改-波动-ma+dmi 4周后     18*9 + 11*14     26,113,2,22   73条数据 
20  high-大盘相关+alpha_53+alpha_18+result修改-波动-ma+双dmi 4周后   18*9 + 11*13     32,110,11,26  大盘超好的时候可以用这种
'''

def read_data(path):
    """Load samples from *path*, split them 95/5 and oversample the training part.

    Every non-empty line of the file is evaluated as a Python expression and
    must yield one indexable sample row.  For each row the last element is
    used as the label and everything except the last two elements as the
    feature vector (the second-to-last element is not used).

    The rows are shuffled in place with the global ``random`` state before
    the split, so the split differs between runs unless ``random`` is seeded.

    Args:
        path: Path of the text file holding one sample per line.

    Returns:
        A 4-tuple ``(train_x, train_y, test_x, test_y)``.  The training pair
        is the output of ``RandomOverSampler`` applied to the first 95% of
        the shuffled rows; the test pair holds the remaining rows as
        ``float32`` NumPy arrays, without resampling.

    Raises:
        ValueError: If the file contains no samples.
    """
    samples = []
    with open(path) as f:
        # Iterate the file lazily instead of materialising (and copying) the
        # full list of lines first.
        for raw in f:
            raw = raw.strip()
            if not raw:
                # Blank lines (e.g. a trailing newline) carry no sample.
                continue
            # SECURITY: eval() executes arbitrary code contained in the data
            # file.  Only use this with trusted files; if every row is a plain
            # literal, ast.literal_eval() is the safe replacement.
            samples.append(eval(raw))

    if not samples:
        raise ValueError('no samples found in %r' % (path,))

    random.shuffle(samples)
    print('读取数据完毕')

    split = int(0.95 * len(samples))
    # Number of leading elements of a row that form the feature vector.
    n_features = len(samples[0]) - 2

    train_rows = samples[:split]
    test_rows = samples[split:]
    train_x = [s[:n_features] for s in train_rows]
    train_y = [s[-1] for s in train_rows]
    test_x = [s[:n_features] for s in test_rows]
    test_y = [s[-1] for s in test_rows]

    print('转换数据完毕')

    ros = RandomOverSampler(random_state=0)
    # fit_sample() was renamed to fit_resample() in imbalanced-learn 0.4 and
    # removed in later releases; fall back to the old name only when the new
    # one is missing.
    resample = getattr(ros, 'fit_resample', None) or ros.fit_sample
    X_resampled, y_resampled = resample(
        np.array(train_x, dtype=np.float32),
        np.array(train_y, dtype=np.float32),
    )

    print('数据重采样完毕')

    return (
        X_resampled,
        y_resampled,
        np.array(test_x, dtype=np.float32),
        np.array(test_y, dtype=np.float32),
    )


# Load the data set: oversampled training arrays plus the held-out test arrays.
train_x, train_y, test_x, test_y = read_data(file_path)

# Cut every flat training sample into the three model inputs:
#   a: first row*col features      -> (row, col, 1) input of the first CNN
#   b: following 11*col1 features  -> (11, col1, 1) input of the second CNN
#   c: all remaining features      -> flat input of the MLP
train_x_a = train_x[:, :row * col].reshape(train_x.shape[0], row, col, 1)
train_x_b = train_x[:, row * col:row * col + 11 * col1].reshape(
    train_x.shape[0], 11, col1, 1
)
train_x_c = train_x[:, row * col + 11 * col1:]


def create_mlp(dim, regress=False):
    """Build the fully connected branch of the network.

    The stack is Dense(256, relu) -> Dropout(0.2) -> Dense(256, relu) ->
    Dense(128, relu), optionally followed by a single linear unit.

    Args:
        dim: Number of input features of the first Dense layer.
        regress: When true, append a ``Dense(1, activation="linear")`` node.

    Returns:
        The ``Sequential`` model (not compiled).
    """
    stack = [
        Dense(256, input_dim=dim, activation="relu"),
        Dropout(0.2),
        Dense(256, activation="relu"),
        Dense(128, activation="relu"),
    ]

    # Optional single-unit linear output.
    if regress:
        stack.append(Dense(1, activation="linear"))

    net = Sequential()
    for layer in stack:
        net.add(layer)
    return net


def create_cnn(width, height, depth, size=48, kernel_size=(5, 6), regress=False, output=24, strides=2):
    """Build a single-convolution CNN branch.

    Layer order: Conv2D -> ReLU -> BatchNorm -> Flatten -> Dense(output) ->
    ReLU -> BatchNorm -> Dropout(0.2) -> Dense(output) -> ReLU, optionally
    followed by a single linear unit.  There is no pooling layer.

    Args:
        width: Size of the first spatial axis of the input.
        height: Size of the second spatial axis of the input.
        depth: Number of input channels (last axis, channels-last ordering).
        size: Number of convolution filters.
        kernel_size: Convolution kernel size.
        regress: When true, append a ``Dense(1, activation="linear")`` node.
        output: Number of units of each of the two Dense layers.
        strides: Convolution stride.

    Returns:
        The functional ``Model`` mapping the input tensor to the branch
        output (not compiled).
    """
    # Channels-last (TensorFlow) ordering.  The channel count now comes from
    # `depth` instead of a hard-coded 1, so the parameter is no longer ignored.
    input_shape = (width, height, depth)
    channel_axis = -1

    inputs = Input(shape=input_shape)

    # CONV => RELU => BN
    x = Conv2D(size, kernel_size, strides=strides, padding="same")(inputs)
    x = Activation("relu")(x)
    x = BatchNormalization(axis=channel_axis)(x)

    # flatten the volume, then FC => RELU => BN => DROPOUT
    x = Flatten()(x)
    x = Dense(output)(x)
    x = Activation("relu")(x)
    x = BatchNormalization(axis=channel_axis)(x)
    x = Dropout(0.2)(x)

    # second FC layer of the same width, followed by RELU
    x = Dense(output)(x)
    x = Activation("relu")(x)

    # optional single-unit linear output
    if regress:
        x = Dense(1, activation="linear")(x)

    return Model(inputs, x)


# Create the three branches: the MLP for the flat features (train_x_c) and
# one CNN for each of the two reshaped feature blocks.  The commented-out
# variants below are earlier configurations; the trailing numbers on those
# lines were left by the author.
mlp = create_mlp(train_x_c.shape[1], regress=False)
# cnn_0 = create_cnn(18, 20, 1, kernel_size=(3, 3), size=90, regress=False, output=96)       # 31 97 46
cnn_0 = create_cnn(row, col, 1, kernel_size=(4, col), size=66, regress=False, output=66)         # 29 98 47
# cnn_0 = create_cnn(18, 20, 1, kernel_size=(9, 9), size=90, regress=False, output=96)         # 28 97 53
# cnn_0 = create_cnn(18, 20, 1, kernel_size=(3, 20), size=90, regress=False, output=96)
cnn_1 = create_cnn(11, col1, 1, kernel_size=(3, col1), size=66, regress=False, output=66, strides=1)
# cnn_1 = create_cnn(9, 26, 1, kernel_size=(2, 14), size=36, regress=False, output=64)

# The input to the final set of layers is the concatenation of the outputs
# of the MLP and both CNN branches.
combinedInput = concatenate([mlp.output, cnn_0.output, cnn_1.output])

# Classification head: three 1024-unit ReLU layers (the first one with L1
# kernel regularisation and followed by dropout), then the output layer.
x = Dense(1024, activation="relu", kernel_regularizer=regularizers.l1(0.003))(combinedInput)
x = Dropout(0.2)(x)
x = Dense(1024, activation="relu")(x)
x = Dense(1024, activation="relu")(x)
# Output layer: softmax over 4 classes.
x = Dense(4, activation="softmax")(x)

# The final model takes three inputs, in this order: the MLP input, the
# first CNN input and the second CNN input.  It outputs the 4-way softmax.
# Arrays passed to fit()/evaluate() must follow the same order.
model = Model(inputs=[mlp.input, cnn_0.input, cnn_1.input], outputs=x)


print("Starting training ")
# h = model.fit(train_x, train_y, batch_size=4096*2, epochs=500, shuffle=True)

# Compile the model with categorical cross-entropy as the loss and accuracy
# as the reported metric, optimised with Adam.
# NOTE(review): categorical_crossentropy expects one-hot targets matching the
# 4-unit softmax output -- confirm that train_y has that shape (integer class
# labels would need sparse_categorical_crossentropy instead).
opt = Adam(lr=1e-3, decay=1e-3 / 200)
model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=['accuracy'])

# Train the model.  Inputs are given in the order of the model's inputs
# (MLP, first CNN, second CNN).  No validation data is passed, so the
# early-stopping callback only sees metrics computed on the training data.
print("[INFO] training model...")
model.fit(
    [train_x_c, train_x_a, train_x_b], train_y,
    # validation_data=([testAttrX, testImagesX], testY),
    # epochs=int(3*train_x_a.shape[0]/1300),
    epochs=epochs,
    batch_size=2048, shuffle=True,
    callbacks=[early_stopping]
)

# Persist the trained model.
model.save(model_path)

# Cut the held-out samples into the same three inputs used for training:
# (row, col, 1) block, (11, col1, 1) block and the remaining flat features.
test_x_a = test_x[:, :row * col].reshape(test_x.shape[0], row, col, 1)
test_x_b = test_x[:, row * col:row * col + 11 * col1].reshape(
    test_x.shape[0], 11, col1, 1
)
test_x_c = test_x[:, row * col + 11 * col1:]

# Evaluate on the held-out split; `score` holds the loss followed by the
# accuracy metric.
# NOTE(review): the log text below is kept as-is although this step
# evaluates the 4-class classifier, not a price regressor.
print("[INFO] predicting house prices...")
score = model.evaluate([test_x_c, test_x_a, test_x_b], test_y)

print(score)
print('Test score:', score[0])
print('Test accuracy:', score[1])