4 years ago · dbbdcf5e48
--- a/industry/industry_predict_100.py
+++ b/industry/industry_predict_100.py
@@ -0,0 +1,95 @@
 
				+# -*- encoding:utf-8 -*-
			
 
				+import numpy as np
			
 
				+from keras.models import load_model
			
 
				+import joblib
			
 
				+
			
 
				+
			
 
				+def read_data(path):
			
 
				+    lines = []
			
 
				+    with open(path) as f:
			
 
				+        for line in f.readlines()[:]:
			
 
				+            line = eval(line.strip())
			
 
				+            lines.append(line)
			
 
				+
			
 
				+    size = len(lines[0])
			
 
				+    train_x=[s[:size - 2] for s in lines]
			
 
				+    train_y=[s[size-1] for s in lines]
			
 
				+    return np.array(train_x, dtype=np.float32),np.array(train_y, dtype=np.float32),lines
			
 
				+
			
 
				+
			
 
				+def _score(fact, line):
			
 
				+    up_right = 0
			
 
				+    up_error = 0
			
 
				+
			
 
				+    if fact[0] == 1:
			
 
				+        up_right = up_right + 1.05
			
 
				+    elif fact[1] == 1:
			
 
				+        up_error = up_error + 0.3
			
 
				+        up_right = up_right + 1.02
			
 
				+    elif fact[2] == 1:
			
 
				+        up_error = up_error + 0.6
			
 
				+        up_right = up_right + 0.98
			
 
				+    else:
			
 
				+        up_error = up_error + 1
			
 
				+        up_right = up_right + 0.95
			
 
				+    return up_right,up_error
			
 
				+
			
 
				+
			
 
				+def predict(file_path='', model_path='15min_dnn_seq.h5', idx=-1, row=18, col=20):
			
 
				+    test_x,test_y,lines=read_data(file_path)
			
 
				+
			
 
				+    test_x_a = test_x[:,:row*col]
			
 
				+    test_x_a = test_x_a.reshape(test_x.shape[0], row, col, 1)
			
 
				+    # test_x_b = test_x[:, row*col:row*col+18*2]
			
 
				+    # test_x_b = test_x_b.reshape(test_x.shape[0], 18, 2, 1)
			
 
				+    test_x_c = test_x[:,row*col:]
			
 
				+
			
 
				+    model=load_model(model_path)
			
 
				+    score = model.evaluate([test_x_c, test_x_a, ], test_y)
			
 
				+    print('MIX', score)
			
 
				+
			
 
				+    up_num = 0
			
 
				+    up_error = 0
			
 
				+    up_right = 0
			
 
				+    down_num = 0
			
 
				+    down_error = 0
			
 
				+    down_right = 0
			
 
				+    i = 0
			
 
				+    result = model.predict([test_x_c, test_x_a, ])
			
 
				+    win_dnn = []
			
 
				+    for r in result:
			
 
				+        fact = test_y[i]
			
 
				+
			
 
				+        if idx in [-2]:
			
 
				+            if r[0] > 0.5 or r[1] > 0.5:
			
 
				+                pass
			
 
				+        else:
			
 
				+            if r[0] > 0.5 or r[1] > 0.5:
			
 
				+                tmp_right,tmp_error = _score(fact, lines[i])
			
 
				+                up_right = tmp_right + up_right
			
 
				+                up_error = tmp_error + up_error
			
 
				+                up_num = up_num + 1
			
 
				+            elif r[2] > 0.5 or r[3] > 0.5:
			
 
				+                if fact[0] == 1:
			
 
				+                    down_error = down_error + 1
			
 
				+                    down_right = down_right + 1.05
			
 
				+                elif fact[1] == 1:
			
 
				+                    down_error = down_error + 0.5
			
 
				+                    down_right = down_right + 1.02
			
 
				+                elif fact[2] == 1:
			
 
				+                    down_right = down_right + 0.98
			
 
				+                else:
			
 
				+                    down_right = down_right + 0.95
			
 
				+                down_num = down_num + 1
			
 
				+
			
 
				+        i = i + 1
			
 
				+    if up_num == 0:
			
 
				+        up_num = 1
			
 
				+    if down_num == 0:
			
 
				+        down_num = 1
			
 
				+    print('MIX', up_right, up_num, up_right/up_num, up_error/up_num, down_right/down_num, down_error/down_num)
			
 
				+    return win_dnn,up_right/up_num,down_right/down_num
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    predict(file_path='D:\\data\\quantization\\industry\\stock13_10d_3D_train3.log', model_path='107_10d_mix_3D_s_seq.h5', row=10, col=8)
			
--- a/industry/industry_predict_everyday_100.py
+++ b/industry/industry_predict_everyday_100.py
@@ -0,0 +1,71 @@
 
				+# -*- encoding:utf-8 -*-
			
 
				+import numpy as np
			
 
				+from keras.models import load_model
			
 
				+import random
			
 
				+from mix.stock_source import *
			
 
				+import pymongo
			
 
				+from util.mongodb import get_mongo_table_instance
			
 
				+
			
 
				+code_table = get_mongo_table_instance('tushare_code')
			
 
				+k_table = get_mongo_table_instance('stock_day_k')
			
 
				+stock_concept_table = get_mongo_table_instance('tushare_concept_detail')
			
 
				+all_concept_code_list = list(get_mongo_table_instance('tushare_concept').find({}))
			
 
				+
			
 
				+
			
 
				+gainian_map = {}
			
 
				+hangye_map = {}
			
 
				+
			
 
				+
			
 
				+Z_list = []  # 自选
			
 
				+R_list = []  #  ROE
			
 
				+O_list = []  # 其他
			
 
				+
			
 
				+
			
 
				+def predict_today(file, day, model='10_18d', log=True):
			
 
				+    industry_list = get_hot_industry(day)
			
 
				+
			
 
				+    lines = []
			
 
				+    with open(file) as f:
			
 
				+        for line in f.readlines()[:]:
			
 
				+            line = eval(line.strip())
			
 
				+            lines.append(line)
			
 
				+
			
 
				+    size = len(lines[0])
			
 
				+
			
 
				+    model=load_model(model)
			
 
				+
			
 
				+    for line in lines:
			
 
				+        train_x = np.array([line[:size - 1]])
			
 
				+        train_x_tmp = train_x[:,:10*8]
			
 
				+        train_x_a = train_x_tmp.reshape(train_x.shape[0], 10, 8, 1)
			
 
				+        # train_x_b = train_x_tmp.reshape(train_x.shape[0], 18, 24)
			
 
				+        train_x_c = train_x[:,10*8:]
			
 
				+
			
 
				+        result = model.predict([train_x_c, train_x_a, ])
			
 
				+        # print(result, line[-1])
			
 
				+        stock = code_table.find_one({'ts_code':line[-1][0]})
			
 
				+
			
 
				+        with open('D:\\data\\quantization\\predict\\' + str(day) + '_industry100.txt', mode='a', encoding="utf-8") as f:
			
 
				+            if result[0][0] > 0.5:
			
 
				+                print(line[-1], '大涨')
			
 
				+                O_list.append(line[-1])
			
 
				+                f.write(str(line[-1]) + ',大涨\n')
			
 
				+            elif result[0][1] > 0.5:
			
 
				+                print(line[-1], '涨')
			
 
				+                O_list.append(line[-1])
			
 
				+                f.write(str(line[-1]) + ',涨\n')
			
 
				+            elif result[0][2] > 0.5:
			
 
				+                print(line[-1], '跌')
			
 
				+                f.write(str(line[-1]) + ',跌\n')
			
 
				+            elif result[0][3] > 0.5:
			
 
				+                print(line[-1], '大跌')
			
 
				+                f.write(str(line[-1]) + ',大跌\n')
			
 
				+
			
 
				+    random.shuffle(O_list)
			
 
				+    print(O_list[:3])
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    predict_today("D:\\data\\quantization\\industry\\stock13_10d_3D_20200417.log", 20200417, model='107_10d_mix_3D_s_seq.h5', log=True)
			
 
				+    # join_two_day(20200305, 20200305)
			
 
				+    # check_everyday(20200311, 20200312)
			
--- a/industry/industry_train_100.py
+++ b/industry/industry_train_100.py
@@ -0,0 +1,198 @@
 
				+import keras
			
 
				+# -*- encoding:utf-8 -*-
			
 
				+import numpy as np
			
 
				+from keras.models import Sequential
			
 
				+# 优化方法选用Adam(其实可选项有很多，如SGD)
			
 
				+from keras.optimizers import Adam
			
 
				+import random
			
 
				+from imblearn.over_sampling import RandomOverSampler
			
 
				+# 用于模型初始化，Conv2D模型初始化、Activation激活函数，MaxPooling2D是池化层
			
 
				+# Flatten作用是将多位输入进行一维化
			
 
				+# Dense是全连接层
			
 
				+from keras.layers import Conv2D, Activation, MaxPool2D, Flatten, Dense,Dropout,Input,MaxPooling2D,BatchNormalization,concatenate
			
 
				+from keras import regularizers
			
 
				+from keras.models import Model
			
 
				+from keras.callbacks import EarlyStopping
			
 
				+
			
 
				+early_stopping = EarlyStopping(monitor='accuracy', patience=5, verbose=2)
			
 
				+
			
 
				+epochs= 50
			
 
				+# size = 24000 #共68W
			
 
				+file_path = 'D:\\data\\quantization\\industry\\stock13_10d_3D_train1.log'
			
 
				+model_path = '107_10d_mix_3D_s_seq.h5'
			
 
				+file_path1='D:\\data\\quantization\\industry\\stock13_10d_3D_train2.log'
			
 
				+row = 10
			
 
				+col = 8
			
 
				+'''
			
 
				+1   10-3天    10*9    35,103,34-28
			
 
				+2   10-3/             34,101,35-28    !
			
 
				+3   窗口=5            33,100,43-29
			
 
				+4   窗口=6            32,101,34-29
			
 
				+5   18-3 win=3  18*9  32,100,42-30
			
 
				+6   18-3 win=6  18*9  37,100,38,27
			
 
				+7   10-3 win=3 amount涨幅 10*8 37,100,40-27
			
 
				+'''
			
 
				+
			
 
				+def read_data(path, path1=file_path1):
			
 
				+    lines = []
			
 
				+    with open(path) as f:
			
 
				+        for x in f.readlines(): #680000
			
 
				+            line = eval(x.strip())
			
 
				+            lines.append(line)
			
 
				+
			
 
				+    with open(path1) as f:
			
 
				+        for x in f.readlines(): #680000
			
 
				+            line = eval(x.strip())
			
 
				+            lines.append(line)
			
 
				+
			
 
				+    random.shuffle(lines)
			
 
				+    print('读取数据完毕')
			
 
				+
			
 
				+    d=int(0.85*len(lines))
			
 
				+    length = len(lines[0])
			
 
				+
			
 
				+    train_x=[s[:length - 2] for s in lines[0:d]]
			
 
				+    train_y=[s[-1] for s in lines[0:d]]
			
 
				+    test_x=[s[:length - 2] for s in lines[d:]]
			
 
				+    test_y=[s[-1] for s in lines[d:]]
			
 
				+
			
 
				+    print('转换数据完毕')
			
 
				+
			
 
				+    ros = RandomOverSampler(random_state=0)
			
 
				+    X_resampled, y_resampled = ros.fit_sample(np.array(train_x, dtype=np.float32), np.array(train_y, dtype=np.float32))
			
 
				+
			
 
				+    print('数据重采样完毕')
			
 
				+
			
 
				+    return X_resampled,y_resampled,np.array(test_x, dtype=np.float32),np.array(test_y, dtype=np.float32)
			
 
				+
			
 
				+
			
 
				+train_x,train_y,test_x,test_y=read_data(file_path)
			
 
				+
			
 
				+train_x_a = train_x[:,:row*col]
			
 
				+train_x_a = train_x_a.reshape(train_x.shape[0], row, col, 1)
			
 
				+# train_x_b = train_x[:, 9*26:18*26]
			
 
				+# train_x_b = train_x_b.reshape(train_x.shape[0], 9, 26, 1)
			
 
				+train_x_c = train_x[:,row*col:]
			
 
				+
			
 
				+
			
 
				+def create_mlp(dim, regress=False):
			
 
				+    # define our MLP network
			
 
				+    model = Sequential()
			
 
				+    model.add(Dense(256, input_dim=dim, activation="relu"))
			
 
				+    model.add(Dropout(0.2))
			
 
				+    model.add(Dense(256, activation="relu"))
			
 
				+    model.add(Dense(256, activation="relu"))
			
 
				+    model.add(Dense(128, activation="relu"))
			
 
				+
			
 
				+    # check to see if the regression node should be added
			
 
				+    if regress:
			
 
				+        model.add(Dense(1, activation="linear"))
			
 
				+
			
 
				+    # return our model
			
 
				+    return model
			
 
				+
			
 
				+
			
 
				+def create_cnn(width, height, depth, size=48, kernel_size=(5, 6), regress=False, output=24):
			
 
				+    # initialize the input shape and channel dimension, assuming
			
 
				+    # TensorFlow/channels-last ordering
			
 
				+    inputShape = (width, height, 1)
			
 
				+    chanDim = -1
			
 
				+
			
 
				+    # define the model input
			
 
				+    inputs = Input(shape=inputShape)
			
 
				+    # x = inputs
			
 
				+    # CONV => RELU => BN => POOL
			
 
				+    x = Conv2D(size, kernel_size, strides=2, padding="same")(inputs)
			
 
				+    x = Activation("relu")(x)
			
 
				+    x = BatchNormalization(axis=chanDim)(x)
			
 
				+
			
 
				+    # y = Conv2D(24, (2, 8), strides=2, padding="same")(inputs)
			
 
				+    # y = Activation("relu")(y)
			
 
				+    # y = BatchNormalization(axis=chanDim)(y)
			
 
				+
			
 
				+    # flatten the volume, then FC => RELU => BN => DROPOUT
			
 
				+    x = Flatten()(x)
			
 
				+    x = Dense(output)(x)
			
 
				+    x = Activation("relu")(x)
			
 
				+    x = BatchNormalization(axis=chanDim)(x)
			
 
				+    x = Dropout(0.2)(x)
			
 
				+
			
 
				+    # apply another FC layer, this one to match the number of nodes
			
 
				+    # coming out of the MLP
			
 
				+    x = Dense(output)(x)
			
 
				+    x = Activation("relu")(x)
			
 
				+
			
 
				+    # check to see if the regression node should be added
			
 
				+    if regress:
			
 
				+        x = Dense(1, activation="linear")(x)
			
 
				+
			
 
				+    # construct the CNN
			
 
				+    model = Model(inputs, x)
			
 
				+
			
 
				+    # return the CNN
			
 
				+    return model
			
 
				+
			
 
				+
			
 
				+# create the MLP and CNN models
			
 
				+mlp = create_mlp(train_x_c.shape[1], regress=False)
			
 
				+# cnn_0 = create_cnn(18, 20, 1, kernel_size=(3, 3), size=90, regress=False, output=96)       # 31 97 46
			
 
				+cnn_0 = create_cnn(row, col, 1, kernel_size=(3, col), size=96, regress=False, output=96)         # 29 98 47
			
 
				+# cnn_0 = create_cnn(18, 20, 1, kernel_size=(9, 9), size=90, regress=False, output=96)         # 28 97 53
			
 
				+# cnn_0 = create_cnn(18, 20, 1, kernel_size=(3, 20), size=90, regress=False, output=96)
			
 
				+# cnn_1 = create_cnn(18, 20, 1, kernel_size=(18, 10), size=80, regress=False, output=96)
			
 
				+# cnn_1 = create_cnn(9, 26, 1, kernel_size=(2, 14), size=36, regress=False, output=64)
			
 
				+
			
 
				+# create the input to our final set of layers as the *output* of both
			
 
				+# the MLP and CNN
			
 
				+combinedInput = concatenate([mlp.output, cnn_0.output, ])
			
 
				+
			
 
				+# our final FC layer head will have two dense layers, the final one
			
 
				+# being our regression head
			
 
				+x = Dense(1024, activation="relu", kernel_regularizer=regularizers.l1(0.003))(combinedInput)
			
 
				+x = Dropout(0.2)(x)
			
 
				+x = Dense(1024, activation="relu")(x)
			
 
				+x = Dense(1024, activation="relu")(x)
			
 
				+# 在建设一层
			
 
				+x = Dense(4, activation="softmax")(x)
			
 
				+
			
 
				+# our final model will accept categorical/numerical data on the MLP
			
 
				+# input and images on the CNN input, outputting a single value (the
			
 
				+# predicted price of the house)
			
 
				+model = Model(inputs=[mlp.input, cnn_0.input, ], outputs=x)
			
 
				+
			
 
				+
			
 
				+print("Starting training ")
			
 
				+# h = model.fit(train_x, train_y, batch_size=4096*2, epochs=500, shuffle=True)
			
 
				+
			
 
				+# compile the model using mean absolute percentage error as our loss,
			
 
				+# implying that we seek to minimize the absolute percentage difference
			
 
				+# between our price *predictions* and the *actual prices*
			
 
				+opt = Adam(lr=1e-3, decay=1e-3 / 200)
			
 
				+model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=['accuracy'])
			
 
				+
			
 
				+# train the model
			
 
				+print("[INFO] training model...")
			
 
				+model.fit(
			
 
				+    [train_x_c, train_x_a, ], train_y,
			
 
				+    # validation_data=([testAttrX, testImagesX], testY),
			
 
				+    # epochs=int(3*train_x_a.shape[0]/1300),
			
 
				+    epochs=epochs,
			
 
				+    batch_size=2048, shuffle=True,
			
 
				+    callbacks=[early_stopping]
			
 
				+)
			
 
				+
			
 
				+model.save(model_path)
			
 
				+
			
 
				+test_x_a = test_x[:,:row*col]
			
 
				+test_x_a = test_x_a.reshape(test_x.shape[0], row, col, 1)
			
 
				+# test_x_b = test_x[:, 9*26:9*26+9*26]
			
 
				+# test_x_b = test_x_b.reshape(test_x.shape[0], 9, 26, 1)
			
 
				+test_x_c = test_x[:,row*col:]
			
 
				+
			
 
				+# make predictions on the testing data
			
 
				+print("[INFO] predicting house prices...")
			
 
				+score  = model.evaluate([test_x_c, test_x_a,], test_y)
			
 
				+
			
 
				+print(score)
			
 
				+print('Test score:', score[0])
			
 
				+print('Test accuracy:', score[1])