yufeng 4 years ago
parent
commit
dbbdcf5e48

+ 95 - 0
industry/industry_predict_100.py

@@ -0,0 +1,95 @@
1
+# -*- encoding:utf-8 -*-
2
+import numpy as np
3
+from keras.models import load_model
4
+import joblib
5
+
6
+
7
def read_data(path):
    """Load samples from a text file holding one Python expression per line.

    Every non-blank line is evaluated into a sequence. With ``size`` being the
    length of the first sample, the features of each sample are
    ``sample[:size - 2]`` (the last two fields are excluded) and its label is
    ``sample[size - 1]``.

    Args:
        path: Path of the sample file.

    Returns:
        A tuple ``(x, y, samples)``: ``x`` and ``y`` are ``float32`` numpy
        arrays built from the features and labels, ``samples`` is the list of
        evaluated lines.

    Raises:
        IndexError: If the file contains no samples.
    """
    samples = []
    with open(path) as f:
        for raw in f:
            raw = raw.strip()
            if not raw:
                # Blank or trailing empty lines would make eval('') raise.
                continue
            # NOTE(review): eval() executes arbitrary code, so only trusted,
            # locally generated files may be read here. ast.literal_eval is
            # the safer choice if every line is a plain literal -- confirm
            # against the data before switching.
            samples.append(eval(raw))

    size = len(samples[0])
    features = [s[:size - 2] for s in samples]
    labels = [s[size - 1] for s in samples]
    return (
        np.array(features, dtype=np.float32),
        np.array(labels, dtype=np.float32),
        samples,
    )
18
+
19
+
20
+def _score(fact, line):
21
+    up_right = 0
22
+    up_error = 0
23
+
24
+    if fact[0] == 1:
25
+        up_right = up_right + 1.05
26
+    elif fact[1] == 1:
27
+        up_error = up_error + 0.3
28
+        up_right = up_right + 1.02
29
+    elif fact[2] == 1:
30
+        up_error = up_error + 0.6
31
+        up_right = up_right + 0.98
32
+    else:
33
+        up_error = up_error + 1
34
+        up_right = up_right + 0.95
35
+    return up_right,up_error
36
+
37
+
38
def predict(file_path='', model_path='15min_dnn_seq.h5', idx=-1, row=18, col=20):
    """Evaluate a saved model on a sample file and summarise the scores.

    The first ``row * col`` features of every sample are reshaped to
    ``(row, col, 1)``; the remaining features are kept as a flat vector. The
    model receives its inputs in the order ``[flat, grid]``.

    A sample counts as an "up" prediction when output 0 or 1 exceeds 0.5,
    otherwise as a "down" prediction when output 2 or 3 exceeds 0.5. Samples
    matching neither condition are ignored.

    Args:
        file_path: Sample file passed to ``read_data``.
        model_path: Path of the saved model passed to ``load_model``.
        idx: When equal to ``-2`` no sample is scored; any other value
            enables scoring.
        row: Number of rows of the grid input.
        col: Number of columns of the grid input.

    Returns:
        ``(win_dnn, up_avg, down_avg)``. ``win_dnn`` is always an empty list
        (nothing in this function fills it). The averages are the accumulated
        "right" scores divided by the number of up / down predictions, using
        a divisor of 1 when there were none.
    """
    test_x, test_y, lines = read_data(file_path)

    cells = row * col
    test_x_a = test_x[:, :cells].reshape(test_x.shape[0], row, col, 1)
    test_x_c = test_x[:, cells:]

    model = load_model(model_path)
    score = model.evaluate([test_x_c, test_x_a], test_y)
    print('MIX', score)

    up_num = 0
    up_error = 0
    up_right = 0
    down_num = 0
    down_error = 0
    down_right = 0
    win_dnn = []  # never populated; returned for interface compatibility

    predictions = model.predict([test_x_c, test_x_a])
    if idx != -2:
        for probs, fact, line in zip(predictions, test_y, lines):
            if probs[0] > 0.5 or probs[1] > 0.5:
                gained, lost = _score(fact, line)
                up_right = gained + up_right
                up_error = lost + up_error
                up_num = up_num + 1
            elif probs[2] > 0.5 or probs[3] > 0.5:
                if fact[0] == 1:
                    down_error = down_error + 1
                    down_right = down_right + 1.05
                elif fact[1] == 1:
                    down_error = down_error + 0.5
                    down_right = down_right + 1.02
                elif fact[2] == 1:
                    down_right = down_right + 0.98
                else:
                    down_right = down_right + 0.95
                down_num = down_num + 1

    # Avoid dividing by zero when a direction was never predicted.
    if up_num == 0:
        up_num = 1
    if down_num == 0:
        down_num = 1
    print('MIX', up_right, up_num, up_right/up_num, up_error/up_num, down_right/down_num, down_error/down_num)
    return win_dnn, up_right/up_num, down_right/down_num
92
+
93
+
94
if __name__ == '__main__':
    # Script entry point: evaluate the saved 10x8 model on one sample file.
    predict(
        file_path='D:\\data\\quantization\\industry\\stock13_10d_3D_train3.log',
        model_path='107_10d_mix_3D_s_seq.h5',
        row=10,
        col=8,
    )

+ 71 - 0
industry/industry_predict_everyday_100.py

@@ -0,0 +1,71 @@
1
+# -*- encoding:utf-8 -*-
2
+import numpy as np
3
+from keras.models import load_model
4
+import random
5
+from mix.stock_source import *
6
+import pymongo
7
+from util.mongodb import get_mongo_table_instance
8
+
9
+code_table = get_mongo_table_instance('tushare_code')  # handle for the 'tushare_code' table, created at import time
10
+k_table = get_mongo_table_instance('stock_day_k')  # handle for the 'stock_day_k' table
11
+stock_concept_table = get_mongo_table_instance('tushare_concept_detail')  # handle for the 'tushare_concept_detail' table
12
+all_concept_code_list = list(get_mongo_table_instance('tushare_concept').find({}))  # every 'tushare_concept' record, loaded eagerly at import time
13
+
14
+
15
+gainian_map = {}  # "gainian" is pinyin for "concept"; presumably a concept lookup -- TODO confirm
16
+hangye_map = {}  # "hangye" is pinyin for "industry"; presumably an industry lookup -- TODO confirm
17
+
18
+
19
+Z_list = []  # watchlist (self-selected)
20
+R_list = []  # ROE
21
+O_list = []  # others; predict_today appends the samples it labels as rising
22
+
23
+
24
def predict_today(file, day, model='10_18d', log=True, row=10, col=8):
    """Classify every sample in ``file`` with a saved model and record the calls.

    For each sample the first ``row * col`` features are reshaped to
    ``(row, col, 1)`` and the remaining features are kept flat; the model is
    called with ``[flat, grid]``. The first output (checked in index order)
    that exceeds 0.5 decides the label, which is printed and appended to
    ``D:\\data\\quantization\\predict\\<day>_industry100.txt``. Samples
    labelled as rising are also appended to the module-level ``O_list``;
    finally ``O_list`` is shuffled and its first three entries are printed.

    Args:
        file: Sample file with one Python expression per line; the last field
            of each sample identifies it in the output.
        day: Value used to build the output file name and passed to
            ``get_hot_industry``.
        model: Path of the saved model passed to ``load_model``.
        log: Unused; kept for caller compatibility.
        row: Number of rows of the grid input.
        col: Number of columns of the grid input.

    Raises:
        IndexError: If ``file`` contains no samples.
    """
    # NOTE(review): the result was never used by this function; the call is
    # kept in case get_hot_industry has side effects -- confirm and drop.
    get_hot_industry(day)

    samples = []
    with open(file) as src:
        for raw in src:
            raw = raw.strip()
            if not raw:
                # Blank or trailing empty lines would make eval('') raise.
                continue
            # NOTE(review): eval() executes arbitrary code; only trusted,
            # locally generated files may be read here.
            samples.append(eval(raw))

    size = len(samples[0])
    net = load_model(model)
    cells = row * col

    # (output index, label written to the log, add to O_list?)
    # Labels: big rise, rise, fall, big fall.
    verdicts = (
        (0, '大涨', True),
        (1, '涨', True),
        (2, '跌', False),
        (3, '大跌', False),
    )

    out_path = 'D:\\data\\quantization\\predict\\' + str(day) + '_industry100.txt'
    # Open the log once instead of re-opening it for every sample.
    with open(out_path, mode='a', encoding="utf-8") as out:
        for sample in samples:
            features = np.array([sample[:size - 1]])
            grid_input = features[:, :cells].reshape(features.shape[0], row, col, 1)
            flat_input = features[:, cells:]

            probs = net.predict([flat_input, grid_input])[0]
            tag = sample[-1]
            for position, label, shortlist in verdicts:
                if probs[position] > 0.5:
                    print(tag, label)
                    if shortlist:
                        O_list.append(tag)
                    out.write(str(tag) + ',' + label + '\n')
                    break

    random.shuffle(O_list)
    print(O_list[:3])
66
+
67
+
68
if __name__ == '__main__':
    # Script entry point: label the samples generated for 2020-04-17.
    predict_today(
        "D:\\data\\quantization\\industry\\stock13_10d_3D_20200417.log",
        20200417,
        model='107_10d_mix_3D_s_seq.h5',
        log=True,
    )
    # join_two_day(20200305, 20200305)
    # check_everyday(20200311, 20200312)

+ 198 - 0
industry/industry_train_100.py

@@ -0,0 +1,198 @@
1
+import keras
2
+# -*- encoding:utf-8 -*-
3
+import numpy as np
4
+from keras.models import Sequential
5
+# Optimizer: Adam is used here (many alternatives are available, e.g. SGD)
6
+from keras.optimizers import Adam
7
+import random
8
+from imblearn.over_sampling import RandomOverSampler
9
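+# Building blocks of the model: Conv2D is the convolution layer, Activation the activation function, MaxPooling2D the pooling layer
10
+# Flatten collapses a multi-dimensional input into one dimension
11
+# Dense is a fully connected layer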
12
+from keras.layers import Conv2D, Activation, MaxPool2D, Flatten, Dense,Dropout,Input,MaxPooling2D,BatchNormalization,concatenate
13
+from keras import regularizers
14
+from keras.models import Model
15
+from keras.callbacks import EarlyStopping
16
+
17
+early_stopping = EarlyStopping(monitor='accuracy', patience=5, verbose=2)  # callback handed to model.fit; watches the 'accuracy' metric with a patience of 5
18
+
19
+epochs= 50  # epoch count handed to model.fit
20
+# size = 24000  # about 680k samples in total
21
+file_path = 'D:\\data\\quantization\\industry\\stock13_10d_3D_train1.log'  # first sample file read by read_data
22
+model_path = '107_10d_mix_3D_s_seq.h5'  # where the trained model is saved
23
+file_path1='D:\\data\\quantization\\industry\\stock13_10d_3D_train2.log'  # second sample file (default for read_data's path1)
24
+row = 10  # rows of the grid input
25
+col = 8  # columns of the grid input
26
+'''
27
+1   10-3天    10*9    35,103,34-28
28
+2   10-3/             34,101,35-28    !
29
+3   窗口=5            33,100,43-29
30
+4   窗口=6            32,101,34-29
31
+5   18-3 win=3  18*9  32,100,42-30
32
+6   18-3 win=6  18*9  37,100,38,27
33
+7   10-3 win=3 amount涨幅 10*8 37,100,40-27
34
+'''
35
+
36
def read_data(path, path1=file_path1):
    """Load, shuffle, split and oversample the training samples.

    Both files hold one Python expression per line. All samples are shuffled
    together; the first 85% become the training split and the rest the test
    split. With ``length`` being the length of the first sample, the features
    are ``sample[:length - 2]`` and the label is ``sample[-1]``. Only the
    training split is passed through ``RandomOverSampler``.

    Args:
        path: First sample file.
        path1: Second sample file.

    Returns:
        ``(train_x, train_y, test_x, test_y)``; the test arrays are
        ``float32`` numpy arrays, the training arrays are the oversampler's
        output.

    Raises:
        IndexError: If both files contain no samples.
    """
    samples = []
    for source in (path, path1):
        with open(source) as f:
            for raw in f:
                raw = raw.strip()
                if not raw:
                    # Blank or trailing empty lines would make eval('') raise.
                    continue
                # NOTE(review): eval() executes arbitrary code; only trusted,
                # locally generated files may be read here.
                samples.append(eval(raw))

    random.shuffle(samples)
    print('读取数据完毕')

    split = int(0.85 * len(samples))
    length = len(samples[0])
    train_part = samples[:split]
    test_part = samples[split:]

    train_x = [s[:length - 2] for s in train_part]
    train_y = [s[-1] for s in train_part]
    test_x = [s[:length - 2] for s in test_part]
    test_y = [s[-1] for s in test_part]

    print('转换数据完毕')

    ros = RandomOverSampler(random_state=0)
    # fit_resample replaces fit_sample, which newer imbalanced-learn
    # releases no longer provide.
    X_resampled, y_resampled = ros.fit_resample(
        np.array(train_x, dtype=np.float32),
        np.array(train_y, dtype=np.float32),
    )

    print('数据重采样完毕')

    return (
        X_resampled,
        y_resampled,
        np.array(test_x, dtype=np.float32),
        np.array(test_y, dtype=np.float32),
    )
67
+
68
+
69
# Load the data at import time; this reads both sample files.
train_x, train_y, test_x, test_y = read_data(file_path)

# The first row*col features of each sample form the (row, col, 1) grid input.
train_x_a = train_x[:, :row * col].reshape(train_x.shape[0], row, col, 1)
# train_x_b = train_x[:, 9*26:18*26]
# train_x_b = train_x_b.reshape(train_x.shape[0], 9, 26, 1)
# Everything after the grid is the flat input.
train_x_c = train_x[:, row * col:]
76
+
77
+
78
def create_mlp(dim, regress=False):
    """Build the fully connected branch of the network.

    Args:
        dim: Number of input features.
        regress: When true, append a single linear output unit.

    Returns:
        An uncompiled ``Sequential`` model ending in a 128-unit ReLU layer
        (or in the linear unit when ``regress`` is true).
    """
    stack = [
        Dense(256, input_dim=dim, activation="relu"),
        Dropout(0.2),
        Dense(256, activation="relu"),
        Dense(256, activation="relu"),
        Dense(128, activation="relu"),
    ]
    if regress:
        stack.append(Dense(1, activation="linear"))
    return Sequential(stack)
93
+
94
+
95
def create_cnn(width, height, depth, size=48, kernel_size=(5, 6), regress=False, output=24):
    """Build the convolutional branch of the network.

    Layout: Conv2D (stride 2, "same" padding) -> ReLU -> BatchNorm ->
    Flatten -> Dense -> ReLU -> BatchNorm -> Dropout(0.2) -> Dense -> ReLU.

    Args:
        width: Size of the first input axis.
        height: Size of the second input axis.
        depth: Number of input channels (channels-last ordering).
        size: Number of convolution filters.
        kernel_size: Convolution kernel size.
        regress: When true, append a single linear output unit.
        output: Width of the two dense layers.

    Returns:
        An uncompiled functional ``Model``.
    """
    # Channels-last input; `depth` is honoured instead of a hard-coded 1.
    input_shape = (width, height, depth)
    chan_dim = -1

    inputs = Input(shape=input_shape)

    # CONV => RELU => BN
    x = Conv2D(size, kernel_size, strides=2, padding="same")(inputs)
    x = Activation("relu")(x)
    x = BatchNormalization(axis=chan_dim)(x)

    # y = Conv2D(24, (2, 8), strides=2, padding="same")(inputs)
    # y = Activation("relu")(y)
    # y = BatchNormalization(axis=chanDim)(y)

    # Flatten, then FC => RELU => BN => DROPOUT
    x = Flatten()(x)
    x = Dense(output)(x)
    x = Activation("relu")(x)
    x = BatchNormalization(axis=chan_dim)(x)
    x = Dropout(0.2)(x)

    # Second FC layer producing the branch output.
    x = Dense(output)(x)
    x = Activation("relu")(x)

    if regress:
        x = Dense(1, activation="linear")(x)

    return Model(inputs, x)
134
+
135
+
136
+# create the MLP and CNN models
137
# Build the two branches: a dense branch for the flat features and a
# convolutional branch for the (row, col, 1) grid.
mlp = create_mlp(train_x_c.shape[1], regress=False)
# Alternative configurations that were tried (trailing numbers are the
# author's recorded results):
# cnn_0 = create_cnn(18, 20, 1, kernel_size=(3, 3), size=90, regress=False, output=96)       # 31 97 46
cnn_0 = create_cnn(row, col, 1, kernel_size=(3, col), size=96, regress=False, output=96)  # 29 98 47
# cnn_0 = create_cnn(18, 20, 1, kernel_size=(9, 9), size=90, regress=False, output=96)         # 28 97 53
# cnn_0 = create_cnn(18, 20, 1, kernel_size=(3, 20), size=90, regress=False, output=96)
# cnn_1 = create_cnn(18, 20, 1, kernel_size=(18, 10), size=80, regress=False, output=96)
# cnn_1 = create_cnn(9, 26, 1, kernel_size=(2, 14), size=36, regress=False, output=64)

# The head consumes the concatenated outputs of both branches.
combinedInput = concatenate([mlp.output, cnn_0.output])

# Head: three 1024-unit ReLU layers (the first one L1-regularised and
# followed by dropout), then a 4-way softmax classifier.
x = Dense(
    1024,
    activation="relu",
    kernel_regularizer=regularizers.l1(0.003),
)(combinedInput)
x = Dropout(0.2)(x)
x = Dense(1024, activation="relu")(x)
x = Dense(1024, activation="relu")(x)
x = Dense(4, activation="softmax")(x)

# The combined model takes [flat features, grid] and outputs the four class
# probabilities.
model = Model(inputs=[mlp.input, cnn_0.input], outputs=x)
162
+
163
+
164
print("Starting training ")
# h = model.fit(train_x, train_y, batch_size=4096*2, epochs=500, shuffle=True)

# Compile for 4-class classification: categorical cross-entropy loss,
# Adam optimizer, accuracy as the reported metric.
opt = Adam(lr=1e-3, decay=1e-3 / 200)
model.compile(
    loss="categorical_crossentropy",
    optimizer=opt,
    metrics=['accuracy'],
)

# Train on the oversampled training split; no validation data is supplied,
# so early stopping watches the training accuracy.
print("[INFO] training model...")
model.fit(
    [train_x_c, train_x_a],
    train_y,
    # validation_data=([testAttrX, testImagesX], testY),
    # epochs=int(3*train_x_a.shape[0]/1300),
    epochs=epochs,
    batch_size=2048,
    shuffle=True,
    callbacks=[early_stopping],
)

model.save(model_path)
185
+
186
# Split the held-out rows the same way as the training rows: the first
# row*col features form the grid input, the rest the flat input.
test_x_a = test_x[:, :row * col]
test_x_a = test_x_a.reshape(test_x.shape[0], row, col, 1)
# test_x_b = test_x[:, 9*26:9*26+9*26]
# test_x_b = test_x_b.reshape(test_x.shape[0], 9, 26, 1)
test_x_c = test_x[:, row * col:]

# Evaluate the classifier on the held-out split; `score` holds the loss
# followed by the accuracy metric.
print("[INFO] evaluating on the test split...")
score = model.evaluate([test_x_c, test_x_a], test_y)

print(score)
print('Test score:', score[0])
print('Test accuracy:', score[1])