Browse Source

个股预测

yufeng 4 years ago
parent
commit
156e9c06e6

+ 96 - 0
mix/mix_predict_200.py

@@ -0,0 +1,96 @@
1
+# -*- encoding:utf-8 -*-
2
+import numpy as np
3
+from keras.models import load_model
4
+import joblib
5
+
6
+
7
def read_data(path):
    """Load evaluation samples from a text file holding one Python literal per line.

    Every non-blank line is evaluated into a sequence. With ``size`` being the
    length of the first sample, the features of a sample are all elements
    except the last two, and its label is the element at index ``size - 1``.
    The element at index ``size - 2`` is not used by this function.

    Args:
        path: Path of the sample file.

    Returns:
        A tuple ``(x, y, lines)`` where ``x`` and ``y`` are numpy arrays built
        from the feature slices and the labels, and ``lines`` is the list of
        parsed samples in file order.

    Raises:
        IndexError: If the file contains no samples.
    """
    lines = []
    with open(path) as f:
        # Stream the file instead of materialising (and copying) readlines().
        for raw in f:
            raw = raw.strip()
            if not raw:
                # A blank (e.g. trailing) line would make eval('') raise SyntaxError.
                continue
            # NOTE(review): eval() executes arbitrary code found in the file.
            # Only use this on trusted data; consider ast.literal_eval if every
            # line is a plain literal.
            lines.append(eval(raw))

    size = len(lines[0])
    train_x = [s[:size - 2] for s in lines]
    train_y = [s[size - 1] for s in lines]
    return np.array(train_x), np.array(train_y), lines
18
+
19
+
20
+def _score(fact, line):
21
+    up_right = 0
22
+    up_error = 0
23
+
24
+    if fact[0] == 1:
25
+        up_right = up_right + 1.04
26
+    elif fact[1] == 1:
27
+        up_right = up_right + 1
28
+    elif fact[1] == 2:
29
+        up_right = up_right + 0.96
30
+        up_error = up_error + 0.3
31
+    else:
32
+        up_error = up_error + 1
33
+        up_right = up_right + 0.90
34
+    return up_right,up_error
35
+
36
+
37
def predict(file_path='', model_path='15min_dnn_seq.h5', idx=-1, row=18, col=20):
    """Evaluate a saved two-input model on a sample file and print hit statistics.

    The first ``row * col`` features of every sample are reshaped to
    ``(row, col, 1)``; the remaining features stay flat. The model is fed
    ``[flat_features, grid_features]``.

    Args:
        file_path: Sample file understood by ``read_data``.
        model_path: Path of the saved Keras model to load.
        idx: When equal to ``-2`` no sample is scored at all; any other value
            enables the scoring described below.
        row: Number of rows of the grid input.
        col: Number of columns of the grid input.

    Returns:
        ``(win_dnn, up_right / up_num, down_right / down_num)``. ``win_dnn``
        is always an empty list because nothing is appended to it. A count of
        zero is replaced by one before dividing.
    """
    test_x, test_y, lines = read_data(file_path)

    # Split the flat feature vector into the grid part and the remaining part.
    grid_len = row * col
    test_x_a = test_x[:, :grid_len].reshape(test_x.shape[0], row, col, 1)
    test_x_c = test_x[:, grid_len:]

    model = load_model(model_path)
    score = model.evaluate([test_x_c, test_x_a], test_y)
    print('MIX', score)

    win_dnn = []
    up_num = up_error = up_right = 0
    down_num = down_error = down_right = 0

    result = model.predict([test_x_c, test_x_a])
    for probs, fact, sample in zip(result, test_y, lines):
        if idx == -2:
            # This mode records nothing for any sample.
            continue

        if probs[0] > 0.7:
            # Output 0 above its threshold: score the sample with _score().
            gained, lost = _score(fact, sample)
            up_right += gained
            up_error += lost
            up_num += 1
        elif probs[2] > 0.6:
            # Output 2 above its threshold: weight by the actual label.
            if fact[0] == 1:
                down_error += 1
                down_right += 1.1
            elif fact[1] == 1:
                down_error += 0.2
                down_right += 0.99
            else:
                down_right += 0.88
            down_num += 1

    # Guard the averages below against division by zero.
    up_num = up_num or 1
    down_num = down_num or 1

    up_avg = up_right / up_num
    down_avg = down_right / down_num
    print('MIX', up_right, up_num, up_avg, up_error / up_num, down_avg, down_error / down_num)
    return win_dnn, up_avg, down_avg
89
+
90
+
91
if __name__ == '__main__':
    # Earlier invocations, left disabled in the original script:
    # predict(file_path='D:\\data\\quantization\\stock181_18d_test.log', model_path='181_18d_mix_6D_ma5_s_seq.h5')
    # predict(file_path='D:\\data\\quantization\\stock6_test.log', model_path='15m_dnn_seq.h5')
    # multi_predict(model='15_18d')
    # predict_today(20200229, model='11_18d')

    # Score the saved model on the sample file with an 18 x 20 grid input.
    predict(
        file_path='D:\\data\\quantization\\stock201_18d_train1.log',
        model_path='213_18d_mix_6D_ma5_s_seq.h5',
        row=18,
        col=20,
    )

+ 1 - 1
mix/mix_predict_by_day_190.py

@@ -81,5 +81,5 @@ if __name__ == '__main__':
81 81
     # predict(file_path='D:\\data\\quantization\\stock9_18_20200220.log', model_path='18d_dnn_seq.h5')
82 82
     # predict(file_path='D:\\data\\quantization\\stock9_18_2.log', model_path='18d_dnn_seq.h5')
83 83
     # predict(file_path='D:\\data\\quantization\\stock16_18d_20200310.log', model_path='16_18d_mix_seq')
84
-    predict(file_path='D:\\data\\quantization\\stock196_18d_20200325.log', model_path='196_18d_mix_6D_ma5_s_seq')
84
+    predict(file_path='D:\\data\\quantization\\stock196_18d_20200326.log', model_path='196_18d_mix_6D_ma5_s_seq')
85 85
     # predict(file_path='D:\\data\\quantization\\stock9_18_4.log', model_path='18d_dnn_seq.h5')

+ 1 - 1
mix/mix_predict_everyday.py

@@ -227,6 +227,6 @@ if __name__ == '__main__':
227 227
     # predict(file_path='D:\\data\\quantization\\stock6_5_test.log', model_path='5d_dnn_seq.h5')
228 228
     # predict(file_path='D:\\data\\quantization\\stock6_test.log', model_path='15m_dnn_seq.h5')
229 229
     # multi_predict()
230
-    predict_today("D:\\data\\quantization\\stock186_18d_20200325.log", 20200325, model='186_18d_mix_6D_ma5_s_seq.h5', log=True)
230
+    predict_today("D:\\data\\quantization\\stock186_18d_20200326.log", 20200326, model='186_18d_mix_6D_ma5_s_seq.h5', log=True)
231 231
     # join_two_day(20200305, 20200305)
232 232
     # check_everyday(20200311, 20200312)

+ 7 - 5
mix/mix_train_190.py

@@ -25,8 +25,10 @@ model_path = '196_18d_mix_6D_ma5_s_seq.h5'
25 25
 file_path1='D:\\data\\quantization\\stock196_18d_test.log'
26 26
 
27 27
 '''
28
+大盘预测
28 29
 结果均用使用ma
29
-4 ROC         
30
+6 ROC
31
+5 after用5日         
30 32
 '''
31 33
 
32 34
 def read_data(path, path1=file_path1):
@@ -36,10 +38,10 @@ def read_data(path, path1=file_path1):
36 38
             line = eval(f.readline().strip())
37 39
             lines.append(line)
38 40
 
39
-    # with open(path1) as f:
40
-    #     for x in range(50000):
41
-    #         line = eval(f.readline().strip())
42
-    #         lines.append(line)
41
+    with open(path1) as f:
42
+        for x in range(50000):
43
+            line = eval(f.readline().strip())
44
+            lines.append(line)
43 45
 
44 46
     random.shuffle(lines)
45 47
     print('读取数据完毕')

+ 39 - 20
mix/mix_train_200.py

@@ -19,16 +19,29 @@ from keras.callbacks import EarlyStopping
19 19
 early_stopping = EarlyStopping(monitor='accuracy', patience=5, verbose=2)
20 20
 
21 21
 epochs= 68
22
-size = 400000 #18W 60W
23
-file_path = 'D:\\data\\quantization\\stock200_18d_train2.log'
24
-model_path = '200_18d_mix_6D_ma5_s_seq.h5'
25
-file_path1='D:\\data\\quantization\\stock200_18d_test.log'
22
+size = 410000 #18W 60W
23
+file_path = 'D:\\data\\quantization\\stock202_18d_train2.log'
24
+model_path = '213_18d_mix_6D_ma5_s_seq.h5'
25
+file_path1='D:\\data\\quantization\\stock202_18d_test.log'
26 26
 '''
27 27
 ROC
28
-1大盘涨停 涨停/跌停/创业板涨停跌停   
29
-2大盘涨停 涨停/跌停
30
-3beta函数修正
31
-4用两个kernel
28
+0大盘涨停 涨停/跌停/创业板涨停跌停     42,97,45    43,97,36 
29
+1大盘涨停 涨停/跌停                    48,98,38
30
+2beta函数修正                          46,98,39    47,97,27      <---2
31
+3用两个kernel                          42,98,42
32
+4窗口大小修改  
33
+  3*3                                  45,98,43    45,97,34      <---4
34
+  6*6                                  46,98,41
35
+  6*20                                 45,98,38    47,97,28      <---5
36
+  9*9                                  42,97,45    42,97,36      <---6
37
+  9*20                                 45,98,39    
38
+  3*20                                 42,98,40    44,97,30      <---7
39
+5 修改神经元
40
+ mlp -> 4+3层                          42,98,41    51,97,31      <---9
41
+ 外层 -> 3+4层                                     52,97,37      <--11
42
+beta+4+3                                           49,96,37
43
+beta+2+5                                           52,96,38
44
+
32 45
 
33 46
 '''
34 47
 
@@ -40,7 +53,7 @@ def read_data(path, path1=file_path1):
40 53
             lines.append(line)
41 54
 
42 55
     with open(path1) as f:
43
-        for x in range(30000):
56
+        for x in range(50000):
44 57
             line = eval(f.readline().strip())
45 58
             lines.append(line)
46 59
 
@@ -77,8 +90,11 @@ train_x_c = train_x[:,18*20:]
77 90
 def create_mlp(dim, regress=False):
78 91
     # define our MLP network
79 92
     model = Sequential()
80
-    model.add(Dense(96, input_dim=dim, activation="relu"))
81
-    model.add(Dense(96, activation="relu"))
93
+    model.add(Dense(128, input_dim=dim, activation="relu"))
94
+    model.add(Dropout(0.2))
95
+    model.add(Dense(128, activation="relu"))
96
+    model.add(Dense(128, activation="relu"))
97
+    # model.add(Dense(128, activation="relu"))
82 98
 
83 99
     # check to see if the regression node should be added
84 100
     if regress:
@@ -131,30 +147,33 @@ def create_cnn(width, height, depth, size=48, kernel_size=(5, 6), regress=False,
131 147
 
132 148
 # create the MLP and CNN models
133 149
 mlp = create_mlp(train_x_c.shape[1], regress=False)
134
-# cnn_0 = create_cnn(18, 21, 1, kernel_size=(3, 3), size=64, regress=False, output=128)       # 31 97 46
135
-# cnn_0 = create_cnn(18, 21, 1, kernel_size=(6, 6), size=64, regress=False, output=128)         # 29 98 47
136
-# cnn_0 = create_cnn(18, 21, 1, kernel_size=(9, 9), size=64, regress=False, output=128)         # 28 97 53
137
-cnn_0 = create_cnn(18, 20, 1, kernel_size=(3, 20), size=96, regress=False, output=96)       #A23 99 33 A' 26 99 36 #B 34 98 43
138
-# cnn_1 = create_cnn(18, 21, 1, kernel_size=(18, 11), size=96, regress=False, output=96)
150
+# cnn_0 = create_cnn(18, 20, 1, kernel_size=(3, 3), size=90, regress=False, output=96)       # 31 97 46
151
+cnn_0 = create_cnn(18, 20, 1, kernel_size=(6, 20), size=96, regress=False, output=96)         # 29 98 47
152
+# cnn_0 = create_cnn(18, 20, 1, kernel_size=(9, 9), size=90, regress=False, output=96)         # 28 97 53
153
+# cnn_0 = create_cnn(18, 20, 1, kernel_size=(3, 20), size=90, regress=False, output=96)
154
+# cnn_1 = create_cnn(18, 20, 1, kernel_size=(18, 10), size=80, regress=False, output=96)
139 155
 # cnn_1 = create_cnn(9, 26, 1, kernel_size=(2, 14), size=36, regress=False, output=64)
140 156
 
141 157
 # create the input to our final set of layers as the *output* of both
142 158
 # the MLP and CNN
143
-combinedInput = concatenate([mlp.output, cnn_0.output])
159
+combinedInput = concatenate([mlp.output, cnn_0.output, ])
144 160
 
145 161
 # our final FC layer head will have two dense layers, the final one
146 162
 # being our regression head
147 163
 x = Dense(1024, activation="relu", kernel_regularizer=regularizers.l1(0.003))(combinedInput)
148 164
 x = Dropout(0.2)(x)
149 165
 x = Dense(1024, activation="relu")(x)
166
+x = Dropout(0.2)(x)
167
+x = Dense(1024, activation="relu")(x)
150 168
 x = Dense(1024, activation="relu")(x)
169
+# x = Dense(512, activation="relu")(x)
151 170
 # 在建设一层
152 171
 x = Dense(3, activation="softmax")(x)
153 172
 
154 173
 # our final model will accept categorical/numerical data on the MLP
155 174
 # input and images on the CNN input, outputting a single value (the
156 175
 # predicted price of the house)
157
-model = Model(inputs=[mlp.input, cnn_0.input,], outputs=x)
176
+model = Model(inputs=[mlp.input, cnn_0.input, ], outputs=x)
158 177
 
159 178
 
160 179
 print("Starting training ")
@@ -169,7 +188,7 @@ model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=['accuracy
169 188
 # train the model
170 189
 print("[INFO] training model...")
171 190
 model.fit(
172
-    [train_x_c, train_x_a,], train_y,
191
+    [train_x_c, train_x_a, ], train_y,
173 192
     # validation_data=([testAttrX, testImagesX], testY),
174 193
     # epochs=int(3*train_x_a.shape[0]/1300),
175 194
     epochs=epochs,
@@ -187,7 +206,7 @@ test_x_c = test_x[:,18*20:]
187 206
 
188 207
 # make predictions on the testing data
189 208
 print("[INFO] predicting house prices...")
190
-score  = model.evaluate([test_x_c, test_x_a], test_y)
209
+score  = model.evaluate([test_x_c, test_x_a,], test_y)
191 210
 
192 211
 print(score)
193 212
 print('Test score:', score[0])

+ 196 - 0
mix/mix_train_300.py

@@ -0,0 +1,196 @@
1
+import keras
2
+# -*- encoding:utf-8 -*-
3
+import numpy as np
4
+from keras.models import Sequential
5
+# 优化方法选用Adam(其实可选项有很多,如SGD)
6
+from keras.optimizers import Adam
7
+import random
8
+from keras.models import load_model
9
+from imblearn.over_sampling import RandomOverSampler
10
+from keras.utils import np_utils
11
+# 用于模型初始化,Conv2D模型初始化、Activation激活函数,MaxPooling2D是池化层
12
+# Flatten作用是将多位输入进行一维化
13
+# Dense是全连接层
14
+from keras.layers import Conv2D, Activation, MaxPool2D, Flatten, Dense,Dropout,Input,MaxPooling2D,BatchNormalization,concatenate
15
+from keras import regularizers
16
+from keras.models import Model
17
+from keras.callbacks import EarlyStopping
18
+
19
# Stop training once the monitored 'accuracy' metric has not improved for 5
# epochs. model.fit below is called without validation data, so this is the
# training accuracy.
early_stopping = EarlyStopping(monitor='accuracy', patience=5, verbose=2)

# Maximum number of training epochs passed to model.fit.
epochs= 8
# Number of lines read_data reads from the training file.
# NOTE(review): "18W 60W" presumably records earlier sizes (180k / 600k) -- confirm.
size = 10000 #18W 60W
# Training sample file (first argument of read_data).
file_path = 'D:\\data\\quantization\\stock300_18d_train2.log'
# Where the trained model is saved.
model_path = '300_18d_mix_6D_ma5_s_seq.h5'
# Second sample file; used as the default for read_data's path1 argument.
file_path1='D:\\data\\quantization\\stock300_18d_test.log'
# Second dimension of the 30 x col grid input and width of the conv kernel.
col = 18
# The bare string below is a note, not executed logic.
# NOTE(review): it presumably lists the grid shape per indicator -- confirm.
'''
ROC     30*18
DMI     30*20
MACD    30*19
RSI     30*17

'''
34
+
35
def read_data(path, path1=file_path1):
    """Load, shuffle, split and oversample the samples used for training.

    Up to ``size`` lines are read from ``path`` and up to 20000 lines from
    ``path1``; each non-blank line is evaluated into a sequence. The combined
    samples are shuffled, the first 85% become the training part and the rest
    the test part. Features are all elements of a sample except the last two
    (length taken from the first sample); the label is the last element.
    Only the training part is passed through ``RandomOverSampler``.

    Args:
        path: Main sample file.
        path1: Additional sample file appended to the main one.

    Returns:
        ``(train_x, train_y, test_x, test_y)`` where the training pair is the
        oversampled data and the test pair are plain numpy arrays.

    Raises:
        IndexError: If neither file yields a sample.
    """
    def _load(source, limit):
        # Parse at most `limit` lines of `source`, stopping cleanly at EOF.
        rows = []
        with open(source) as f:
            for _ in range(limit):
                raw = f.readline()
                if not raw:
                    # End of file: a shorter file used to crash in eval('').
                    break
                raw = raw.strip()
                if raw:
                    # NOTE(review): eval() executes arbitrary code from the
                    # file; only use on trusted data.
                    rows.append(eval(raw))
        return rows

    lines = _load(path, size) + _load(path1, 20000)

    random.shuffle(lines)
    print('读取数据完毕')

    d = int(0.85 * len(lines))
    length = len(lines[0])

    train_x = [s[:length - 2] for s in lines[:d]]
    train_y = [s[-1] for s in lines[:d]]
    test_x = [s[:length - 2] for s in lines[d:]]
    test_y = [s[-1] for s in lines[d:]]

    print('转换数据完毕')

    ros = RandomOverSampler(random_state=0)
    # fit_sample is the deprecated spelling of fit_resample and is missing in
    # newer imbalanced-learn releases; prefer the new name, fall back to the old.
    resample = getattr(ros, 'fit_resample', None) or ros.fit_sample
    X_resampled, y_resampled = resample(np.array(train_x), np.array(train_y))

    print('数据重采样完毕')

    return X_resampled, y_resampled, np.array(test_x), np.array(test_y)
66
+
67
+
68
# Load the oversampled training data and the held-out test data.
train_x, train_y, test_x, test_y = read_data(file_path)

# The leading 30 * col features form a 30 x col single-channel grid for the
# conv branch; everything after them stays flat for the dense branch.
train_x_a = train_x[:, :30 * col].reshape(train_x.shape[0], 30, col, 1)
train_x_c = train_x[:, 30 * col:]
75
+
76
+
77
def create_mlp(dim, regress=False):
    """Build the dense branch of the combined model.

    Args:
        dim: Number of input features.
        regress: When true, a single linear output unit is appended.

    Returns:
        A ``Sequential`` model: Dense(128, relu) -> Dropout(0.2) ->
        Dense(128, relu), optionally followed by Dense(1, linear).
    """
    layers = [
        Dense(128, input_dim=dim, activation="relu"),
        Dropout(0.2),
        Dense(128, activation="relu"),
    ]

    # Optional regression head.
    if regress:
        layers.append(Dense(1, activation="linear"))

    return Sequential(layers)
90
+
91
+
92
def create_cnn(width, height, depth, size=48, kernel_size=(5, 6), regress=False, output=24):
    """Build the convolutional branch of the combined model.

    Args:
        width: Size of the first axis of the input.
        height: Size of the second axis of the input.
        depth: Number of input channels (channels-last). This was previously
            ignored and the input was always built with one channel.
        size: Number of convolution filters.
        kernel_size: Convolution kernel size.
        regress: When true, a single linear output unit is appended.
        output: Width of the two dense layers after flattening.

    Returns:
        A ``Model`` mapping a ``(width, height, depth)`` input through
        Conv2D(strides=2, same padding) -> ReLU -> BatchNorm -> Flatten ->
        Dense(output) -> ReLU -> BatchNorm -> Dropout(0.2) -> Dense(output)
        -> ReLU, optionally followed by Dense(1, linear).
    """
    # Channels-last ordering: normalise over the last axis.
    chanDim = -1

    # Honour the requested channel count instead of hard-coding 1.
    inputs = Input(shape=(width, height, depth))

    # CONV => RELU => BN
    x = Conv2D(size, kernel_size, strides=2, padding="same")(inputs)
    x = Activation("relu")(x)
    x = BatchNormalization(axis=chanDim)(x)

    # Flatten the volume, then FC => RELU => BN => DROPOUT
    x = Flatten()(x)
    x = Dense(output)(x)
    x = Activation("relu")(x)
    x = BatchNormalization(axis=chanDim)(x)
    x = Dropout(0.2)(x)

    # Second FC layer of the same width.
    x = Dense(output)(x)
    x = Activation("relu")(x)

    # Optional regression head.
    if regress:
        x = Dense(1, activation="linear")(x)

    return Model(inputs, x)
131
+
132
+
133
# Build the two branches: a dense branch over the flat features and a conv
# branch over the 30 x col grid.
mlp = create_mlp(train_x_c.shape[1], regress=False)
cnn_0 = create_cnn(30, col, 1, kernel_size=(6, col), size=96, regress=False, output=96)
# Alternative conv settings left disabled in the original script, all for an
# 18 x 20 input: kernel (3, 3) / (9, 9) / (3, 20) with size=90, and a second
# branch with kernel (18, 10), size=80; plus a 9 x 26 input with kernel (2, 14).

# The classification head consumes the concatenated outputs of both branches.
combinedInput = concatenate([mlp.output, cnn_0.output])

# Head: one L1-regularised dense layer, dropout, three more dense layers and
# a 3-way softmax output.
x = Dense(1024, activation="relu", kernel_regularizer=regularizers.l1(0.003))(combinedInput)
x = Dropout(0.2)(x)
for _ in range(3):
    x = Dense(1024, activation="relu")(x)
x = Dense(3, activation="softmax")(x)

# The final model takes the flat features first and the grid second.
model = Model(inputs=[mlp.input, cnn_0.input], outputs=x)


print("Starting training ")

# Adam with a small learning-rate decay; categorical cross-entropy because
# the output is a 3-class softmax.
opt = Adam(lr=1e-3, decay=1e-3 / 200)
model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=['accuracy'])

# Train on the oversampled training data; no validation set is supplied.
print("[INFO] training model...")
model.fit(
    [train_x_c, train_x_a],
    train_y,
    epochs=epochs,
    batch_size=2048,
    shuffle=True,
    callbacks=[early_stopping],
)

model.save(model_path)

# Split the held-out samples the same way as the training samples.
test_x_a = test_x[:, :30 * col].reshape(test_x.shape[0], 30, col, 1)
test_x_c = test_x[:, 30 * col:]

# Evaluate on the held-out samples. The log text below is a leftover from a
# house-price example; it is kept unchanged because it is runtime output.
print("[INFO] predicting house prices...")
score = model.evaluate([test_x_c, test_x_a], test_y)

print(score)
print('Test score:', score[0])
print('Test accuracy:', score[1])