Browse Source

mix(cnn+dnn)训练 16数据最好

yufeng 4 years ago
parent
commit
a9e02f1cc1

BIN
mix/16_18d_mix_5D_close_seq.h5


BIN
mix/16_18d_mix_seq.h5


+ 8 - 8
mix/lstm_predict.py

@@ -45,14 +45,14 @@ def predict(file_path='', model_path='15min_dnn_seq.h5', idx=-1):
45 45
     test_x,test_y,lines=read_data(file_path)
46 46
     print('Load data success')
47 47
 
48
-    test_x_a = test_x[:,:18*16]
49
-    test_x_a = test_x_a.reshape(test_x.shape[0], 18, 16)
50
-    test_x_b = test_x[:, 18*16:18*16+10*18]
51
-    test_x_b = test_x_b.reshape(test_x.shape[0], 18, 10, 1)
52
-    test_x_c = test_x[:,18*16+10*18:]
48
+    test_x_a = test_x[:,:18*24]
49
+    test_x_a = test_x_a.reshape(test_x.shape[0], 18, 24)
50
+    # test_x_b = test_x[:, 18*16:18*16+10*18]
51
+    # test_x_b = test_x_b.reshape(test_x.shape[0], 18, 10, 1)
52
+    test_x_c = test_x[:,18*24:]
53 53
 
54 54
     model=load_model(model_path)
55
-    score = model.evaluate([test_x_c, test_x_a, test_x_b], test_y)
55
+    score = model.evaluate([test_x_c, test_x_a, ], test_y)
56 56
     print('MIX', score)
57 57
 
58 58
     up_num = 0
@@ -62,7 +62,7 @@ def predict(file_path='', model_path='15min_dnn_seq.h5', idx=-1):
62 62
     down_error = 0
63 63
     down_right = 0
64 64
     i = 0
65
-    result=model.predict([test_x_c, test_x_a, test_x_b])
65
+    result=model.predict([test_x_c, test_x_a, ])
66 66
     win_dnn = []
67 67
     for r in result:
68 68
         fact = test_y[i]
@@ -102,7 +102,7 @@ def predict(file_path='', model_path='15min_dnn_seq.h5', idx=-1):
102 102
 
103 103
 
104 104
 if __name__ == '__main__':
105
-    predict(file_path='D:\\data\\quantization\\stock19_18d_test.log', model_path='19_18d_lstm_seq.h5')
105
+    predict(file_path='D:\\data\\quantization\\stock17_18d_test.log', model_path='17_18d_lstm_seq.h5')
106 106
     # predict(file_path='D:\\data\\quantization\\stock6_test.log', model_path='15m_dnn_seq.h5')
107 107
     # multi_predict(model='15_18d')
108 108
     # predict_today(20200229, model='11_18d')

+ 24 - 25
mix/lstm_train.py

@@ -20,9 +20,8 @@ from keras.callbacks import EarlyStopping
20 20
 
21 21
 early_stopping = EarlyStopping(monitor='accuracy', patience=5, verbose=2)
22 22
 
23
-epochs= 40
24
-size = 80000
25
-
23
+epochs= 200
24
+size = 380000
26 25
 
27 26
 def read_data(path):
28 27
     lines = []
@@ -50,13 +49,13 @@ def read_data(path):
50 49
     return X_resampled,y_resampled,np.array(test_x),np.array(test_y)
51 50
 
52 51
 
53
-train_x,train_y,test_x,test_y=read_data("D:\\data\\quantization\\stock19_18d_train_1.log")
52
+train_x,train_y,test_x,test_y=read_data("D:\\data\\quantization\\stock17_18d_train.log")
54 53
 
55
-train_x_a = train_x[:,:18*16]
56
-train_x_a = train_x_a.reshape(train_x.shape[0], 18, 16)
57
-train_x_b = train_x[:, 18*16:18*16+10*18]
58
-train_x_b = train_x_b.reshape(train_x.shape[0], 18, 10, 1)
59
-train_x_c = train_x[:,18*16+10*18:]
54
+train_x_a = train_x[:,:18*24]
55
+train_x_a = train_x_a.reshape(train_x.shape[0], 18, 24)
56
+# train_x_b = train_x[:, 18*24:18*16+10*18]
57
+# train_x_b = train_x_b.reshape(train_x.shape[0], 18, 10, 1)
58
+train_x_c = train_x[:,18*24:]
60 59
 
61 60
 
62 61
 def create_mlp(dim, regress=False):
@@ -127,7 +126,7 @@ def create_lstm(sample, timesteps, input_dim):
127 126
 
128 127
     x = inputs
129 128
 
130
-    x = LSTM(units = 32, input_shape=(18, 16), dropout=0.2
129
+    x = LSTM(units = 64, input_shape=(timesteps, input_dim), dropout=0.2
131 130
                )(x)
132 131
     # x = LSTM(16*16, return_sequences=False)
133 132
     # x = Activation("relu")(x)
@@ -143,26 +142,26 @@ def create_lstm(sample, timesteps, input_dim):
143 142
 
144 143
 # create the MLP and CNN models
145 144
 mlp = create_mlp(train_x_c.shape[1], regress=False)
146
-cnn_0 = create_lstm(train_x_a.shape[1], 18, 16)
147
-cnn_1 = create_cnn(18, 10, 1, kernel_size=(3, 5), filters=32, regress=False, output=120)
145
+cnn_0 = create_lstm(train_x_a.shape[1], 18, 24)
146
+# cnn_1 = create_cnn(18, 10, 1, kernel_size=(3, 5), filters=32, regress=False, output=120)
148 147
 
149 148
 # create the input to our final set of layers as the *output* of both
150 149
 # the MLP and CNN
151
-combinedInput = concatenate([mlp.output, cnn_0.output, cnn_1.output])
150
+combinedInput = concatenate([mlp.output, cnn_0.output,])
152 151
 
153 152
 # our final FC layer head will have two dense layers, the final one
154 153
 # being our regression head
155
-x = Dense(888, activation="relu", kernel_regularizer=regularizers.l1(0.003))(combinedInput)
154
+x = Dense(666, activation="relu", kernel_regularizer=regularizers.l1(0.003))(combinedInput)
156 155
 x = Dropout(0.2)(x)
157
-x = Dense(888, activation="relu")(x)
158
-x = Dense(888, activation="relu")(x)
156
+x = Dense(666, activation="relu")(x)
157
+x = Dense(666, activation="relu")(x)
159 158
 # 在建设一层
160 159
 x = Dense(5, activation="softmax")(x)
161 160
 
162 161
 # our final model will accept categorical/numerical data on the MLP
163 162
 # input and images on the CNN input, outputting a single value (the
164 163
 # predicted price of the house)
165
-model = Model(inputs=[mlp.input, cnn_0.input, cnn_1.input], outputs=x)
164
+model = Model(inputs=[mlp.input, cnn_0.input,], outputs=x)
166 165
 
167 166
 
168 167
 print("Starting training ")
@@ -178,7 +177,7 @@ model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=['accuracy
178 177
 # train the model
179 178
 print("[INFO] training model...")
180 179
 model.fit(
181
-    [train_x_c, train_x_a, train_x_b], train_y,
180
+    [train_x_c, train_x_a, ], train_y,
182 181
     # validation_data=([testAttrX, testImagesX], testY),
183 182
     # epochs=int(3*train_x_a.shape[0]/1300),
184 183
     epochs=epochs,
@@ -186,20 +185,20 @@ model.fit(
186 185
     callbacks=[early_stopping]
187 186
 )
188 187
 
189
-test_x_a = test_x[:,:18*16]
190
-test_x_a = test_x_a.reshape(test_x.shape[0], 18, 16)
191
-test_x_b = test_x[:, 18*16:18*16+10*18]
192
-test_x_b = test_x_b.reshape(test_x.shape[0], 18, 10, 1)
193
-test_x_c = test_x[:,18*16+10*18:]
188
+test_x_a = test_x[:,:18*24]
189
+test_x_a = test_x_a.reshape(test_x.shape[0], 18, 24)
190
+# test_x_b = test_x[:, 18*16:18*16+10*18]
191
+# test_x_b = test_x_b.reshape(test_x.shape[0], 18, 10, 1)
192
+test_x_c = test_x[:,18*24:]
194 193
 
195 194
 # make predictions on the testing data
196 195
 print("[INFO] predicting house prices...")
197
-score  = model.evaluate([test_x_c, test_x_a, test_x_b], test_y)
196
+score  = model.evaluate([test_x_c, test_x_a], test_y)
198 197
 
199 198
 print(score)
200 199
 print('Test score:', score[0])
201 200
 print('Test accuracy:', score[1])
202 201
 
203
-path="19_18d_lstm_seq.h5"
202
+path="17_18d_lstm_seq.h5"
204 203
 model.save(path)
205 204
 model=None

+ 106 - 0
mix/mix_predict_1.py

@@ -0,0 +1,106 @@
1
+# -*- encoding:utf-8 -*-
2
+import numpy as np
3
+from keras.models import load_model
4
+import joblib
5
+
6
+
7
def read_data(path):
    """Load evaluation rows from a text log and split them into features and labels.

    Every non-blank line of ``path`` is evaluated as one Python literal row.
    A row is kept only when ``row[-2][0]`` (presumably the security code --
    TODO confirm) starts with '0' or '3'.

    Args:
        path: Path of the log file to read.

    Returns:
        A tuple ``(features, labels, rows)``: ``features`` is an array built
        from each kept row minus its last two elements, ``labels`` is an array
        built from the last element of each kept row, and ``rows`` is the list
        of kept rows themselves.

    Raises:
        ValueError: If no row of the file passes the filter.
    """
    rows = []
    with open(path) as f:
        for raw in f:
            raw = raw.strip()
            if not raw:
                # A blank (typically trailing) line is not a row; skip it
                # instead of letting eval('') raise SyntaxError.
                continue
            # NOTE(review): eval() executes whatever expression the file
            # contains. Only use this on logs produced by trusted tooling.
            row = eval(raw)
            if row[-2][0].startswith(('0', '3')):
                rows.append(row)

    if not rows:
        raise ValueError('no usable rows found in ' + str(path))

    # The row width is taken from the first kept row, as before.
    size = len(rows[0])
    features = [row[:size - 2] for row in rows]
    labels = [row[size - 1] for row in rows]
    return np.array(features), np.array(labels), rows
19
+
20
+
21
+def _score(fact, line):
22
+    with open('dnn_predict_dmi_18d.txt', 'a') as f:
23
+        f.write(str([line[-2], line[-1]]) + "\n")
24
+
25
+    up_right = 0
26
+    up_error = 0
27
+
28
+    if fact[0] == 1:
29
+        up_right = up_right + 1.12
30
+    elif fact[1] == 1:
31
+        up_right = up_right + 1.06
32
+    elif fact[2] == 1:
33
+        up_right = up_right + 1
34
+        up_error = up_error + 0.5
35
+    elif fact[3] == 1:
36
+        up_right = up_right + 0.94
37
+        up_error = up_error + 1
38
+    else:
39
+        up_error = up_error + 1
40
+        up_right = up_right + 0.88
41
+    return up_right,up_error
42
+
43
+
44
def predict(file_path='', model_path='15min_dnn_seq.h5', idx=-1):
    """Evaluate a saved two-input model on a log file and print weighted hit rates.

    The first ``18*24`` feature columns are reshaped to ``(18, 24, 1)`` for one
    model input; the remaining columns are passed as the other input.

    Args:
        file_path: Log file handed to ``read_data``.
        model_path: Path of the saved Keras model.
        idx: When equal to -2 no sample is scored (the tallies stay at zero);
            any other value scores with the 0.6 thresholds below.

    Returns:
        A tuple ``(win_dnn, up_ratio, down_ratio)``. ``win_dnn`` is always an
        empty list; the ratios are the accumulated "right" scores divided by
        the number of up / down picks (a count of 0 is replaced by 1).
    """
    test_x, test_y, lines = read_data(file_path)

    test_x_a = test_x[:, :18*24].reshape(test_x.shape[0], 18, 24, 1)
    test_x_c = test_x[:, 18*24:]

    model = load_model(model_path)
    score = model.evaluate([test_x_c, test_x_a], test_y)
    print('MIX', score)

    up_num = 0
    up_error = 0
    up_right = 0
    down_num = 0
    down_error = 0
    down_right = 0
    win_dnn = []  # never filled; kept because it is part of the return value

    # (down_right, down_error) for label positions 0, 1, 2 and 3.
    down_contributions = ((1.12, 1), (1.06, 1), (1, 0), (0.94, 0))

    result = model.predict([test_x_c, test_x_a])
    for r, fact, line in zip(result, test_y, lines):
        if idx == -2:
            # This mode never contributed to any tally.
            continue

        if r[0] > 0.6 or r[1] > 0.6:
            tmp_right, tmp_error = _score(fact, line)
            up_right = tmp_right + up_right
            up_error = tmp_error + up_error
            up_num = up_num + 1
        elif r[3] > 0.6 or r[4] > 0.6:
            gain, miss = 0.88, 0
            for position, contribution in enumerate(down_contributions):
                if fact[position] == 1:
                    gain, miss = contribution
                    break
            down_right = down_right + gain
            down_error = down_error + miss
            down_num = down_num + 1

    # Avoid division by zero when nothing was picked on one side.
    if up_num == 0:
        up_num = 1
    if down_num == 0:
        down_num = 1
    print('MIX', up_right, up_num, up_right/up_num, up_error/up_num, down_right/down_num, down_error/down_num)
    return win_dnn, up_right/up_num, down_right/down_num
100
+
101
+
102
if __name__ == '__main__':
    # Script entry point: score the saved mix model on the close-price test log.
    predict(
        file_path='D:\\data\\quantization\\stock16_18d_test_close.log',
        model_path='16_18d_mix_seq.h5',
    )
    # Earlier invocations, kept for reference:
    # predict(file_path='D:\\data\\quantization\\stock6_test.log', model_path='15m_dnn_seq.h5')
    # multi_predict(model='15_18d')
    # predict_today(20200229, model='11_18d')

+ 16 - 33
mix/mix_predict_by_day.py

@@ -22,10 +22,7 @@ def predict(file_path='', model_path='15min_dnn_seq'):
22 22
     day_lines = read_data(file_path)
23 23
     print('数据读取完毕')
24 24
 
25
-    models = []
26
-    for x in range(0, 12):
27
-        models.append(load_model(model_path + '_' + str(x) + '.h5'))
28
-    estimator = joblib.load('km_dmi_18.pkl')
25
+    model=load_model(model_path + '.h5')
29 26
     print('模型加载完毕')
30 27
 
31 28
     items = sorted(day_lines.keys())
@@ -35,39 +32,25 @@ def predict(file_path='', model_path='15min_dnn_seq'):
35 32
 
36 33
         up_num = 0
37 34
         down_num = 0
38
-        x = 24 # 每条数据项数
39
-        k = 18 # 周期
35
+        size = len(lines[0])
36
+
40 37
         for line in lines:
41
-            v = line[1:x*k + 1]
42
-            v = np.array(v)
43
-            v = v.reshape(k, x)
44
-            v = v[:,4:8]
45
-            v = v.reshape(1, 4*k)
46
-            # print(v)
47
-            r = estimator.predict(v)
38
+            train_x = np.array([line[:size - 1]])
39
+            train_x_a = train_x[:,:18*24]
40
+            train_x_a = train_x_a.reshape(train_x.shape[0], 18, 24, 1)
41
+            # train_x_b = train_x[:, 18*18:18*18+2*18]
42
+            # train_x_b = train_x_b.reshape(train_x.shape[0], 18, 2, 1)
43
+            train_x_c = train_x[:,18*24:]
48 44
 
49
-            train_x = np.array([line[:-1]])
50
-            result = models[r[0]].predict(train_x)
45
+            result = model.predict([train_x_c, train_x_a])
51 46
 
52 47
             if result[0][3] > 0.5 or result[0][4] > 0.5:
53 48
                 down_num = down_num + 1
54 49
             elif result[0][1] > 0.5 or result[0][0] > 0.5:
55
-                up_num = up_num + 0.6  # 乐观调大 悲观调小
56
-
57
-            # if result[0][0] > 0.5 or result[0][1] > 0.5:
58
-            #     if r[0] in [0,2,3,4,5,9,10,11]:
59
-            #         up_num = up_num + 1
60
-            #     elif r[0] in [8]:
61
-            #         up_num = up_num + 0.6
62
-            #     else:
63
-            #         up_num = up_num + 0.4
64
-            # if result[0][3] > 0.5 or result[0][4] > 0.5:
65
-            #     if r[0] in [4,6,]:
66
-            #         down_num = down_num + 1
67
-            #     elif r[0] in [0,1,3,7,8,]:
68
-            #         down_num = down_num + 0.6
69
-            #     else:
70
-            #         down_num = down_num + 0.4
50
+                up_num = up_num + 1
51
+            else:
52
+                up_num = up_num + 0.4 # 乐观调大 悲观调小
53
+                down_num = down_num + 0.6
71 54
 
72 55
         print(key, int(up_num), int(down_num), (down_num*1.2 + 2)/(up_num*1.2 + 2))
73 56
 
@@ -76,6 +59,6 @@ if __name__ == '__main__':
76 59
     # predict(file_path='D:\\data\\quantization\\stock6_5_test.log', model_path='5d_dnn_seq.h5')
77 60
     # predict(file_path='D:\\data\\quantization\\stock9_18_20200220.log', model_path='18d_dnn_seq.h5')
78 61
     # predict(file_path='D:\\data\\quantization\\stock9_18_2.log', model_path='18d_dnn_seq.h5')
79
-    predict(file_path='D:\\data\\quantization\\stock11_18d_20200229.log', model_path='11_18d_dnn_seq')
80
-    # predict(file_path='D:\\data\\quantization\\stock11_18d_20190103_20190604.log', model_path='14_18d_dnn_seq')
62
+    # predict(file_path='D:\\data\\quantization\\stock16_18d_20200310.log', model_path='16_18d_mix_seq')
63
+    predict(file_path='D:\\data\\quantization\\stock16_18d_20191225_20200310_1.log', model_path='16_18d_mix_seq')
81 64
     # predict(file_path='D:\\data\\quantization\\stock9_18_4.log', model_path='18d_dnn_seq.h5')

+ 189 - 0
mix/mix_predict_everyday.py

@@ -0,0 +1,189 @@
1
+# -*- encoding:utf-8 -*-
2
+import numpy as np
3
+from keras.models import load_model
4
+import joblib
5
+
6
+
7
# Codes that get an explicit rating line printed by predict_today.
holder_stock_list = [
    '000063.SZ',
    '002373.SZ',
    '300253.SZ',
    '300059.SZ',
    '300807.SZ',
    '600345.SH',
    # account B
    '300422.SZ',
    '300468.SZ',
]
18
+
19
+
20
def read_data(path):
    """Load rows from a text log and split them into features and labels.

    Every non-blank line of ``path`` is evaluated as one Python literal row.
    A row is kept only when ``row[-2][0]`` (presumably the security code --
    TODO confirm) starts with '0' or '3'.

    Args:
        path: Path of the log file to read.

    Returns:
        A tuple ``(features, labels, rows)``: ``features`` is an array built
        from each kept row minus its last two elements, ``labels`` is an array
        built from the last element of each kept row, and ``rows`` is the list
        of kept rows themselves.

    Raises:
        ValueError: If no row of the file passes the filter.
    """
    kept = []
    with open(path) as f:
        for text in f:
            text = text.strip()
            if not text:
                # Blank lines carry no row; eval('') would raise SyntaxError.
                continue
            # NOTE(review): eval() runs arbitrary expressions from the file;
            # feed it trusted logs only.
            row = eval(text)
            if row[-2][0].startswith(('0', '3')):
                kept.append(row)

    if not kept:
        raise ValueError('no usable rows found in ' + str(path))

    # The row width is taken from the first kept row, as before.
    size = len(kept[0])
    train_x = [row[:size - 2] for row in kept]
    train_y = [row[size - 1] for row in kept]
    return np.array(train_x), np.array(train_y), kept
32
+
33
+
34
+import pymongo
35
+from util.mongodb import get_mongo_table_instance
36
# MongoDB collections, opened once when the module is loaded.
code_table = get_mongo_table_instance('tushare_code')
k_table = get_mongo_table_instance('stock_day_k')
stock_concept_table = get_mongo_table_instance('tushare_concept_detail')
# Every document of the concept collection, loaded eagerly into a list.
all_concept_code_list = list(get_mongo_table_instance('tushare_concept').find({}))


# Industry names of interest (only referenced from commented-out filters here).
industry = [
    '家用电器',
    '元器件',
    'IT设备',
    '汽车服务',
    '汽车配件',
    '软件服务',
    '互联网',
    '纺织',
    '塑料',
    '半导体',
]

# Concept codes of interest, with the concept each code stands for.
A_concept_code_list = [
    'TS2',    # 5G
    'TS24',   # OLED
    'TS26',   # Healthy China
    'TS43',   # new-energy complete vehicles
    'TS59',   # Tesla
    'TS65',   # complete vehicles
    'TS142',  # Internet of Things
    'TS153',  # autonomous driving
    'TS163',  # Xiong'an sector - smart city
    'TS175',  # industrial automation
    'TS232',  # new-energy vehicles
    'TS254',  # artificial intelligence
    'TS258',  # internet healthcare
    'TS264',  # industrial internet
    'TS266',  # semiconductors
    'TS269',  # smart city
    'TS271',  # 3D glass
    'TS295',  # domestic chips
    'TS303',  # healthcare IT
    'TS323',  # charging stations
    'TS328',  # iris recognition
    'TS361',  # virus
]


# Running tallies filled and printed by predict_today.
gainian_map = {}  # per concept name
hangye_map = {}   # per industry name
74
+
75
def predict_today(day, model='10_18d', log=True):
    """Run the saved model over one day's log and report the resulting ratings.

    For each row, the model output decides between a "buy" report (positions 0
    or 1 above 0.5) and, for codes in ``holder_stock_list`` only, a rating line
    for the other outcomes. Buy reports are printed, counted per
    ``sw_industry`` in ``hangye_map`` and optionally appended to
    ``D:\\data\\quantization\\predict\\<day>.txt``.

    Args:
        day: Trading day, used in the input/output file names and as the upper
            bound of the k-line query.
        model: Model tag. ``model + '_seq.h5'`` is loaded, and ``model[:-4]``
            is used in the input file name.
        log: Buy reports are appended to the predict file only when this is
            exactly ``True``.
    """
    # NOTE(review): model[:-4] drops the last four characters (e.g. '_mix').
    # With the default '10_18d' that leaves just '10' -- confirm the default.
    data_path = 'D:\\data\\quantization\\stock' + model[:-4] + '_' + str(day) + '.log'

    rows = []
    with open(data_path) as f:
        for raw in f:
            raw = raw.strip()
            if raw:
                # Blank lines are skipped; eval('') would raise SyntaxError.
                # NOTE(review): eval() executes the file content; trusted
                # logs only.
                rows.append(eval(raw))

    # The row width is taken from the first row, as before.
    size = len(rows[0]) if rows else 0

    # Kept in its own name so the ``model`` tag is not overwritten.
    net = load_model(model + '_seq.h5')

    for line in rows:
        sample = np.array([line[:size - 1]])
        grid_input = sample[:, :18*24].reshape(sample.shape[0], 18, 24, 1)
        flat_input = sample[:, 18*24:]

        result = net.predict([flat_input, grid_input])
        probs = result[0]
        ts_code = line[-1][0]
        stock = code_table.find_one({'ts_code': ts_code})
        if stock is None:
            # Code missing from the code table: nothing can be reported.
            continue

        if probs[0] > 0.5 or probs[1] > 0.5:
            if ts_code.startswith('688'):
                continue
            # Drop ST names, plus names starting with 'N' or '*'.
            if stock['name'].startswith(('ST', 'N', '*')):
                continue

            if stock['ts_code'] in holder_stock_list:
                print(stock['ts_code'], stock['name'], '维持买入评级')

            # Latest k-lines up to and including ``day``, newest first.
            k_table_list = list(
                k_table.find({'code': ts_code, 'tradeDate': {'$lte': day}})
                .sort("tradeDate", pymongo.DESCENDING)
                .limit(5)
            )
            pct_chg = k_table_list[0]['pct_chg'] if k_table_list else None

            # Concept names are not collected at present, so this stays empty;
            # it is still part of the printed and logged report.
            concept_detail_list = []

            # Count the pick under its industry.
            hangye_map[stock['sw_industry']] = hangye_map.get(stock['sw_industry'], 0) + 1

            print(line[-1], stock['name'], stock['sw_industry'], str(concept_detail_list), 'buy', pct_chg)

            if log is True:
                with open('D:\\data\\quantization\\predict\\' + str(day) + '.txt', mode='a', encoding="utf-8") as f:
                    f.write(str(line[-1]) + ' ' + stock['name'] + ' ' + stock['sw_industry'] + ' ' + str(concept_detail_list) + ' buy' + '\n')

        elif stock['ts_code'] in holder_stock_list:
            # Non-buy outcomes are only reported for held codes.
            if probs[2] > 0.5:
                print(stock['ts_code'], stock['name'], '震荡评级')
            elif probs[3] > 0.5 or probs[4] > 0.5:
                print(stock['ts_code'], stock['name'], '赶紧卖出')
            else:
                print(stock['ts_code'], stock['name'], result[0],)

    print(gainian_map)
    print(hangye_map)
165
+
166
+
167
+def _read_pfile_map(path):
168
+    s_list = []
169
+    with open(path, encoding='utf-8') as f:
170
+        for line in f.readlines()[:]:
171
+            s_list.append(line)
172
+    return s_list
173
+
174
+
175
def join_two_day(a, b):
    """Print the entries of day ``a`` whose key also appears in the dmi file of day ``b``.

    Both files are read from ``D:\\data\\quantization\\predict\\``:
    ``<a>.txt`` and ``dmi_<b>.txt``. Two entries match when their characters
    2..10 (``entry[2:11]``) are equal. An entry of the first file is printed
    once per matching entry of the second file, in first-file order.

    Args:
        a: Day tag of the plain predict file.
        b: Day tag of the ``dmi_`` predict file.
    """
    from collections import Counter

    first_day = _read_pfile_map('D:\\data\\quantization\\predict\\' + str(a) + '.txt')
    second_day = _read_pfile_map('D:\\data\\quantization\\predict\\dmi_' + str(b) + '.txt')

    # How often each key occurs in the second file; one lookup per first-file
    # entry replaces a full scan of the second file.
    key_counts = Counter(entry[2:11] for entry in second_day)
    for entry in first_day:
        for _ in range(key_counts[entry[2:11]]):
            print(entry)
182
+
183
+
184
if __name__ == '__main__':
    # Script entry point: rate the 2020-03-10 log with the 16_18d mix model
    # and append the buy reports to the day's predict file.
    predict_today(20200310, model='16_18d_mix', log=True)
    # Other invocations, kept for reference:
    # predict(file_path='D:\\data\\quantization\\stock6_5_test.log', model_path='5d_dnn_seq.h5')
    # predict(file_path='D:\\data\\quantization\\stock6_test.log', model_path='15m_dnn_seq.h5')
    # multi_predict()
    # join_two_day(20200305, 20200305)

+ 5 - 5
mix/mix_train.py

@@ -19,8 +19,8 @@ from keras.callbacks import EarlyStopping
19 19
 
20 20
 early_stopping = EarlyStopping(monitor='accuracy', patience=5, verbose=2)
21 21
 
22
-epochs= 50
23
-size = 80000
22
+epochs= 120
23
+size = 380000
24 24
 
25 25
 
26 26
 def read_data(path):
@@ -120,8 +120,8 @@ def create_cnn(width, height, depth, filters=32, kernel_size=(5, 6), regress=Fal
120 120
 
121 121
 # create the MLP and CNN models
122 122
 mlp = create_mlp(train_x_c.shape[1], regress=False)
123
-cnn_0 = create_cnn(18, 16, 1, kernel_size=(3, 5), filters=32, regress=False, output=150)
124
-cnn_1 = create_cnn(18, 10, 1, kernel_size=(3, 4), filters=32, regress=False, output=120)
123
+cnn_0 = create_cnn(18, 16, 1, kernel_size=(6, 5), filters=32, regress=False, output=150)
124
+cnn_1 = create_cnn(18, 10, 1, kernel_size=(6, 6), filters=32, regress=False, output=120)
125 125
 
126 126
 # create the input to our final set of layers as the *output* of both
127 127
 # the MLP and CNN
@@ -176,6 +176,6 @@ print(score)
176 176
 print('Test score:', score[0])
177 177
 print('Test accuracy:', score[1])
178 178
 
179
-path="19_18d_mix_seq.h5"
179
+path="16_18d_mix_seq.h5"
180 180
 model.save(path)
181 181
 model=None

+ 173 - 0
mix/mix_train_1.py

@@ -0,0 +1,173 @@
1
+import keras
2
+# -*- encoding:utf-8 -*-
3
+import numpy as np
4
+from keras.models import Sequential
5
+# 优化方法选用Adam(其实可选项有很多,如SGD)
6
+from keras.optimizers import Adam
7
+import random
8
+from keras.models import load_model
9
+from imblearn.over_sampling import RandomOverSampler
10
+from keras.utils import np_utils
11
+# 用于模型初始化,Conv2D模型初始化、Activation激活函数,MaxPooling2D是池化层
12
+# Flatten作用是将多位输入进行一维化
13
+# Dense是全连接层
14
+from keras.layers import Conv2D, Activation, MaxPool2D, Flatten, Dense,Dropout,Input,MaxPooling2D,BatchNormalization,concatenate
15
+from keras import regularizers
16
+from keras.models import Model
17
+
18
# Training run settings.
epochs = 130   # passed to model.fit
size = 380000  # number of lines read_data takes from the training log
20
+
21
+
22
def read_data(path):
    """Read, shuffle, split and oversample the training log.

    At most the first ``size`` lines of ``path`` are read and evaluated as
    Python literal rows. The rows are shuffled, the first 70% become the
    training part and the rest the test part. Features are a row minus its
    last two elements; the label is the row's last element. The training part
    is then oversampled with ``RandomOverSampler(random_state=0)``.

    Args:
        path: Path of the training log.

    Returns:
        A tuple ``(X_resampled, y_resampled, test_x, test_y)``; the test part
        is returned as arrays without resampling.
    """
    from itertools import islice

    lines = []
    with open(path) as f:
        # islice stops at end of file, so a log shorter than ``size`` no
        # longer feeds empty strings to eval.
        for raw in islice(f, size):
            raw = raw.strip()
            if raw:
                # NOTE(review): eval() executes the file content; trusted
                # logs only.
                lines.append(eval(raw))

    random.shuffle(lines)
    print('读取数据完毕')

    d = int(0.7 * len(lines))
    train_rows, test_rows = lines[:d], lines[d:]

    train_x = [s[:-2] for s in train_rows]
    train_y = [s[-1] for s in train_rows]
    test_x = [s[:-2] for s in test_rows]
    test_y = [s[-1] for s in test_rows]

    print('转换数据完毕')

    ros = RandomOverSampler(random_state=0)
    # fit_sample was renamed to fit_resample in imbalanced-learn; prefer the
    # new name and fall back for old installations.
    resample = getattr(ros, 'fit_resample', None) or ros.fit_sample
    X_resampled, y_resampled = resample(np.array(train_x), np.array(train_y))

    print('数据重采样完毕')

    return X_resampled, y_resampled, np.array(test_x), np.array(test_y)
46
+
47
+
48
# Load the 16_18d training log once at import time.
train_x, train_y, test_x, test_y = read_data("D:\\data\\quantization\\stock16_18d_train.log")

# The first 18*24 columns are reshaped to (18, 24, 1) for the CNN branch;
# the remaining columns feed the MLP branch.
train_x_a = train_x[:, :18*24].reshape(train_x.shape[0], 18, 24, 1)
train_x_c = train_x[:, 18*24:]
55
+
56
+
57
def create_mlp(dim, regress=False):
    """Build the dense branch: two 64-unit ReLU layers on a ``dim``-wide input.

    When ``regress`` is true, a single linear output unit is appended.
    Returns the resulting ``Sequential`` model.
    """
    stack = [
        Dense(64, input_dim=dim, activation="relu"),
        Dense(64, activation="relu"),
    ]
    if regress:
        # Optional regression node.
        stack.append(Dense(1, activation="linear"))
    return Sequential(stack)
69
+
70
+
71
def create_cnn(width, height, depth, filters=(4, 6), kernel_size=(5, 6), regress=False, output=24):
    """Build the convolutional branch as a functional ``Model``.

    Layer order: Conv2D(32, ``kernel_size``, strides=2, padding "same") ->
    ReLU -> BatchNorm -> Flatten -> Dense(``output``) -> ReLU -> BatchNorm ->
    Dropout(0.2) -> Dense(``output``) -> ReLU, followed by a single linear
    unit when ``regress`` is true.

    ``depth`` and ``filters`` are accepted but not used: the input always has
    one channel and the convolution always has 32 filters.
    """
    channel_axis = -1  # channels-last ordering
    net_input = Input(shape=(width, height, 1))

    # CONV => RELU => BN
    features = Conv2D(32, kernel_size, strides=2, padding="same")(net_input)
    features = Activation("relu")(features)
    features = BatchNormalization(axis=channel_axis)(features)

    # Flatten, then FC => RELU => BN => DROPOUT
    features = Flatten()(features)
    features = Dense(output)(features)
    features = Activation("relu")(features)
    features = BatchNormalization(axis=channel_axis)(features)
    features = Dropout(0.2)(features)

    # Second FC layer of the same width.
    features = Dense(output)(features)
    features = Activation("relu")(features)

    if regress:
        # Optional regression node.
        features = Dense(1, activation="linear")(features)

    return Model(net_input, features)
113
+
114
+
115
# Build the two branches: an MLP over the trailing columns and a CNN over
# the (18, 24, 1) block.
mlp = create_mlp(train_x_c.shape[1], regress=False)
cnn_0 = create_cnn(18, 24, 1, kernel_size=(6, 6), regress=False, output=256)

# The outputs of both branches are concatenated and fed to the shared head.
combinedInput = concatenate([mlp.output, cnn_0.output])

# Head: an L1-regularised 512-unit layer, dropout, two more 512-unit layers,
# then a 5-way softmax.
x = Dense(512, activation="relu", kernel_regularizer=regularizers.l1(0.003))(combinedInput)
x = Dropout(0.2)(x)
for _ in range(2):
    x = Dense(512, activation="relu")(x)
x = Dense(5, activation="softmax")(x)

# The combined model takes [MLP input, CNN input] and outputs the softmax.
model = Model(inputs=[mlp.input, cnn_0.input], outputs=x)


print("Starting training ")

# Adam with a decaying learning rate; categorical cross-entropy loss with
# accuracy reported as a metric.
opt = Adam(lr=1e-3, decay=1e-3 / 200)
model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=['accuracy'])

# Train on the oversampled training part.
print("[INFO] training model...")
model.fit(
    [train_x_c, train_x_a],
    train_y,
    epochs=epochs,
    batch_size=2048,
    shuffle=True,
)

# Split the held-out part the same way as the training part.
test_x_a = test_x[:, :18*24].reshape(test_x.shape[0], 18, 24, 1)
test_x_c = test_x[:, 18*24:]

# Evaluate on the held-out part.
print("[INFO] predicting house prices...")
score = model.evaluate([test_x_c, test_x_a], test_y)

print(score)
print('Test score:', score[0])
print('Test accuracy:', score[1])

# Save the trained model, then drop the reference.
path = "16_18d_mix_seq.h5"
model.save(path)
model = None