
Compare LSTM classification results after k-means bucketing

yufeng 4 years ago
commit 6ce2ca916d

+ 125 - 0
mix/lstm_kmeans_predict.py

@@ -0,0 +1,125 @@
+# -*- encoding:utf-8 -*-
+import numpy as np
+from keras.models import load_model
+import joblib
+
+model_path = '160_18d_lstm_5D_ma5_s_seq.h5'
+data_dir = 'D:\\data\\quantization\\'
+kmeans = 'roc'
+
+
+def read_data(path):
+    lines = []
+    with open(path) as f:
+        for line in f.readlines()[:]:
+            line = eval(line.strip())
+            if line[-2][0].startswith('0') or line[-2][0].startswith('3'):
+                lines.append(line)
+
+    size = len(lines[0])
+    train_x = [s[:size - 2] for s in lines]
+    train_y = [s[size - 1] for s in lines]
+    return np.array(train_x), np.array(train_y), lines
+
+
+def _score(fact, line):
+    with open('mix_predict_dmi_18d.txt', 'a') as f:
+        f.write(str([line[-2], line[-1]]) + "\n")
+
+    up_right = 0
+    up_error = 0
+
+    if fact[0] == 1:
+        up_right = up_right + 1.12
+    elif fact[1] == 1:
+        up_right = up_right + 1.06
+    elif fact[2] == 1:
+        up_right = up_right + 1
+        up_error = up_error + 0.5
+    elif fact[3] == 1:
+        up_right = up_right + 0.94
+        up_error = up_error + 1
+    else:
+        up_error = up_error + 1
+        up_right = up_right + 0.88
+    return up_right, up_error
+
+
+def mul_predict(name="10_18d"):
+    r = 0
+    p = 0
+
+    for x in range(0, 8):
+        win_dnn, up_ratio, down_ratio = predict(data_dir + kmeans + '\\stock160_18d_train1_B_' + str(x) + ".log", x) # stock160_18d_trai_0
+
+        r = r + up_ratio
+        p = p + down_ratio
+
+    print(r, p)
+
+
+def predict(file_path='', idx=-1):
+    test_x, test_y, lines = read_data(file_path)
+    print(idx, 'Load data success')
+
+    test_x_a = test_x[:,:18*24]
+    test_x_a = test_x_a.reshape(test_x.shape[0], 18, 24)
+    # test_x_b = test_x[:, 18*16:18*16+10*18]
+    # test_x_b = test_x_b.reshape(test_x.shape[0], 18, 10, 1)
+    test_x_c = test_x[:,18*24:]
+
+    model = load_model(model_path.split('.')[0] + '_' + str(idx) + '.h5')
+    score = model.evaluate([test_x_c, test_x_a, ], test_y)
+    print('LSTM', score)
+
+    up_num = 0
+    up_error = 0
+    up_right = 0
+    down_num = 0
+    down_error = 0
+    down_right = 0
+    i = 0
+    result = model.predict([test_x_c, test_x_a, ])
+    win_dnn = []
+    for r in result:
+        fact = test_y[i]
+
+        if idx in [-2]:
+            if r[0] > 0.5 or r[1] > 0.5:
+                pass
+        else:
+            if r[0] > 0.6 or r[1] > 0.6:
+                tmp_right, tmp_error = _score(fact, lines[i])
+                up_right = tmp_right + up_right
+                up_error = tmp_error + up_error
+                up_num = up_num + 1
+            elif r[3] > 0.7 or r[4] > 0.7:
+                if fact[0] == 1:
+                    down_error = down_error + 1
+                    down_right = down_right + 1.12
+                elif fact[1] == 1:
+                    down_error = down_error + 1
+                    down_right = down_right + 1.06
+                elif fact[2] == 1:
+                    down_error = down_error + 0.5
+                    down_right = down_right + 1
+                elif fact[3] == 1:
+                    down_right = down_right + 0.94
+                else:
+                    down_right = down_right + 0.88
+                down_num = down_num + 1
+
+        i = i + 1
+    if up_num == 0:
+        up_num = 1
+    if down_num == 0:
+        down_num = 1
+    print('LSTM', up_right, up_num, up_right/up_num, up_error/up_num, down_right/down_num, down_error/down_num)
+    return win_dnn, up_right/up_num, down_right/down_num
+
+
+if __name__ == '__main__':
+    # predict(file_path='D:\\data\\quantization\\stock160_18d_10D_test.log', model_path='160_18d_lstm_5D_ma5_s_seq.h5')
+    # predict(file_path='D:\\data\\quantization\\stock6_test.log', model_path='15m_dnn_seq.h5')
+    mul_predict(name='stock160_18d')
+    # predict_today(20200229, model='11_18d')
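
For reference, every script in this commit slices its rows the same way: the first 18*24 = 432 columns form an 18-step window for the sequence branch, and everything after column 432 feeds the flat branch. A minimal sketch of that split (the 7-column tail width is an illustrative assumption; only the 432-column split comes from the code):

    import numpy as np

    rows = np.random.rand(4, 18 * 24 + 7)               # hypothetical batch of parsed rows
    seq_branch = rows[:, :18 * 24].reshape(-1, 18, 24)  # LSTM input: 18 timesteps x 24 features
    aux_branch = rows[:, 18 * 24:]                      # MLP input: remaining flat features
    assert seq_branch.shape == (4, 18, 24)
    assert aux_branch.shape == (4, 7)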

+ 221 - 0
mix/lstm_kmeans_train.py

@@ -0,0 +1,221 @@
+# -*- encoding:utf-8 -*-
+import keras
+import numpy as np
+from keras.models import Sequential
+# Adam is used as the optimizer (plenty of alternatives exist, e.g. SGD)
+from keras.optimizers import Adam
+import random
+from keras.models import load_model
+from imblearn.over_sampling import RandomOverSampler
+from keras.utils import np_utils
+# Layer imports: Conv2D builds convolutions, Activation applies activation
+# functions, MaxPooling2D is the pooling layer, Flatten collapses
+# multi-dimensional input to one dimension, Dense is the fully connected layer
+from keras.layers import Conv2D, Activation, MaxPool2D, Flatten, Dense,Dropout,Input,MaxPooling2D,BatchNormalization,concatenate
+from keras.layers import LSTM
+from keras import regularizers
+from keras.models import Model
+
+from keras.callbacks import EarlyStopping
+
+early_stopping = EarlyStopping(monitor='accuracy', patience=5, verbose=2)
+
+epochs= 330
+size = 580000  # 61W = 610,000 rows
+file_path = 'D:\\data\\quantization\\stock160_18d_train.log'
+model_path = '160_18d_lstm_5D_ma5_s_seq.h5'
+data_dir = 'D:\\data\\quantization\\'
+
+
+def read_data(path):
+    lines = []
+    with open(path) as f:
+        for line in f.readlines()[:]:
+            lines.append(eval(line.strip()))
+
+    random.shuffle(lines)
+    print('Finished reading data')
+
+    d = int(0.7*len(lines))
+
+    train_x=[s[:-2] for s in lines[0:d]]
+    train_y=[s[-1] for s in lines[0:d]]
+    test_x=[s[:-2] for s in lines[d:]]
+    test_y=[s[-1] for s in lines[d:]]
+
+    print('Finished converting data')
+
+    ros = RandomOverSampler(random_state=0)
+    X_resampled, y_resampled = ros.fit_sample(np.array(train_x), np.array(train_y))
+
+    print('Finished resampling data')
+
+    return X_resampled,y_resampled,np.array(test_x),np.array(test_y)
+
+
+def mul_train(name="10_18d"):
+    for x in range(0, 8):
+        score = train(data_dir + 'kmeans\\' + name + "_trai_" + str(x) + ".log", x) # stock160_18d_trai_0
+
+        with open(data_dir + name + '_lstm.log', 'a') as f:
+            f.write(str(x) + ':' + str(score[1]) + '\n')
+
+
+def train(file_path, idx):
+    train_x,train_y,test_x,test_y=read_data(file_path)
+
+    train_x_a = train_x[:,:18*24]
+    train_x_a = train_x_a.reshape(train_x.shape[0], 18, 24)
+    # train_x_b = train_x[:, 18*24:18*16+10*18]
+    # train_x_b = train_x_b.reshape(train_x.shape[0], 18, 10, 1)
+    train_x_c = train_x[:,18*24:]
+
+    # create the MLP and LSTM branches
+    mlp = create_mlp(train_x_c.shape[1], regress=False)
+    cnn_0 = create_lstm(train_x_a.shape[1], 18, 24)
+    # cnn_1 = create_cnn(18, 10, 1, kernel_size=(3, 5), filters=32, regress=False, output=120)
+
+    # create the input to our final set of layers as the *output* of both
+    # the MLP and LSTM branches
+    combinedInput = concatenate([mlp.output, cnn_0.output,])
+
+    # our final FC head stacks a few dense layers, the last one
+    # being the classification head
+    x = Dense(256, activation="relu", kernel_regularizer=regularizers.l1(0.003))(combinedInput)
+    x = Dropout(0.2)(x)
+    x = Dense(256, activation="relu")(x)
+    x = Dense(512, activation="relu")(x)
+    # add one more layer
+    x = Dense(5, activation="softmax")(x)
+
+    # the final model accepts numerical data on the MLP input and the
+    # 18x24 window on the LSTM input, outputting a 5-class distribution
+    model = Model(inputs=[mlp.input, cnn_0.input,], outputs=x)
+
+
+    print("Starting training ")
+    # h = model.fit(train_x, train_y, batch_size=4096*2, epochs=500, shuffle=True)
+
+    # compile with categorical cross-entropy, matching the 5-class softmax output
+    opt = Adam(lr=1e-3, decay=1e-3 / 200)
+    model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=['accuracy'],
+                  )
+
+    # train the model
+    print("[INFO] training model...")
+    model.fit(
+        [train_x_c, train_x_a, ], train_y,
+        # validation_data=([testAttrX, testImagesX], testY),
+        # epochs=int(3*train_x_a.shape[0]/1300),
+        epochs=epochs,
+        batch_size=4096, shuffle=True,
+        callbacks=[early_stopping]
+    )
+
+    test_x_a = test_x[:,:18*24]
+    test_x_a = test_x_a.reshape(test_x.shape[0], 18, 24)
+    # test_x_b = test_x[:, 18*16:18*16+10*18]
+    # test_x_b = test_x_b.reshape(test_x.shape[0], 18, 10, 1)
+    test_x_c = test_x[:,18*24:]
+
+    # evaluate on the held-out data
+    print("[INFO] evaluating model...")
+    score  = model.evaluate([test_x_c, test_x_a], test_y)
+
+    print(score)
+    print('Test score:', score[0])
+    print('Test accuracy:', score[1])
+
+    model.save(model_path.split('.')[0] + '_' + str(idx) + '.h5')
+
+    return score
+
+
+def create_mlp(dim, regress=False):
+    # define our MLP network
+    model = Sequential()
+    model.add(Dense(64, input_dim=dim, activation="relu"))
+    model.add(Dense(64, activation="relu"))
+
+    # check to see if the regression node should be added
+    if regress:
+        model.add(Dense(1, activation="linear"))
+
+    # return our model
+    return model
+
+
+def create_cnn(width, height, depth, filters=32, kernel_size=(5, 6), regress=False, output=24):
+    # initialize the input shape and channel dimension, assuming
+    # TensorFlow/channels-last ordering
+    inputShape = (width, height, 1)
+    chanDim = -1
+
+    # define the model input
+    inputs = Input(shape=inputShape)
+
+    x = inputs
+
+    # CONV => RELU => BN => POOL
+    x = Conv2D(filters, kernel_size, strides=(2,2), padding="same",
+               # data_format='channels_first'
+               )(x)
+    x = Activation("relu")(x)
+    x = BatchNormalization(axis=chanDim)(x)
+    # x = MaxPooling2D(pool_size=(2, 2))(x)
+    # if width > 2:
+    #     x = Conv2D(32, (10, 6), padding="same")(x)
+    #     x = Activation("relu")(x)
+    #     x = BatchNormalization(axis=chanDim)(x)
+
+    # flatten the volume, then FC => RELU => BN => DROPOUT
+    x = Flatten()(x)
+    x = Dense(output)(x)
+    x = Activation("relu")(x)
+    x = BatchNormalization(axis=chanDim)(x)
+    x = Dropout(0.2)(x)
+
+    # apply another FC layer, this one to match the number of nodes
+    # coming out of the MLP
+    x = Dense(output)(x)
+    x = Activation("relu")(x)
+
+    # check to see if the regression node should be added
+    if regress:
+        x = Dense(1, activation="linear")(x)
+
+    # construct the CNN
+    model = Model(inputs, x)
+
+    # return the CNN
+    return model
+
+
+def create_lstm(sample, timesteps, input_dim):
+    inputShape = (timesteps, input_dim)
+
+    # define the model input
+    inputs = Input(shape=inputShape)
+
+    x = inputs
+
+    x = LSTM(units = 64, input_shape=(timesteps, input_dim), dropout=0.2
+               )(x)
+    # x = LSTM(16*16, return_sequences=False)
+    # x = Activation("relu")(x)
+    x = Dense(64)(x)
+    x = Dropout(0.2)(x)
+    x = Activation("relu")(x)
+
+    # construct the LSTM branch
+    model = Model(inputs, x)
+
+    # return the LSTM branch
+    return model
+
+
+if __name__ == '__main__':
+    mul_train('stock160_18d')
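
A stripped-down sketch of the two-branch topology that train() assembles above (era-appropriate Keras 2.x functional API; the auxiliary width of 7 is an illustrative assumption, the branch sizes follow the file):

    from keras.layers import Input, Dense, LSTM, concatenate
    from keras.models import Model

    seq_in = Input(shape=(18, 24))                   # indicator window
    aux_in = Input(shape=(7,))                       # flat features (width assumed)
    seq = LSTM(64, dropout=0.2)(seq_in)              # sequence branch
    aux = Dense(64, activation="relu")(aux_in)       # MLP branch
    merged = concatenate([seq, aux])
    out = Dense(5, activation="softmax")(merged)     # 5 movement classes
    model = Model(inputs=[aux_in, seq_in], outputs=out)  # same input order as the file
    model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])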

+ 1 - 1
mix/lstm_predict.py

@@ -102,7 +102,7 @@ def predict(file_path='', model_path='15min_dnn_seq.h5', idx=-1):
 
 
 if __name__ == '__main__':
-    predict(file_path='D:\\data\\quantization\\stock17_18d_test.log', model_path='17_18d_lstm_seq.h5')
+    predict(file_path='D:\\data\\quantization\\stock160_18d_train1.log', model_path='160_18d_lstm_5D_ma5_s_seq.h5')
     # predict(file_path='D:\\data\\quantization\\stock6_test.log', model_path='15m_dnn_seq.h5')
     # multi_predict(model='15_18d')
     # predict_today(20200229, model='11_18d')

+ 81 - 0
mix/lstm_predict_by_day.py

@@ -0,0 +1,81 @@
+# -*- encoding:utf-8 -*-
+import numpy as np
+from keras.models import load_model
+import joblib
+
+
+def read_data(path):
+    day_lines = {}
+    with open(path) as f:
+        for line in f.readlines()[:]:
+            line = eval(line.strip())
+            date = str(line[-1][-1])
+            if date in day_lines:
+                day_lines[date].append(line)
+            else:
+                day_lines[date] = [line]
+    # print(len(day_lines['20191230']))
+    return day_lines
+
+
+def predict(file_path='', model_path='15min_dnn_seq'):
+    day_lines = read_data(file_path)
+    print('Finished reading data')
+
+    model = load_model(model_path)
+    print('Model loaded')
+
+    items = sorted(day_lines.keys())
+    for key in items:
+        # print(day)
+        lines = day_lines[key]
+
+        up_num = 0
+        down_num = 0
+        size = len(lines[0])
+        x0 = 0
+        x1 = 0
+        x2 = 0
+        x3 = 0
+        x4 = 0
+
+        for line in lines:
+            train_x = np.array([line[:size - 1]])
+            train_x_a = train_x[:,:18*24]
+            train_x_a = train_x_a.reshape(train_x.shape[0], 18, 24)
+            # train_x_b = train_x[:, 18*18:18*18+2*18]
+            # train_x_b = train_x_b.reshape(train_x.shape[0], 18, 2, 1)
+            train_x_c = train_x[:,18*24:]
+
+            result = model.predict([train_x_c, train_x_a])
+
+            if result[0][3] + result[0][4] > 0.5:
+                down_num = down_num + 1
+            elif result[0][1] + result[0][0] > 0.5:
+                up_num = up_num + 0.6
+            # else:
+            #     up_num = up_num + 0.4 # raise when optimistic, lower when pessimistic
+            #     down_num = down_num + 0.6
+
+            # if result[0][0] > 0.5:
+            #     x0 = x0 + 1
+            # if result[0][1] > 0.5:
+            #     x1 = x1 + 1
+            # if result[0][2] > 0.5:
+            #     x2 = x2 + 1
+            # if result[0][3] > 0.5:
+            #     x3 = x3 + 1
+            # if result[0][4] > 0.5:
+            #     x4 = x4 + 1
+
+        print(key, int(up_num), int(down_num), (down_num*1.2 + 2)/(up_num*1.2 + 2), )
+        # print(key, x0, x1, x2,x3,x4)
+
+
+if __name__ == '__main__':
+    # predict(file_path='D:\\data\\quantization\\stock6_5_test.log', model_path='5d_dnn_seq.h5')
+    # predict(file_path='D:\\data\\quantization\\stock9_18_20200220.log', model_path='18d_dnn_seq.h5')
+    # predict(file_path='D:\\data\\quantization\\stock9_18_2.log', model_path='18d_dnn_seq.h5')
+    # predict(file_path='D:\\data\\quantization\\stock16_18d_20200310.log', model_path='16_18d_mix_seq')
+    predict(file_path='D:\\data\\quantization\\stock16_18d_20191225_20200310.log', model_path='16_18d_lstm_seq.h5')
+    # predict(file_path='D:\\data\\quantization\\stock9_18_4.log', model_path='18d_dnn_seq.h5')
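
read_data buckets rows by the date stored at line[-1][-1]; collections.defaultdict expresses the same grouping more compactly (a sketch with toy rows, assuming string dates like '20191230'):

    from collections import defaultdict

    def group_by_day(parsed_lines):
        day_lines = defaultdict(list)
        for line in parsed_lines:
            day_lines[str(line[-1][-1])].append(line)
        return day_lines

    sample = [[1, 2, ('000001.SZ', '20191230')], [3, 4, ('000002.SZ', '20191231')]]
    print(sorted(group_by_day(sample)))  # ['20191230', '20191231']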

+ 207 - 0
mix/lstm_predict_everyday.py

@@ -0,0 +1,207 @@
+# -*- encoding:utf-8 -*-
+import numpy as np
+from keras.models import load_model
+import joblib
+
+
+holder_stock_list = [
+                        '000063.SZ', '002093.SZ',
+                        '300253.SZ', '300807.SZ',
+
+                         # account B
+
+    ]
+
+
+def read_data(path):
+    lines = []
+    with open(path) as f:
+        for line in f.readlines()[:]:
+            line = eval(line.strip())
+            if line[-2][0].startswith('0') or line[-2][0].startswith('3'):
+                lines.append(line)
+
+    size = len(lines[0])
+    train_x = [s[:size - 2] for s in lines]
+    train_y = [s[size - 1] for s in lines]
+    return np.array(train_x), np.array(train_y), lines
+
+
+import pymongo
+from util.mongodb import get_mongo_table_instance
+code_table = get_mongo_table_instance('tushare_code')
+k_table = get_mongo_table_instance('stock_day_k')
+stock_concept_table = get_mongo_table_instance('tushare_concept_detail')
+all_concept_code_list = list(get_mongo_table_instance('tushare_concept').find({}))
+
+
+industry = ['家用电器', '元器件', 'IT设备', '汽车服务',
+            '汽车配件', '软件服务',
+            '互联网', '纺织',
+            '塑料', '半导体',]
+
+A_concept_code_list = [   'TS2', # 5G
+                        'TS24', # OLED
+                        'TS26', # Healthy China
+                        'TS43', # new-energy complete vehicles
+                        'TS59', # Tesla
+                        'TS65', # complete automobiles
+                        'TS142', # Internet of Things
+                        'TS153', # autonomous driving
+                        'TS163', # Xiong'an board - smart city
+                        'TS175', # industrial automation
+                        'TS232', # new-energy vehicles
+                        'TS254', # artificial intelligence
+                        'TS258', # internet healthcare
+                        'TS264', # industrial internet
+                        'TS266', # semiconductors
+                        'TS269', # smart city
+                        'TS271', # 3D glass
+                        'TS295', # domestic chips
+                        'TS303', # medical informatization
+                        'TS323', # charging piles
+                        'TS328', # iris recognition
+                        'TS361', # virus
+    ]
+
+
+gainian_map = {}
+hangye_map = {}
+
+def predict_today(file, day, model='10_18d', log=True):
+    lines = []
+    with open(file) as f:
+        for line in f.readlines()[:]:
+            line = eval(line.strip())
+            # if line[-1][0].startswith('0') or line[-1][0].startswith('3'):
+            lines.append(line)
+
+    size = len(lines[0])
+
+    model = load_model(model)
+
+    for line in lines:
+        train_x = np.array([line[:size - 1]])
+        train_x_a = train_x[:,:18*24]
+        train_x_a = train_x_a.reshape(train_x.shape[0], 18, 24)
+        # train_x_b = train_x[:, 18*18:18*18+2*18]
+        # train_x_b = train_x_b.reshape(train_x.shape[0], 18, 2, 1)
+        train_x_c = train_x[:,18*24:]
+
+        result = model.predict([train_x_c, train_x_a])
+        # print(result, line[-1])
+        stock = code_table.find_one({'ts_code':line[-1][0]})
+
+        if result[0][0] + result[0][1] > 0.5:
+            if line[-1][0].startswith('688'):
+                continue
+            # drop ST stocks
+            if stock['name'].startswith('ST') or stock['name'].startswith('N') or stock['name'].startswith('*'):
+                continue
+
+            if stock['ts_code'] in holder_stock_list:
+                print(stock['ts_code'], stock['name'], 'keep buy rating')
+
+            # recent K-line history (the big-mover filters below are disabled)
+            k_table_list = list(k_table.find({'code':line[-1][0], 'tradeDate':{'$lte':day}}).sort("tradeDate", pymongo.DESCENDING).limit(5))
+            # if k_table_list[0]['close'] > k_table_list[-1]['close']*1.20:
+            #     continue
+            # if k_table_list[0]['close'] < k_table_list[-1]['close']*0.90:
+            #     continue
+            # if k_table_list[-1]['close'] > 80:
+            #     continue
+
+            # restrict to selected industries
+            # if stock['industry'] in industry:
+            concept_code_list = list(stock_concept_table.find({'ts_code':stock['ts_code']}))
+            concept_detail_list = []
+
+            # tally industries
+            if stock['sw_industry'] in hangye_map:
+                i_c = hangye_map[stock['sw_industry']]
+                hangye_map[stock['sw_industry']] = i_c + 1
+            else:
+                hangye_map[stock['sw_industry']] = 1
+
+            if len(concept_code_list) > 0:
+                for concept in concept_code_list:
+                    for c in all_concept_code_list:
+                        if c['code'] == concept['concept_code']:
+                            concept_detail_list.append(c['name'])
+
+                            if c['name'] in gainian_map:
+                                g_c = gainian_map[c['name']]
+                                gainian_map[c['name']] = g_c + 1
+                            else:
+                                gainian_map[c['name']] = 1
+
+            print(line[-1], stock['name'], stock['sw_industry'], str(concept_detail_list), 'buy', k_table_list[0]['pct_chg'])
+
+            if log is True:
+                with open('D:\\data\\quantization\\predict\\' + str(day) + '_lstm.txt', mode='a', encoding="utf-8") as f:
+                    f.write(str(line[-1]) + ' ' + stock['name'] + ' ' + stock['sw_industry'] + ' ' + str(concept_detail_list) + ' buy' + '\n')
+
+        elif result[0][2] > 0.5:
+            if stock['ts_code'] in holder_stock_list:
+                print(stock['ts_code'], stock['name'], 'sideways rating')
+
+        elif result[0][3] + result[0][4] > 0.5:
+            if stock['ts_code'] in holder_stock_list:
+                print(stock['ts_code'], stock['name'], 'sell immediately')
+        else:
+            if stock['ts_code'] in holder_stock_list:
+                print(stock['ts_code'], stock['name'], result[0],)
+
+    # print(gainian_map)
+    # print(hangye_map)
+
+    gainian_list = [(key, gainian_map[key]) for key in gainian_map]
+    gainian_list = sorted(gainian_list, key=lambda x:x[1], reverse=True)
+
+    hangye_list = [(key, hangye_map[key]) for key in hangye_map]
+    hangye_list = sorted(hangye_list, key=lambda x:x[1], reverse=True)
+
+    print(gainian_list)
+    print(hangye_list)
+
+def _read_pfile_map(path):
+    s_list = []
+    with open(path, encoding='utf-8') as f:
+        for line in f.readlines()[:]:
+            s_list.append(line)
+    return s_list
+
+
+def join_two_day(a, b):
+    a_list = _read_pfile_map('D:\\data\\quantization\\predict\\' + str(a) + '.txt')
+    b_list = _read_pfile_map('D:\\data\\quantization\\predict\\dmi_' + str(b) + '.txt')
+    for a in a_list:
+        for b in b_list:
+            if a[2:11] == b[2:11]:
+                print(a)
+
+
+def check_everyday(day, today):
+    a_list = _read_pfile_map('D:\\data\\quantization\\predict\\' + str(day) + '.txt')
+    x = 0
+    for a in a_list:
+        print(a[:-1])
+        k_day_list = list(k_table.find({'code':a[2:11], 'tradeDate':{'$lte':int(today)}}).sort('tradeDate', pymongo.DESCENDING).limit(5))
+        if k_day_list is not None and len(k_day_list) > 0:
+            k_day = k_day_list[0]
+            k_day_0 = k_day_list[-1]
+            k_day_last = k_day_list[1]
+            if ((k_day_last['close'] - k_day_0['pre_close'])/k_day_0['pre_close']) < 0.2:
+                print(k_day['open'], k_day['close'], 100*(k_day['close'] - k_day_last['close'])/k_day_last['close'])
+                x = x + 100*(k_day['close'] - k_day_last['close'])/k_day_last['close']
+
+    print(x/len(a_list))
+
+
+if __name__ == '__main__':
+    # predict(file_path='D:\\data\\quantization\\stock6_5_test.log', model_path='5d_dnn_seq.h5')
+    # predict(file_path='D:\\data\\quantization\\stock6_test.log', model_path='15m_dnn_seq.h5')
+    # multi_predict()
+    predict_today("D:\\data\\quantization\\stock160_18d_20200312.log", 20200313, model='160_18d_lstm_5D_ma5_s_seq.h5', log=True)
+    # join_two_day(20200305, 20200305)
+    # check_everyday(20200311, 20200312)
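
holder_stock_list is easy to break here: if the comma after '002093.SZ' is dropped, Python joins the adjacent string literals at compile time, so neither '002093.SZ' nor '300253.SZ' ever matches a held code:

    broken = ['000063.SZ', '002093.SZ'
              '300253.SZ', '300807.SZ']
    print(broken)       # ['000063.SZ', '002093.SZ300253.SZ', '300807.SZ']
    print(len(broken))  # 3, not 4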

+ 13 - 11
mix/lstm_train.py

@@ -20,8 +20,10 @@ from keras.callbacks import EarlyStopping
 
 early_stopping = EarlyStopping(monitor='accuracy', patience=5, verbose=2)
 
-epochs= 200
-size = 380000
+epochs= 440
+size = 580000  # 61W = 610,000 rows
+file_path = 'D:\\data\\quantization\\stock160_18d_train.log'
+model_path = '160_18d_lstm_5D_ma5_s_seq.h5'
 
 def read_data(path):
     lines = []
@@ -49,7 +51,7 @@ def read_data(path):
     return X_resampled,y_resampled,np.array(test_x),np.array(test_y)
 
 
-train_x,train_y,test_x,test_y=read_data("D:\\data\\quantization\\stock17_18d_train.log")
+train_x,train_y,test_x,test_y=read_data(file_path)
 
 train_x_a = train_x[:,:18*24]
 train_x_a = train_x_a.reshape(train_x.shape[0], 18, 24)
@@ -61,8 +63,8 @@ train_x_c = train_x[:,18*24:]
 def create_mlp(dim, regress=False):
     # define our MLP network
     model = Sequential()
-    model.add(Dense(16, input_dim=dim, activation="relu"))
-    model.add(Dense(16, activation="relu"))
+    model.add(Dense(64, input_dim=dim, activation="relu"))
+    model.add(Dense(64, activation="relu"))
 
     # check to see if the regression node should be added
     if regress:
@@ -151,10 +153,10 @@ combinedInput = concatenate([mlp.output, cnn_0.output,])
 
 # our final FC head stacks a few dense layers, the last one
 # being the classification head
-x = Dense(666, activation="relu", kernel_regularizer=regularizers.l1(0.003))(combinedInput)
+x = Dense(256, activation="relu", kernel_regularizer=regularizers.l1(0.003))(combinedInput)
 x = Dropout(0.2)(x)
-x = Dense(666, activation="relu")(x)
-x = Dense(666, activation="relu")(x)
+x = Dense(256, activation="relu")(x)
+x = Dense(512, activation="relu")(x)
 # add one more layer
 x = Dense(5, activation="softmax")(x)
 
@@ -181,7 +183,7 @@ model.fit(
     # validation_data=([testAttrX, testImagesX], testY),
     # epochs=int(3*train_x_a.shape[0]/1300),
     epochs=epochs,
-    batch_size=2048, shuffle=True,
+    batch_size=4096, shuffle=True,
     callbacks=[early_stopping]
 )
 
@@ -199,6 +201,6 @@ print(score)
 print('Test score:', score[0])
 print('Test accuracy:', score[1])
 
-path="17_18d_lstm_seq.h5"
-model.save(path)
+
+model.save(model_path)
 model=None
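
One caveat on the settings above: since fit() receives no validation_data, EarlyStopping(monitor='accuracy') watches training accuracy, so the enlarged 440-epoch budget ends when training accuracy plateaus, not when generalization does. A sketch of the validation-driven alternative (not what this commit does; on older Keras releases the metric name is 'val_acc' rather than 'val_accuracy'):

    from keras.callbacks import EarlyStopping

    early_stopping = EarlyStopping(monitor='val_accuracy', patience=5, verbose=2)
    # model.fit([train_x_c, train_x_a], train_y,
    #           validation_data=([test_x_c, test_x_a], test_y),
    #           epochs=epochs, batch_size=4096, callbacks=[early_stopping])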

+ 125 - 0
mix/mix_kmeans_predict_1.py

@@ -0,0 +1,125 @@
+# -*- encoding:utf-8 -*-
+import numpy as np
+from keras.models import load_model
+import joblib
+
+model_path = '160_18d_mix_5D_ma5_s_seq.h5'
+data_dir = 'D:\\data\\quantization\\'
+kmeans = 'roc'
+
+
+def read_data(path):
+    lines = []
+    with open(path) as f:
+        for line in f.readlines()[:]:
+            line = eval(line.strip())
+            if line[-2][0].startswith('0') or line[-2][0].startswith('3'):
+                lines.append(line)
+
+    size = len(lines[0])
+    train_x = [s[:size - 2] for s in lines]
+    train_y = [s[size - 1] for s in lines]
+    return np.array(train_x), np.array(train_y), lines
+
+
+def _score(fact, line):
+    with open('mix_predict_dmi_18d.txt', 'a') as f:
+        f.write(str([line[-2], line[-1]]) + "\n")
+
+    up_right = 0
+    up_error = 0
+
+    if fact[0] == 1:
+        up_right = up_right + 1.12
+    elif fact[1] == 1:
+        up_right = up_right + 1.06
+    elif fact[2] == 1:
+        up_right = up_right + 1
+        up_error = up_error + 0.5
+    elif fact[3] == 1:
+        up_right = up_right + 0.94
+        up_error = up_error + 1
+    else:
+        up_error = up_error + 1
+        up_right = up_right + 0.88
+    return up_right, up_error
+
+
+def mul_predict(name="10_18d"):
+    r = 0
+    p = 0
+
+    for x in range(0, 8):
+        win_dnn, up_ratio, down_ratio = predict(data_dir + kmeans + '\\stock160_18d_train1_B_' + str(x) + ".log", x) # stock160_18d_trai_0
+
+        r = r + up_ratio
+        p = p + down_ratio
+
+    print(r, p)
+
+
+def predict(file_path='', idx=-1):
+    test_x, test_y, lines = read_data(file_path)
+    print(idx, 'Load data success')
+
+    test_x_a = test_x[:,:18*24]
+    test_x_a = test_x_a.reshape(test_x.shape[0], 18, 24, 1)
+    # test_x_b = test_x[:, 18*16:18*16+10*18]
+    # test_x_b = test_x_b.reshape(test_x.shape[0], 18, 10, 1)
+    test_x_c = test_x[:,18*24:]
+
+    model = load_model(model_path.split('.')[0] + '_' + str(idx) + '.h5')
+    score = model.evaluate([test_x_c, test_x_a, ], test_y)
+    print('MIX', score)
+
+    up_num = 0
+    up_error = 0
+    up_right = 0
+    down_num = 0
+    down_error = 0
+    down_right = 0
+    i = 0
+    result = model.predict([test_x_c, test_x_a, ])
+    win_dnn = []
+    for r in result:
+        fact = test_y[i]
+
+        if idx in [-2]:
+            if r[0] > 0.5 or r[1] > 0.5:
+                pass
+        else:
+            if r[0] > 0.6 or r[1] > 0.6:
+                tmp_right, tmp_error = _score(fact, lines[i])
+                up_right = tmp_right + up_right
+                up_error = tmp_error + up_error
+                up_num = up_num + 1
+            elif r[3] > 0.7 or r[4] > 0.7:
+                if fact[0] == 1:
+                    down_error = down_error + 1
+                    down_right = down_right + 1.12
+                elif fact[1] == 1:
+                    down_error = down_error + 1
+                    down_right = down_right + 1.06
+                elif fact[2] == 1:
+                    down_error = down_error + 0.5
+                    down_right = down_right + 1
+                elif fact[3] == 1:
+                    down_right = down_right + 0.94
+                else:
+                    down_right = down_right + 0.88
+                down_num = down_num + 1
+
+        i = i + 1
+    if up_num == 0:
+        up_num = 1
+    if down_num == 0:
+        down_num = 1
+    print('MIX', up_right, up_num, up_right/up_num, up_error/up_num, down_right/down_num, down_error/down_num)
+    return win_dnn, up_right/up_num, down_right/down_num
+
+
+if __name__ == '__main__':
+    # predict(file_path='D:\\data\\quantization\\stock160_18d_10D_test.log', model_path='160_18d_lstm_5D_ma5_s_seq.h5')
+    # predict(file_path='D:\\data\\quantization\\stock6_test.log', model_path='15m_dnn_seq.h5')
+    mul_predict(name='stock160_18d')
+    # predict_today(20200229, model='11_18d')

+ 187 - 0
mix/mix_kmeans_train_1.py

@@ -0,0 +1,187 @@
+# -*- encoding:utf-8 -*-
+import keras
+import numpy as np
+from keras.models import Sequential
+# Adam is used as the optimizer (plenty of alternatives exist, e.g. SGD)
+from keras.optimizers import Adam
+import random
+from keras.models import load_model
+from imblearn.over_sampling import RandomOverSampler
+from keras.utils import np_utils
+# Layer imports: Conv2D builds convolutions, Activation applies activation
+# functions, MaxPooling2D is the pooling layer, Flatten collapses
+# multi-dimensional input to one dimension, Dense is the fully connected layer
+from keras.layers import Conv2D, Activation, MaxPool2D, Flatten, Dense,Dropout,Input,MaxPooling2D,BatchNormalization,concatenate
+from keras import regularizers
+from keras.models import Model
+
+epochs= 50
+size = 580000
+file_path = 'D:\\data\\quantization\\stock160_18d_10D_train.log'
+model_path = '160_18d_mix_5D_ma5_s_seq.h5'
+data_dir = 'D:\\data\\quantization\\'
+
+
+def read_data(path):
+    lines = []
+    with open(path) as f:
+        i = 0
+        for line in f.readlines()[:]:
+            lines.append(eval(line.strip()))
+
+    random.shuffle(lines)
+    print('Finished reading data')
+
+    d = int(0.7*len(lines))
+
+    train_x=[s[:-2] for s in lines[0:d]]
+    train_y=[s[-1] for s in lines[0:d]]
+    test_x=[s[:-2] for s in lines[d:]]
+    test_y=[s[-1] for s in lines[d:]]
+
+    print('Finished converting data')
+
+    ros = RandomOverSampler(random_state=0)
+    X_resampled, y_resampled = ros.fit_sample(np.array(train_x), np.array(train_y))
+
+    print('Finished resampling data')
+
+    return X_resampled,y_resampled,np.array(test_x),np.array(test_y)
+
+
+def mul_train(name="10_18d"):
+    for x in range(0, 8):
+        score = train(data_dir + 'kmeans\\' + name + "_trai_" + str(x) + ".log", x) # stock160_18d_trai_0
+
+        with open(data_dir + name + '_mix.log', 'a') as f:
+            f.write(str(x) + ':' + str(score[1]) + '\n')
+
+
+def train(file_path_name, idx):
+    train_x,train_y,test_x,test_y=read_data(file_path_name)
+
+    train_x_a = train_x[:,:18*24]
+    train_x_a = train_x_a.reshape(train_x.shape[0], 18, 24, 1)
+    # train_x_b = train_x[:, 18*18:18*18+2*18]
+    # train_x_b = train_x_b.reshape(train_x.shape[0], 18, 2, 1)
+    train_x_c = train_x[:,18*24:]
+
+    # create the MLP and CNN models
+    mlp = create_mlp(train_x_c.shape[1], regress=False)
+    cnn_0 = create_cnn(18, 24, 1, kernel_size=(6, 6), regress=False, output=256)
+    # cnn_1 = create_cnn(18, 2, 1, kernel_size=(6,2), regress=False, output=36)
+
+    # create the input to our final set of layers as the *output* of both
+    # the MLP and CNN
+    combinedInput = concatenate([mlp.output, cnn_0.output])
+
+    # our final FC head stacks a few dense layers, the last one
+    # being the classification head
+    x = Dense(512, activation="relu", kernel_regularizer=regularizers.l1(0.003))(combinedInput)
+    x = Dropout(0.2)(x)
+    x = Dense(512, activation="relu")(x)
+    x = Dense(512, activation="relu")(x)
+    # add one more layer
+    x = Dense(5, activation="softmax")(x)
+
+    # the final model accepts numerical data on the MLP input and the
+    # 18x24 feature image on the CNN input, outputting a 5-class distribution
+    model = Model(inputs=[mlp.input, cnn_0.input], outputs=x)
+
+
+    print("Starting training ")
+    # h = model.fit(train_x, train_y, batch_size=4096*2, epochs=500, shuffle=True)
+
+    # compile with categorical cross-entropy, matching the 5-class softmax output
+    opt = Adam(lr=1e-3, decay=1e-3 / 200)
+    model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=['accuracy'])
+
+    # train the model
+    print("[INFO] training model...")
+    model.fit(
+        [train_x_c, train_x_a], train_y,
+        # validation_data=([testAttrX, testImagesX], testY),
+        # epochs=int(3*train_x_a.shape[0]/1300),
+        epochs=epochs,
+        batch_size=2048, shuffle=True)
+
+    test_x_a = test_x[:,:18*24]
+    test_x_a = test_x_a.reshape(test_x.shape[0], 18, 24, 1)
+    # test_x_b = test_x[:, 18*18:18*18+2*18]
+    # test_x_b = test_x_b.reshape(test_x.shape[0], 18, 2, 1)
+    test_x_c = test_x[:,18*24:]
+
+    # evaluate on the held-out data
+    print("[INFO] evaluating model...")
+    score  = model.evaluate([test_x_c, test_x_a], test_y)
+
+    print(score)
+    print('Test score:', score[0])
+    print('Test accuracy:', score[1])
+
+    model.save(model_path.split('.')[0] + '_' + str(idx) + '.h5')
+    return score
+
+def create_mlp(dim, regress=False):
+    # define our MLP network
+    model = Sequential()
+    model.add(Dense(64, input_dim=dim, activation="relu"))
+    model.add(Dense(64, activation="relu"))
+
+    # check to see if the regression node should be added
+    if regress:
+        model.add(Dense(1, activation="linear"))
+
+    # return our model
+    return model
+
+
+def create_cnn(width, height, depth, filters=(4, 6), kernel_size=(5, 6), regress=False, output=24):
+    # initialize the input shape and channel dimension, assuming
+    # TensorFlow/channels-last ordering
+    inputShape = (width, height, 1)
+    chanDim = -1
+
+    # define the model input
+    inputs = Input(shape=inputShape)
+
+    x = inputs
+
+    # CONV => RELU => BN => POOL
+    x = Conv2D(32, kernel_size, strides=2, padding="same")(x)
+    x = Activation("relu")(x)
+    x = BatchNormalization(axis=chanDim)(x)
+    # x = MaxPooling2D(pool_size=(2, 2))(x)
+    # if width > 2:
+    #     x = Conv2D(32, (10, 6), padding="same")(x)
+    #     x = Activation("relu")(x)
+    #     x = BatchNormalization(axis=chanDim)(x)
+
+    # flatten the volume, then FC => RELU => BN => DROPOUT
+    x = Flatten()(x)
+    x = Dense(output)(x)
+    x = Activation("relu")(x)
+    x = BatchNormalization(axis=chanDim)(x)
+    x = Dropout(0.2)(x)
+
+    # apply another FC layer, this one to match the number of nodes
+    # coming out of the MLP
+    x = Dense(output)(x)
+    x = Activation("relu")(x)
+
+    # check to see if the regression node should be added
+    if regress:
+        x = Dense(1, activation="linear")(x)
+
+    # construct the CNN
+    model = Model(inputs, x)
+
+    # return the CNN
+    return model
+
+
+if __name__ == '__main__':
+    mul_train('stock160_18d')
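
Shape check for the CNN branch above: Conv2D(32, (6, 6), strides=2, padding='same') over an (18, 24, 1) input leaves ceil(18/2) x ceil(24/2) = 9 x 12 positions with 32 channels, so Flatten() hands 9*12*32 = 3456 units to the first Dense(output) layer:

    from keras.layers import Input, Conv2D, Flatten
    from keras.models import Model

    inp = Input(shape=(18, 24, 1))
    x = Flatten()(Conv2D(32, (6, 6), strides=2, padding="same")(inp))
    print(Model(inp, x).output_shape)  # (None, 3456) == 9 * 12 * 32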

+ 1 - 1
mix/mix_predict_1.py

@@ -100,7 +100,7 @@ def predict(file_path='', model_path='15min_dnn_seq.h5', idx=-1):
 
 
 if __name__ == '__main__':
-    predict(file_path='D:\\data\\quantization\\stock16_18d_test_close.log', model_path='16_18d_mix_seq.h5')
+    predict(file_path='D:\\data\\quantization\\stock160_18d_train1.log', model_path='160_18d_mix_5D_ma5_s_seq.h5')
     # predict(file_path='D:\\data\\quantization\\stock6_test.log', model_path='15m_dnn_seq.h5')
     # multi_predict(model='15_18d')
     # predict_today(20200229, model='11_18d')

+ 26 - 8
mix/mix_predict_by_day.py

@@ -33,6 +33,11 @@ def predict(file_path='', model_path='15min_dnn_seq'):
         up_num = 0
         down_num = 0
         size = len(lines[0])
+        x0 = 0
+        x1 = 0
+        x2 = 0
+        x3 = 0
+        x4 = 0
 
         for line in lines:
             train_x = np.array([line[:size - 1]])
@@ -44,15 +49,28 @@ def predict(file_path='', model_path='15min_dnn_seq'):
 
             result = model.predict([train_x_c, train_x_a])
 
-            if result[0][3] > 0.5 or result[0][4] > 0.5:
+            if result[0][3] + result[0][4] > 0.5:
                 down_num = down_num + 1
-            elif result[0][1] > 0.5 or result[0][0] > 0.5:
-                up_num = up_num + 1
-            else:
-                up_num = up_num + 0.4 # raise when optimistic, lower when pessimistic
-                down_num = down_num + 0.6
+            elif result[0][1] + result[0][0] > 0.5:
+                up_num = up_num + 0.6
+            # else:
+            #     up_num = up_num + 0.4 # raise when optimistic, lower when pessimistic
+            #     down_num = down_num + 0.6
+
+            maxx = max(result[0])
+            if maxx - result[0][0] == 0:
+                x0 = x0 + 1
+            if maxx - result[0][1] == 0:
+                x1 = x1 + 1
+            if maxx - result[0][2] == 0:
+                x2 = x2 + 1
+            if maxx - result[0][3] == 0:
+                x3 = x3 + 1
+            if maxx - result[0][4] == 0:
+                x4 = x4 + 1
 
-        print(key, int(up_num), int(down_num), (down_num*1.2 + 2)/(up_num*1.2 + 2))
+        # print(key, int(up_num), int(down_num), (down_num*1.2 + 2)/(up_num*1.2 + 2), )
+        print(key, x0, x1, x2,x3,x4)
 
 
 if __name__ == '__main__':
@@ -60,5 +78,5 @@ if __name__ == '__main__':
     # predict(file_path='D:\\data\\quantization\\stock9_18_20200220.log', model_path='18d_dnn_seq.h5')
     # predict(file_path='D:\\data\\quantization\\stock9_18_2.log', model_path='18d_dnn_seq.h5')
     # predict(file_path='D:\\data\\quantization\\stock16_18d_20200310.log', model_path='16_18d_mix_seq')
-    predict(file_path='D:\\data\\quantization\\stock16_18d_20191225_20200310_1.log', model_path='16_18d_mix_seq')
+    predict(file_path='D:\\data\\quantization\\stock160_18d_10D_20200313.log', model_path='160_18d_mix_10D_ma5_s_seq')
     # predict(file_path='D:\\data\\quantization\\stock9_18_4.log', model_path='18d_dnn_seq.h5')
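
The x0..x4 counters introduced above tally, per day, how often each class carries the maximum probability; numpy expresses the same count in one line (a sketch over a toy batch of softmax rows):

    import numpy as np

    result = np.array([[0.1, 0.2, 0.4, 0.2, 0.1],
                       [0.5, 0.3, 0.1, 0.05, 0.05]])
    print(np.bincount(result.argmax(axis=1), minlength=5))  # [1 0 1 0 0], i.e. x0..x4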

+ 51 - 33
mix/mix_predict_everyday.py

@@ -5,15 +5,11 @@ import joblib
 
 
 holder_stock_list = [
-                        '000063.SZ',
-                        '002373.SZ',
-                        '300253.SZ',
-                        '300059.SZ',
-                        '300807.SZ',
-                        '600345.SH',
+                        '000063.SZ', '002093.SZ',
+                        '300253.SZ', '300807.SZ',
+
                          # account B
-                        '300422.SZ',
-                        '300468.SZ',
+
     ]
 
 
@@ -72,9 +68,9 @@ A_concept_code_list = [   'TS2', # 5G
 gainian_map = {}
 hangye_map = {}
 
-def predict_today(day, model='10_18d', log=True):
+def predict_today(file, day, model='10_18d', log=True):
     lines = []
-    with open('D:\\data\\quantization\\stock' + model[:-4] + '_' +  str(day) +'.log') as f:
+    with open(file) as f:
         for line in f.readlines()[:]:
             line = eval(line.strip())
             # if line[-1][0].startswith('0') or line[-1][0].startswith('3'):
@@ -82,7 +78,7 @@ def predict_today(day, model='10_18d', log=True):
 
     size = len(lines[0])
 
-    model=load_model(model + '_seq.h5')
+    model=load_model(model)
 
     for line in lines:
         train_x = np.array([line[:size - 1]])
@@ -96,7 +92,7 @@ def predict_today(day, model='10_18d', log=True):
         # print(result, line[-1])
         stock = code_table.find_one({'ts_code':line[-1][0]})
 
-        if result[0][0] > 0.5 or result[0][1] > 0.5:
+        if result[0][0] + result[0][1] > 0.5:
             if line[-1][0].startswith('688'):
                 continue
             # drop ST stocks
@@ -127,42 +123,46 @@ def predict_today(day, model='10_18d', log=True):
             else:
                 hangye_map[stock['sw_industry']] = 1
 
-            # if len(concept_code_list) > 0:
-            #     for concept in concept_code_list:
-            #         for c in all_concept_code_list:
-            #             if c['code'] == concept['concept_code']:
-            #                 concept_detail_list.append(c['name'])
-            #
-            #                 if c['name'] in gainian_map:
-            #                     g_c = gainian_map[c['name']]
-            #                     gainian_map[c['name']] = g_c + 1
-            #                 else:
-            #                     gainian_map[c['name']] = 1
+            if len(concept_code_list) > 0:
+                for concept in concept_code_list:
+                    for c in all_concept_code_list:
+                        if c['code'] == concept['concept_code']:
+                            concept_detail_list.append(c['name'])
+
+                            if c['name'] in gainian_map:
+                                g_c = gainian_map[c['name']]
+                                gainian_map[c['name']] = g_c + 1
+                            else:
+                                gainian_map[c['name']] = 1
 
             print(line[-1], stock['name'], stock['sw_industry'], str(concept_detail_list), 'buy', k_table_list[0]['pct_chg'])
 
             if log is True:
-                with open('D:\\data\\quantization\\predict\\' + str(day) + '.txt', mode='a', encoding="utf-8") as f:
+                with open('D:\\data\\quantization\\predict\\' + str(day) + '_mix.txt', mode='a', encoding="utf-8") as f:
                     f.write(str(line[-1]) + ' ' + stock['name'] + ' ' + stock['sw_industry'] + ' ' + str(concept_detail_list) + ' buy' + '\n')
 
-
-            # concept_list = list(stock_concept_table.find({'ts_code':stock['ts_code']}))
-            # concept_list = [c['concept_code'] for c in concept_list]
-
         elif result[0][2] > 0.5:
             if stock['ts_code'] in holder_stock_list:
                 print(stock['ts_code'], stock['name'], 'sideways rating')
 
-        elif result[0][3] > 0.5 or result[0][4] > 0.5:
+        elif result[0][3] + result[0][4] > 0.5:
             if stock['ts_code'] in holder_stock_list:
                 print(stock['ts_code'], stock['name'], 'sell immediately')
         else:
             if stock['ts_code'] in holder_stock_list:
                 print(stock['ts_code'], stock['name'], result[0],)
 
-    print(gainian_map)
-    print(hangye_map)
+    # print(gainian_map)
+    # print(hangye_map)
+
+    gainian_list = [(key, gainian_map[key]) for key in gainian_map]
+    gainian_list = sorted(gainian_list, key=lambda x:x[1], reverse=True)
 
+    hangye_list = [(key, hangye_map[key]) for key in hangye_map]
+    hangye_list = sorted(hangye_list, key=lambda x:x[1], reverse=True)
+
+    print(gainian_list)
+    print(hangye_list)
 
 
 def _read_pfile_map(path):
     s_list = []
@@ -181,9 +181,27 @@ def join_two_day(a, b):
                 print(a)
 
 
+def check_everyday(day, today):
+    a_list = _read_pfile_map('D:\\data\\quantization\\predict\\' + str(day) + '.txt')
+    x = 0
+    for a in a_list:
+        print(a[:-1])
+        k_day_list = list(k_table.find({'code':a[2:11], 'tradeDate':{'$lte':int(today)}}).sort('tradeDate', pymongo.DESCENDING).limit(5))
+        if k_day_list is not None and len(k_day_list) > 0:
+            k_day = k_day_list[0]
+            k_day_0 = k_day_list[-1]
+            k_day_last = k_day_list[1]
+            if ((k_day_last['close'] - k_day_0['pre_close'])/k_day_0['pre_close']) < 0.2:
+                print(k_day['open'], k_day['close'], 100*(k_day['close'] - k_day_last['close'])/k_day_last['close'])
+                x = x + 100*(k_day['close'] - k_day_last['close'])/k_day_last['close']
+
+    print(x/len(a_list))
+
+
 if __name__ == '__main__':
     # predict(file_path='D:\\data\\quantization\\stock6_5_test.log', model_path='5d_dnn_seq.h5')
     # predict(file_path='D:\\data\\quantization\\stock6_test.log', model_path='15m_dnn_seq.h5')
     # multi_predict()
-    predict_today(20200310, model='16_18d_mix', log=True)
-    # join_two_day(20200305, 20200305)
+    predict_today("D:\\data\\quantization\\stock160_18d_10D_20200313.log", 20200313, model='160_18d_mix_5D_ma5_s_seq.h5', log=True)
+    # join_two_day(20200305, 20200305)
+    # check_everyday(20200311, 20200312)
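
The gainian_map/hangye_map bookkeeping restored above (count occurrences, then sort descending by count) is exactly what collections.Counter provides; an equivalent sketch:

    from collections import Counter

    hangye = Counter()
    for industry_name in ['软件服务', '半导体', '软件服务']:
        hangye[industry_name] += 1
    print(hangye.most_common())  # [('软件服务', 2), ('半导体', 1)]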

+ 4 - 3
mix/mix_train.py

@@ -21,7 +21,8 @@ early_stopping = EarlyStopping(monitor='accuracy', patience=5, verbose=2)
 
 epochs= 120
 size = 380000
-
+model_path="160_18d_mix_5D_ma5_s_seq.h5"
+# deprecated
 
 def read_data(path):
     lines = []
@@ -176,6 +177,6 @@ print(score)
 print('Test score:', score[0])
 print('Test accuracy:', score[1])
 
-path="16_18d_mix_seq.h5"
-model.save(path)
+
+model.save(model_path)
 model=None

+ 10 - 8
mix/mix_train_1.py

@@ -15,15 +15,19 @@ from keras.layers import Conv2D, Activation, MaxPool2D, Flatten, Dense,Dropout,I
 from keras import regularizers
 from keras.models import Model
 
-epochs= 130
-size = 380000
+epochs= 440
+size = 580000
+file_path = 'D:\\data\\quantization\\stock160_18d_train.log'
+model_path = '160_18d_mix_5D_ma5_s_seq.h5'
 
 
 def read_data(path):
     lines = []
     with open(path) as f:
-        for x in range(size): #380000
-            lines.append(eval(f.readline().strip()))
+        i = 0
+        for x in range(size): #610000
+            line = eval(f.readline().strip())
+            lines.append(line)
 
     random.shuffle(lines)
     print('Finished reading data')
@@ -45,7 +49,7 @@ def read_data(path):
     return X_resampled,y_resampled,np.array(test_x),np.array(test_y)
 
 
-train_x,train_y,test_x,test_y=read_data("D:\\data\\quantization\\stock16_18d_train.log")
+train_x,train_y,test_x,test_y=read_data(file_path)
 
 train_x_a = train_x[:,:18*24]
 train_x_a = train_x_a.reshape(train_x.shape[0], 18, 24, 1)
@@ -168,6 +172,4 @@ print(score)
 print('Test score:', score[0])
 print('Test accuracy:', score[1])
 
-path="16_18d_mix_seq.h5"
-model.save(path)
-model=None
+model.save(model_path)
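
A portability note for read_data here and in the other training scripts: RandomOverSampler.fit_sample is the old imbalanced-learn spelling; since version 0.4 the method is fit_resample, and fit_sample was later removed, so newer environments need the renamed call (same semantics):

    import numpy as np
    from imblearn.over_sampling import RandomOverSampler

    ros = RandomOverSampler(random_state=0)
    X_res, y_res = ros.fit_resample(np.array([[0.0], [1.0], [2.0]]), np.array([0, 0, 1]))
    print(sorted(y_res))  # [0, 0, 1, 1] -- minority class duplicated to balance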

+ 175 - 0
mix/mix_train_2.py

@@ -0,0 +1,175 @@
+# -*- encoding:utf-8 -*-
+import keras
+import numpy as np
+from keras.models import Sequential
+# Adam is used as the optimizer (plenty of alternatives exist, e.g. SGD)
+from keras.optimizers import Adam
+import random
+from keras.models import load_model
+from imblearn.over_sampling import RandomOverSampler
+from keras.utils import np_utils
+# Layer imports: Conv2D builds convolutions, Activation applies activation
+# functions, MaxPooling2D is the pooling layer, Flatten collapses
+# multi-dimensional input to one dimension, Dense is the fully connected layer
+from keras.layers import Conv2D, Activation, MaxPool2D, Flatten, Dense,Dropout,Input,MaxPooling2D,BatchNormalization,concatenate
+from keras import regularizers
+from keras.models import Model
+
+epochs= 440
+size = 580000
+file_path = 'D:\\data\\quantization\\stock160_18d_train.log'
+model_path = '160_18d_mix_5D_ma5_s_seq.h5'
+
+
+def read_data(path):
+    lines = []
+    with open(path) as f:
+        i = 0
+        for x in range(size): #610000
+            line = eval(f.readline().strip())
+            lines.append(line)
+
+    random.shuffle(lines)
+    print('Finished reading data')
+
+    d = int(0.7*len(lines))
+
+    train_x=[s[:-2] for s in lines[0:d]]
+    train_y=[s[-1] for s in lines[0:d]]
+    test_x=[s[:-2] for s in lines[d:]]
+    test_y=[s[-1] for s in lines[d:]]
+
+    print('Finished converting data')
+
+    ros = RandomOverSampler(random_state=0)
+    X_resampled, y_resampled = ros.fit_sample(np.array(train_x), np.array(train_y))
+
+    print('Finished resampling data')
+
+    return X_resampled,y_resampled,np.array(test_x),np.array(test_y)
+
+
+train_x,train_y,test_x,test_y=read_data(file_path)
+
+train_x_a = train_x[:,:18*24]
+train_x_a = train_x_a.reshape(train_x.shape[0], 18, 24, 1)
+# train_x_b = train_x[:, 18*18:18*18+2*18]
+# train_x_b = train_x_b.reshape(train_x.shape[0], 18, 2, 1)
+train_x_c = train_x[:,18*24:]
+
+
+def create_mlp(dim, regress=False):
+    # define our MLP network
+    model = Sequential()
+    model.add(Dense(64, input_dim=dim, activation="relu"))
+    model.add(Dense(64, activation="relu"))
+
+    # check to see if the regression node should be added
+    if regress:
+        model.add(Dense(1, activation="linear"))
+
+    # return our model
+    return model
+
+
+def create_cnn(width, height, depth, filters=(4, 6), kernel_size=(5, 6), regress=False, output=24):
+    # initialize the input shape and channel dimension, assuming
+    # TensorFlow/channels-last ordering
+    inputShape = (width, height, 1)
+    chanDim = -1
+
+    # define the model input
+    inputs = Input(shape=inputShape)
+
+    x = inputs
+
+    # CONV => RELU => BN => POOL
+    x = Conv2D(32, kernel_size, strides=2, padding="same")(x)
+    x = Activation("relu")(x)
+    x = BatchNormalization(axis=chanDim)(x)
+    # x = MaxPooling2D(pool_size=(2, 2))(x)
+    # if width > 2:
+    #     x = Conv2D(32, (10, 6), padding="same")(x)
+    #     x = Activation("relu")(x)
+    #     x = BatchNormalization(axis=chanDim)(x)
+
+    # flatten the volume, then FC => RELU => BN => DROPOUT
+    x = Flatten()(x)
+    x = Dense(output)(x)
+    x = Activation("relu")(x)
+    x = BatchNormalization(axis=chanDim)(x)
+    x = Dropout(0.2)(x)
+
+    # apply another FC layer, this one to match the number of nodes
+    # coming out of the MLP
+    x = Dense(output)(x)
+    x = Activation("relu")(x)
+
+    # check to see if the regression node should be added
+    if regress:
+        x = Dense(1, activation="linear")(x)
+
+    # construct the CNN
+    model = Model(inputs, x)
+
+    # return the CNN
+    return model
+
+
+# create the MLP and CNN models
+mlp = create_mlp(train_x_c.shape[1], regress=False)
+cnn_0 = create_cnn(18, 24, 1, kernel_size=(6, 6), regress=False, output=256)
+# cnn_1 = create_cnn(18, 2, 1, kernel_size=(6,2), regress=False, output=36)
+
+# create the input to our final set of layers as the *output* of both
+# the MLP and CNN
+combinedInput = concatenate([mlp.output, cnn_0.output])
+
+# our final FC head stacks a few dense layers, the last one
+# being the classification head
+x = Dense(512, activation="relu", kernel_regularizer=regularizers.l1(0.003))(combinedInput)
+x = Dropout(0.2)(x)
+x = Dense(512, activation="relu")(x)
+x = Dense(512, activation="relu")(x)
+# add one more layer
+x = Dense(5, activation="softmax")(x)
+
+# the final model accepts numerical data on the MLP input and the
+# 18x24 feature image on the CNN input, outputting a 5-class distribution
+model = Model(inputs=[mlp.input, cnn_0.input], outputs=x)
+
+
+print("Starting training ")
+# h = model.fit(train_x, train_y, batch_size=4096*2, epochs=500, shuffle=True)
+
+# compile with categorical cross-entropy, matching the 5-class softmax output
+opt = Adam(lr=1e-3, decay=1e-3 / 200)
+model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=['accuracy'])
+
+# train the model
+print("[INFO] training model...")
+model.fit(
+    [train_x_c, train_x_a], train_y,
+    # validation_data=([testAttrX, testImagesX], testY),
+    # epochs=int(3*train_x_a.shape[0]/1300),
+    epochs=epochs,
+    batch_size=2048, shuffle=True)
+
+test_x_a = test_x[:,:18*24]
+test_x_a = test_x_a.reshape(test_x.shape[0], 18, 24, 1)
+# test_x_b = test_x[:, 18*18:18*18+2*18]
+# test_x_b = test_x_b.reshape(test_x.shape[0], 18, 2, 1)
+test_x_c = test_x[:,18*24:]
+
+# evaluate on the held-out data
+print("[INFO] evaluating model...")
+score  = model.evaluate([test_x_c, test_x_a], test_y)
+
+print(score)
+print('Test score:', score[0])
+print('Test accuracy:', score[1])
+
+model.save(model_path)