
Compare LSTM classification results after k-means clustering

yufeng 4 years ago
parent
commit 6ce2ca916d

+ 125 - 0
mix/lstm_kmeans_predict.py

@@ -0,0 +1,125 @@
+# -*- encoding:utf-8 -*-
+import numpy as np
+from keras.models import load_model
+import joblib
+
+model_path = '160_18d_lstm_5D_ma5_s_seq.h5'
+data_dir = 'D:\\data\\quantization\\'
+kmeans = 'roc'
+
+
+def read_data(path):
+    lines = []
+    with open(path) as f:
+        for line in f.readlines()[:]:
+            line = eval(line.strip())
+            if line[-2][0].startswith('0') or line[-2][0].startswith('3'):
+                lines.append(line)
+
+    size = len(lines[0])
+    train_x=[s[:size - 2] for s in lines]
+    train_y=[s[size-1] for s in lines]
+    return np.array(train_x),np.array(train_y),lines
+
+
+def _score(fact, line):
+    with open('mix_predict_dmi_18d.txt', 'a') as f:
+        f.write(str([line[-2], line[-1]]) + "\n")
+
+    up_right = 0
+    up_error = 0
+
+    if fact[0] == 1:
+        up_right = up_right + 1.12
+    elif fact[1] == 1:
+        up_right = up_right + 1.06
+    elif fact[2] == 1:
+        up_right = up_right + 1
+        up_error = up_error + 0.5
+    elif fact[3] == 1:
+        up_right = up_right + 0.94
+        up_error = up_error + 1
+    else:
+        up_error = up_error + 1
+        up_right = up_right + 0.88
+    return up_right,up_error
+
+
+def mul_predict(name="10_18d"):
+    r = 0
+    p = 0
+
+    for x in range(0, 8):
+        win_dnn, up_ratio,down_ratio  = predict(data_dir + kmeans + '\\stock160_18d_train1_B_' + str(x) + ".log", x) # stock160_18d_trai_0
+
+        r = r + up_ratio
+        p = p + down_ratio
+
+    print(r, p)
+
+
+def predict(file_path='', idx=-1):
+    test_x,test_y,lines=read_data(file_path)
+    print(idx, 'Load data success')
+
+    test_x_a = test_x[:,:18*24]
+    test_x_a = test_x_a.reshape(test_x.shape[0], 18, 24)
+    # test_x_b = test_x[:, 18*16:18*16+10*18]
+    # test_x_b = test_x_b.reshape(test_x.shape[0], 18, 10, 1)
+    test_x_c = test_x[:,18*24:]
+
+    model=load_model(model_path.split('.')[0] + '_' + str(idx) + '.h5')
+    score = model.evaluate([test_x_c, test_x_a, ], test_y)
+    print('LSTM', score)
+
+    up_num = 0
+    up_error = 0
+    up_right = 0
+    down_num = 0
+    down_error = 0
+    down_right = 0
+    i = 0
+    result=model.predict([test_x_c, test_x_a, ])
+    win_dnn = []
+    for r in result:
+        fact = test_y[i]
+
+        if idx in [-2]:
+            if r[0] > 0.5 or r[1] > 0.5:
+                pass
+        else:
+            if r[0] > 0.6 or r[1] > 0.6:
+                tmp_right,tmp_error = _score(fact, lines[i])
+                up_right = tmp_right + up_right
+                up_error = tmp_error + up_error
+                up_num = up_num + 1
+            elif r[3] > 0.7 or r[4] > 0.7:
+                if fact[0] == 1:
+                    down_error = down_error + 1
+                    down_right = down_right + 1.12
+                elif fact[1] == 1:
+                    down_error = down_error + 1
+                    down_right = down_right + 1.06
+                elif fact[2] == 1:
+                    down_error = down_error + 0.5
+                    down_right = down_right + 1
+                elif fact[3] == 1:
+                    down_right = down_right + 0.94
+                else:
+                    down_right = down_right + 0.88
+                down_num = down_num + 1
+
+        i = i + 1
+    if up_num == 0:
+        up_num = 1
+    if down_num == 0:
+        down_num = 1
+    print('LSTM', up_right, up_num, up_right/up_num, up_error/up_num, down_right/down_num, down_error/down_num)
+    return win_dnn,up_right/up_num,down_right/down_num
+
+
+if __name__ == '__main__':
+    # predict(file_path='D:\\data\\quantization\\stock160_18d_10D_test.log', model_path='160_18d_lstm_5D_ma5_s_seq.h5')
+    # predict(file_path='D:\\data\\quantization\\stock6_test.log', model_path='15m_dnn_seq.h5')
+    mul_predict(name='stock160_18d')
+    # predict_today(20200229, model='11_18d')
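
Note: mul_predict() reads per-cluster test files (roc\stock160_18d_train1_B_0.log ... _7.log) that are produced outside this commit. A minimal sketch of how such buckets could be generated with scikit-learn's KMeans; k=8 is taken from the range(0, 8) loops above, while the feature slice, output file names, and pickle name are assumptions:

# Hypothetical bucketing step (not part of this commit).
import numpy as np
from sklearn.cluster import KMeans
import joblib

def split_into_buckets(path, n_clusters=8):          # k=8 mirrors range(0, 8) above
    lines = [eval(l.strip()) for l in open(path)]
    features = np.array([s[:-2] for s in lines])     # same slice that read_data() feeds the model
    km = KMeans(n_clusters=n_clusters, random_state=0).fit(features)
    joblib.dump(km, 'kmeans_roc.pkl')                # pickle name is an assumption
    for label, row in zip(km.labels_, lines):
        # one output file per cluster, matching the _B_<idx>.log pattern
        with open('stock160_18d_train1_B_%d.log' % label, 'a') as f:
            f.write(str(row) + '\n')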

+ 221 - 0
mix/lstm_kmeans_train.py

@@ -0,0 +1,221 @@
+import keras
+# -*- encoding:utf-8 -*-
+import numpy as np
+from keras.models import Sequential
+# use Adam as the optimizer (many alternatives exist, e.g. SGD)
+from keras.optimizers import Adam
+import random
+from keras.models import load_model
+from imblearn.over_sampling import RandomOverSampler
+from keras.utils import np_utils
+# layer imports: Conv2D for convolution, Activation for activation functions,
+# MaxPooling2D for pooling; Flatten collapses multi-dimensional input to 1D;
+# Dense is a fully-connected layer
+from keras.layers import Conv2D, Activation, MaxPool2D, Flatten, Dense,Dropout,Input,MaxPooling2D,BatchNormalization,concatenate
+from keras.layers import LSTM
+from keras import regularizers
+from keras.models import Model
+
+from keras.callbacks import EarlyStopping
+
+early_stopping = EarlyStopping(monitor='accuracy', patience=5, verbose=2)
+
+epochs= 330
+size = 580000  # ~610k lines in total
+file_path = 'D:\\data\\quantization\\stock160_18d_train.log'
+model_path = '160_18d_lstm_5D_ma5_s_seq.h5'
+data_dir = 'D:\\data\\quantization\\'
+
+
+def read_data(path):
+    lines = []
+    with open(path) as f:
+        for line in f.readlines()[:]:
+            lines.append(eval(line.strip()))
+
+    random.shuffle(lines)
+    print('Finished reading data')
+
+    d=int(0.7*len(lines))
+
+    train_x=[s[:-2] for s in lines[0:d]]
+    train_y=[s[-1] for s in lines[0:d]]
+    test_x=[s[:-2] for s in lines[d:]]
+    test_y=[s[-1] for s in lines[d:]]
+
+    print('Finished converting data')
+
+    ros = RandomOverSampler(random_state=0)
+    X_resampled, y_resampled = ros.fit_sample(np.array(train_x), np.array(train_y))
+
+    print('Finished resampling data')
+
+    return X_resampled,y_resampled,np.array(test_x),np.array(test_y)
+
+
+def mul_train(name="10_18d"):
+    for x in range(0, 8):
+        score = train(data_dir + 'kmeans\\' + name + "_trai_" + str(x) + ".log", x) # stock160_18d_trai_0
+
+        with open(data_dir + name + '_lstm.log', 'a') as f:
+            f.write(str(x) + ':' + str(score[1]) + '\n')
+
+
+def train(file_path, idx):
+    train_x,train_y,test_x,test_y=read_data(file_path)
+
+    train_x_a = train_x[:,:18*24]
+    train_x_a = train_x_a.reshape(train_x.shape[0], 18, 24)
+    # train_x_b = train_x[:, 18*24:18*16+10*18]
+    # train_x_b = train_x_b.reshape(train_x.shape[0], 18, 10, 1)
+    train_x_c = train_x[:,18*24:]
+
+    # create the MLP and LSTM models
+    mlp = create_mlp(train_x_c.shape[1], regress=False)
+    cnn_0 = create_lstm(train_x_a.shape[1], 18, 24)
+    # cnn_1 = create_cnn(18, 10, 1, kernel_size=(3, 5), filters=32, regress=False, output=120)
+
+    # create the input to our final set of layers as the *output* of both
+    # the MLP and LSTM
+    combinedInput = concatenate([mlp.output, cnn_0.output,])
+
+    # our final FC layer head will have several dense layers, the final one
+    # being the 5-class softmax head
+    x = Dense(256, activation="relu", kernel_regularizer=regularizers.l1(0.003))(combinedInput)
+    x = Dropout(0.2)(x)
+    x = Dense(256, activation="relu")(x)
+    x = Dense(512, activation="relu")(x)
+    # add one more layer
+    x = Dense(5, activation="softmax")(x)
+
+    # the final model accepts the flat numerical features on the MLP input and
+    # the 18x24 sequence on the LSTM input, outputting 5 class probabilities
+    model = Model(inputs=[mlp.input, cnn_0.input,], outputs=x)
+
+
+    print("Starting training ")
+    # h = model.fit(train_x, train_y, batch_size=4096*2, epochs=500, shuffle=True)
+
+    # compile the model with categorical cross-entropy as the loss,
+    # since the labels are one-hot vectors over 5 return buckets
+    opt = Adam(lr=1e-3, decay=1e-3 / 200)
+    model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=['accuracy'],
+                  )
+
+    # train the model
+    print("[INFO] training model...")
+    model.fit(
+        [train_x_c, train_x_a, ], train_y,
+        # validation_data=([testAttrX, testImagesX], testY),
+        # epochs=int(3*train_x_a.shape[0]/1300),
+        epochs=epochs,
+        batch_size=4096, shuffle=True,
+        callbacks=[early_stopping]
+    )
+
+    test_x_a = test_x[:,:18*24]
+    test_x_a = test_x_a.reshape(test_x.shape[0], 18, 24)
+    # test_x_b = test_x[:, 18*16:18*16+10*18]
+    # test_x_b = test_x_b.reshape(test_x.shape[0], 18, 10, 1)
+    test_x_c = test_x[:,18*24:]
+
+    # evaluate on the held-out testing data
+    print("[INFO] evaluating model...")
+    score  = model.evaluate([test_x_c, test_x_a], test_y)
+
+    print(score)
+    print('Test score:', score[0])
+    print('Test accuracy:', score[1])
+
+    model.save(model_path.split('.')[0] + '_' + str(idx) + '.h5')
+
+    return score
+
+
+def create_mlp(dim, regress=False):
+    # define our MLP network
+    model = Sequential()
+    model.add(Dense(64, input_dim=dim, activation="relu"))
+    model.add(Dense(64, activation="relu"))
+
+    # check to see if the regression node should be added
+    if regress:
+        model.add(Dense(1, activation="linear"))
+
+    # return our model
+    return model
+
+
+def create_cnn(width, height, depth, filters=32, kernel_size=(5, 6), regress=False, output=24):
+    # initialize the input shape and channel dimension, assuming
+    # TensorFlow/channels-last ordering
+    inputShape = (width, height, 1)
+    chanDim = -1
+
+    # define the model input
+    inputs = Input(shape=inputShape)
+
+    x = inputs
+
+    # CONV => RELU => BN => POOL
+    x = Conv2D(filters, kernel_size, strides=(2,2), padding="same",
+               # data_format='channels_first'
+               )(x)
+    x = Activation("relu")(x)
+    x = BatchNormalization(axis=chanDim)(x)
+    # x = MaxPooling2D(pool_size=(2, 2))(x)
+    # if width > 2:
+    #     x = Conv2D(32, (10, 6), padding="same")(x)
+    #     x = Activation("relu")(x)
+    #     x = BatchNormalization(axis=chanDim)(x)
+
+    # flatten the volume, then FC => RELU => BN => DROPOUT
+    x = Flatten()(x)
+    x = Dense(output)(x)
+    x = Activation("relu")(x)
+    x = BatchNormalization(axis=chanDim)(x)
+    x = Dropout(0.2)(x)
+
+    # apply another FC layer, this one to match the number of nodes
+    # coming out of the MLP
+    x = Dense(output)(x)
+    x = Activation("relu")(x)
+
+    # check to see if the regression node should be added
+    if regress:
+        x = Dense(1, activation="linear")(x)
+
+    # construct the CNN
+    model = Model(inputs, x)
+
+    # return the CNN
+    return model
+
+
+def create_lstm(sample, timesteps, input_dim):
+    inputShape = (timesteps, input_dim)
+
+    # define the model input
+    inputs = Input(shape=inputShape)
+
+    x = inputs
+
+    x = LSTM(units = 64, input_shape=(timesteps, input_dim), dropout=0.2
+               )(x)
+    # x = LSTM(16*16, return_sequences=False)
+    # x = Activation("relu")(x)
+    x = Dense(64)(x)
+    x = Dropout(0.2)(x)
+    x = Activation("relu")(x)
+
+    # construct the LSTM model
+    model = Model(inputs, x)
+
+    # return the LSTM model
+    return model
+
+
+if __name__ == '__main__':
+    mul_train('stock160_18d')
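
Note: train() and predict() feed two views of the same flat row: the first 18*24 = 432 columns reshaped to an (18, 24) sequence for the LSTM branch, and the remaining columns fed to the MLP branch. A small self-contained sketch of that split; the 7 trailing flat features are made up for illustration:

import numpy as np

rows = np.random.rand(5, 18 * 24 + 7)                          # 5 samples, 7 trailing flat features (assumed)
seq_branch = rows[:, :18 * 24].reshape(rows.shape[0], 18, 24)  # LSTM input: 18 days x 24 indicators
flat_branch = rows[:, 18 * 24:]                                # MLP input: the remaining flat features
print(seq_branch.shape, flat_branch.shape)                     # (5, 18, 24) (5, 7)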

+ 1 - 1
mix/lstm_predict.py

@@ -102,7 +102,7 @@ def predict(file_path='', model_path='15min_dnn_seq.h5', idx=-1):
 
 
 if __name__ == '__main__':
-    predict(file_path='D:\\data\\quantization\\stock17_18d_test.log', model_path='17_18d_lstm_seq.h5')
+    predict(file_path='D:\\data\\quantization\\stock160_18d_train1.log', model_path='160_18d_lstm_5D_ma5_s_seq.h5')
     # predict(file_path='D:\\data\\quantization\\stock6_test.log', model_path='15m_dnn_seq.h5')
     # multi_predict(model='15_18d')
     # predict_today(20200229, model='11_18d')

+ 81 - 0
mix/lstm_predict_by_day.py

@@ -0,0 +1,81 @@
+# -*- encoding:utf-8 -*-
+import numpy as np
+from keras.models import load_model
+import joblib
+
+
+def read_data(path):
+    day_lines = {}
+    with open(path) as f:
+        for line in f.readlines()[:]:
+            line = eval(line.strip())
+            date = str(line[-1][-1])
+            if date in day_lines:
+                day_lines[date].append(line)
+            else:
+                day_lines[date] = [line]
+    # print(len(day_lines['20191230']))
+    return day_lines
+
+
+def predict(file_path='', model_path='15min_dnn_seq'):
+    day_lines = read_data(file_path)
+    print('Finished reading data')
+
+    model=load_model(model_path)
+    print('Model loaded')
+
+    items = sorted(day_lines.keys())
+    for key in items:
+        # print(day)
+        lines = day_lines[key]
+
+        up_num = 0
+        down_num = 0
+        size = len(lines[0])
+        x0 = 0
+        x1 = 0
+        x2 = 0
+        x3 = 0
+        x4 = 0
+
+        for line in lines:
+            train_x = np.array([line[:size - 1]])
+            train_x_a = train_x[:,:18*24]
+            train_x_a = train_x_a.reshape(train_x.shape[0], 18, 24)
+            # train_x_b = train_x[:, 18*18:18*18+2*18]
+            # train_x_b = train_x_b.reshape(train_x.shape[0], 18, 2, 1)
+            train_x_c = train_x[:,18*24:]
+
+            result = model.predict([train_x_c, train_x_a])
+
+            if result[0][3] + result[0][4] > 0.5:
+                down_num = down_num + 1
+            elif result[0][1] + result[0][0] > 0.5:
+                up_num = up_num + 0.6
+            # else:
+            #     up_num = up_num + 0.4 # increase when optimistic, decrease when pessimistic
+            #     down_num = down_num + 0.6
+
+            # if result[0][0] > 0.5:
+            #     x0 = x0 + 1
+            # if result[0][1] > 0.5:
+            #     x1 = x1 + 1
+            # if result[0][2] > 0.5:
+            #     x2 = x2 + 1
+            # if result[0][3] > 0.5:
+            #     x3 = x3 + 1
+            # if result[0][4] > 0.5:
+            #     x4 = x4 + 1
+
+        print(key, int(up_num), int(down_num), (down_num*1.2 + 2)/(up_num*1.2 + 2), )
+        # print(key, x0, x1, x2,x3,x4)
+
+
+if __name__ == '__main__':
+    # predict(file_path='D:\\data\\quantization\\stock6_5_test.log', model_path='5d_dnn_seq.h5')
+    # predict(file_path='D:\\data\\quantization\\stock9_18_20200220.log', model_path='18d_dnn_seq.h5')
+    # predict(file_path='D:\\data\\quantization\\stock9_18_2.log', model_path='18d_dnn_seq.h5')
+    # predict(file_path='D:\\data\\quantization\\stock16_18d_20200310.log', model_path='16_18d_mix_seq')
+    predict(file_path='D:\\data\\quantization\\stock16_18d_20191225_20200310.log', model_path='16_18d_lstm_seq.h5')
+    # predict(file_path='D:\\data\\quantization\\stock9_18_4.log', model_path='18d_dnn_seq.h5')
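
Note: the per-day print reports a smoothed down/up ratio; the constant 2 added to both numerator and denominator keeps the ratio finite and damped on days with few signals. A worked example:

up_num, down_num = 10, 5
ratio = (down_num * 1.2 + 2) / (up_num * 1.2 + 2)  # 8.0 / 14.0, about 0.57
print(round(ratio, 2))                             # < 1 means more up than down signals that day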

+ 207 - 0
mix/lstm_predict_everyday.py

@@ -0,0 +1,207 @@
+# -*- encoding:utf-8 -*-
+import numpy as np
+from keras.models import load_model
+import joblib
+
+
+holder_stock_list = [
+                        '000063.SZ', '002093.SZ',
+                        '300253.SZ', '300807.SZ',
+
+                         # account B
+
+    ]
+
+
+def read_data(path):
+    lines = []
+    with open(path) as f:
+        for line in f.readlines()[:]:
+            line = eval(line.strip())
+            if line[-2][0].startswith('0') or line[-2][0].startswith('3'):
+                lines.append(line)
+
+    size = len(lines[0])
+    train_x=[s[:size - 2] for s in lines]
+    train_y=[s[size-1] for s in lines]
+    return np.array(train_x),np.array(train_y),lines
+
+
+import pymongo
+from util.mongodb import get_mongo_table_instance
+code_table = get_mongo_table_instance('tushare_code')
+k_table = get_mongo_table_instance('stock_day_k')
+stock_concept_table = get_mongo_table_instance('tushare_concept_detail')
+all_concept_code_list = list(get_mongo_table_instance('tushare_concept').find({}))
+
+
+industry = ['家用电器', '元器件', 'IT设备', '汽车服务',  # industry names kept in Chinese to match the tushare data
+            '汽车配件', '软件服务',
+            '互联网', '纺织',
+            '塑料', '半导体',]
+
+A_concept_code_list = [   'TS2', # 5G
+                        'TS24', # OLED
+                        'TS26', # Healthy China
+                        'TS43', # new-energy complete vehicles
+                        'TS59', # Tesla
+                        'TS65', # complete automobiles
+                        'TS142', # Internet of Things
+                        'TS153', # autonomous driving
+                        'TS163', # Xiong'an sector - smart city
+                        'TS175', # industrial automation
+                        'TS232', # new-energy vehicles
+                        'TS254', # artificial intelligence
+                        'TS258', # internet healthcare
+                        'TS264', # industrial internet
+                        'TS266', # semiconductors
+                        'TS269', # smart city
+                        'TS271', # 3D glass
+                        'TS295', # domestic chips
+                        'TS303', # medical informatization
+                        'TS323', # charging piles
+                        'TS328', # iris recognition
+                        'TS361', # virus
+    ]
+
+
+gainian_map = {}
+hangye_map = {}
+
+def predict_today(file, day, model='10_18d', log=True):
+    lines = []
+    with open(file) as f:
+        for line in f.readlines()[:]:
+            line = eval(line.strip())
+            # if line[-1][0].startswith('0') or line[-1][0].startswith('3'):
+            lines.append(line)
+
+    size = len(lines[0])
+
+    model=load_model(model)
+
+    for line in lines:
+        train_x = np.array([line[:size - 1]])
+        train_x_a = train_x[:,:18*24]
+        train_x_a = train_x_a.reshape(train_x.shape[0], 18, 24)
+        # train_x_b = train_x[:, 18*18:18*18+2*18]
+        # train_x_b = train_x_b.reshape(train_x.shape[0], 18, 2, 1)
+        train_x_c = train_x[:,18*24:]
+
+        result = model.predict([train_x_c, train_x_a])
+        # print(result, line[-1])
+        stock = code_table.find_one({'ts_code':line[-1][0]})
+
+        if result[0][0] + result[0][1] > 0.5:
+            if line[-1][0].startswith('688'):
+                continue
+            # skip ST stocks
+            if stock['name'].startswith('ST') or stock['name'].startswith('N') or stock['name'].startswith('*'):
+                continue
+
+            if stock['ts_code'] in holder_stock_list:
+                print(stock['ts_code'], stock['name'], 'maintain buy rating')
+
+            # recent decliners (the filters below are disabled)
+            k_table_list = list(k_table.find({'code':line[-1][0], 'tradeDate':{'$lte':day}}).sort("tradeDate", pymongo.DESCENDING).limit(5))
+            # if k_table_list[0]['close'] > k_table_list[-1]['close']*1.20:
+            #     continue
+            # if k_table_list[0]['close'] < k_table_list[-1]['close']*0.90:
+            #     continue
+            # if k_table_list[-1]['close'] > 80:
+            #     continue
+
+            # restrict to selected industries
+            # if stock['industry'] in industry:
+            concept_code_list = list(stock_concept_table.find({'ts_code':stock['ts_code']}))
+            concept_detail_list = []
+
+            # tally industry counts
+            if stock['sw_industry'] in hangye_map:
+                i_c = hangye_map[stock['sw_industry']]
+                hangye_map[stock['sw_industry']] = i_c + 1
+            else:
+                hangye_map[stock['sw_industry']] = 1
+
+            if len(concept_code_list) > 0:
+                for concept in concept_code_list:
+                    for c in all_concept_code_list:
+                        if c['code'] == concept['concept_code']:
+                            concept_detail_list.append(c['name'])
+
+                            if c['name'] in gainian_map:
+                                g_c = gainian_map[c['name']]
+                                gainian_map[c['name']] = g_c + 1
+                            else:
+                                gainian_map[c['name']] = 1
+
+            print(line[-1], stock['name'], stock['sw_industry'], str(concept_detail_list), 'buy', k_table_list[0]['pct_chg'])
+
+            if log is True:
+                with open('D:\\data\\quantization\\predict\\' + str(day) + '_lstm.txt', mode='a', encoding="utf-8") as f:
+                    f.write(str(line[-1]) + ' ' + stock['name'] + ' ' + stock['sw_industry'] + ' ' + str(concept_detail_list) + ' buy' + '\n')
+
+        elif result[0][2] > 0.5:
+            if stock['ts_code'] in holder_stock_list:
+                print(stock['ts_code'], stock['name'], 'range-bound rating')
+
+        elif result[0][3] + result[0][4] > 0.5:
+            if stock['ts_code'] in holder_stock_list:
+                print(stock['ts_code'], stock['name'], 'sell immediately')
+        else:
+            if stock['ts_code'] in holder_stock_list:
+                print(stock['ts_code'], stock['name'], result[0],)
+
+    # print(gainian_map)
+    # print(hangye_map)
+
+    gainian_list = [(key, gainian_map[key])for key in gainian_map]
+    gainian_list = sorted(gainian_list, key=lambda x:x[1], reverse=True)
+
+    hangye_list = [(key, hangye_map[key])for key in hangye_map]
+    hangye_list = sorted(hangye_list, key=lambda x:x[1], reverse=True)
+
+    print(gainian_list)
+    print(hangye_list)
+
+def _read_pfile_map(path):
+    s_list = []
+    with open(path, encoding='utf-8') as f:
+        for line in f.readlines()[:]:
+            s_list.append(line)
+    return s_list
+
+
+def join_two_day(a, b):
+    a_list = _read_pfile_map('D:\\data\\quantization\\predict\\' + str(a) + '.txt')
+    b_list = _read_pfile_map('D:\\data\\quantization\\predict\\dmi_' + str(b) + '.txt')
+    for a in a_list:
+        for b in b_list:
+            if a[2:11] == b[2:11]:
+                print(a)
+
+
+def check_everyday(day, today):
+    a_list = _read_pfile_map('D:\\data\\quantization\\predict\\' + str(day) + '.txt')
+    x = 0
+    for a in a_list:
+        print(a[:-1])
+        k_day_list = list(k_table.find({'code':a[2:11], 'tradeDate':{'$lte':int(today)}}).sort('tradeDate', pymongo.DESCENDING).limit(5))
+        if k_day_list is not None and len(k_day_list) > 0:
+            k_day = k_day_list[0]
+            k_day_0 = k_day_list[-1]
+            k_day_last = k_day_list[1]
+            if ((k_day_last['close'] - k_day_0['pre_close'])/k_day_0['pre_close']) < 0.2:
+                print(k_day['open'], k_day['close'], 100*(k_day['close'] - k_day_last['close'])/k_day_last['close'])
+                x = x + 100*(k_day['close'] - k_day_last['close'])/k_day_last['close']
+
+    print(x/len(a_list))
+
+
+if __name__ == '__main__':
+    # predict(file_path='D:\\data\\quantization\\stock6_5_test.log', model_path='5d_dnn_seq.h5')
+    # predict(file_path='D:\\data\\quantization\\stock6_test.log', model_path='15m_dnn_seq.h5')
+    # multi_predict()
+    predict_today("D:\\data\\quantization\\stock160_18d_20200312.log", 20200313, model='160_18d_lstm_5D_ma5_s_seq.h5', log=True)
+    # join_two_day(20200305, 20200305)
+    # check_everyday(20200311, 20200312)
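
Note: predict_today() imports get_mongo_table_instance from util.mongodb, which is not included in this commit. A plausible minimal stand-in, assuming a local MongoDB instance and a database named 'quantization' (both assumptions):

import pymongo

_client = pymongo.MongoClient('mongodb://127.0.0.1:27017/')        # host is an assumption

def get_mongo_table_instance(table_name, db_name='quantization'):  # db name is an assumption
    # returns a pymongo Collection exposing find()/find_one()/sort() as used above
    return _client[db_name][table_name]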

+ 13 - 11
mix/lstm_train.py

@@ -20,8 +20,10 @@ from keras.callbacks import EarlyStopping
 
 early_stopping = EarlyStopping(monitor='accuracy', patience=5, verbose=2)
 
-epochs= 200
-size = 380000
+epochs= 440
+size = 580000  # ~610k lines in total
+file_path = 'D:\\data\\quantization\\stock160_18d_train.log'
+model_path = '160_18d_lstm_5D_ma5_s_seq.h5'
 
 def read_data(path):
     lines = []
@@ -49,7 +51,7 @@ def read_data(path):
     return X_resampled,y_resampled,np.array(test_x),np.array(test_y)
 
 
-train_x,train_y,test_x,test_y=read_data("D:\\data\\quantization\\stock17_18d_train.log")
+train_x,train_y,test_x,test_y=read_data(file_path)
 
 train_x_a = train_x[:,:18*24]
 train_x_a = train_x_a.reshape(train_x.shape[0], 18, 24)
@@ -61,8 +63,8 @@ train_x_c = train_x[:,18*24:]
 def create_mlp(dim, regress=False):
     # define our MLP network
     model = Sequential()
-    model.add(Dense(16, input_dim=dim, activation="relu"))
-    model.add(Dense(16, activation="relu"))
+    model.add(Dense(64, input_dim=dim, activation="relu"))
+    model.add(Dense(64, activation="relu"))
 
     # check to see if the regression node should be added
     if regress:
@@ -151,10 +153,10 @@ combinedInput = concatenate([mlp.output, cnn_0.output,])
 
 # our final FC layer head will have two dense layers, the final one
 # being our regression head
-x = Dense(666, activation="relu", kernel_regularizer=regularizers.l1(0.003))(combinedInput)
+x = Dense(256, activation="relu", kernel_regularizer=regularizers.l1(0.003))(combinedInput)
 x = Dropout(0.2)(x)
-x = Dense(666, activation="relu")(x)
-x = Dense(666, activation="relu")(x)
+x = Dense(256, activation="relu")(x)
+x = Dense(512, activation="relu")(x)
 # add one more layer
 x = Dense(5, activation="softmax")(x)
 
@@ -181,7 +183,7 @@ model.fit(
     # validation_data=([testAttrX, testImagesX], testY),
     # epochs=int(3*train_x_a.shape[0]/1300),
     epochs=epochs,
-    batch_size=2048, shuffle=True,
+    batch_size=4096, shuffle=True,
     callbacks=[early_stopping]
 )
 
@@ -199,6 +201,6 @@ print(score)
 print('Test score:', score[0])
 print('Test accuracy:', score[1])
 
-path="17_18d_lstm_seq.h5"
-model.save(path)
+
+model.save(model_path)
 model=None

+ 125 - 0
mix/mix_kmeans_predict_1.py

@@ -0,0 +1,125 @@
+# -*- encoding:utf-8 -*-
+import numpy as np
+from keras.models import load_model
+import joblib
+
+model_path = '160_18d_mix_5D_ma5_s_seq.h5'
+data_dir = 'D:\\data\\quantization\\'
+kmeans = 'roc'
+
+
+def read_data(path):
+    lines = []
+    with open(path) as f:
+        for line in f.readlines()[:]:
+            line = eval(line.strip())
+            if line[-2][0].startswith('0') or line[-2][0].startswith('3'):
+                lines.append(line)
+
+    size = len(lines[0])
+    train_x=[s[:size - 2] for s in lines]
+    train_y=[s[size-1] for s in lines]
+    return np.array(train_x),np.array(train_y),lines
+
+
+def _score(fact, line):
+    with open('mix_predict_dmi_18d.txt', 'a') as f:
+        f.write(str([line[-2], line[-1]]) + "\n")
+
+    up_right = 0
+    up_error = 0
+
+    if fact[0] == 1:
+        up_right = up_right + 1.12
+    elif fact[1] == 1:
+        up_right = up_right + 1.06
+    elif fact[2] == 1:
+        up_right = up_right + 1
+        up_error = up_error + 0.5
+    elif fact[3] == 1:
+        up_right = up_right + 0.94
+        up_error = up_error + 1
+    else:
+        up_error = up_error + 1
+        up_right = up_right + 0.88
+    return up_right,up_error
+
+
+def mul_predict(name="10_18d"):
+    r = 0
+    p = 0
+
+    for x in range(0, 8):
+        win_dnn, up_ratio,down_ratio  = predict(data_dir + kmeans + '\\stock160_18d_train1_B_' + str(x) + ".log", x) # stock160_18d_trai_0
+
+        r = r + up_ratio
+        p = p + down_ratio
+
+    print(r, p)
+
+
+def predict(file_path='', idx=-1):
+    test_x,test_y,lines=read_data(file_path)
+    print(idx, 'Load data success')
+
+    test_x_a = test_x[:,:18*24]
+    test_x_a = test_x_a.reshape(test_x.shape[0], 18, 24, 1)
+    # test_x_b = test_x[:, 18*16:18*16+10*18]
+    # test_x_b = test_x_b.reshape(test_x.shape[0], 18, 10, 1)
+    test_x_c = test_x[:,18*24:]
+
+    model=load_model(model_path.split('.')[0] + '_' + str(idx) + '.h5')
+    score = model.evaluate([test_x_c, test_x_a, ], test_y)
+    print('MIX', score)
+
+    up_num = 0
+    up_error = 0
+    up_right = 0
+    down_num = 0
+    down_error = 0
+    down_right = 0
+    i = 0
+    result=model.predict([test_x_c, test_x_a, ])
+    win_dnn = []
+    for r in result:
+        fact = test_y[i]
+
+        if idx in [-2]:
+            if r[0] > 0.5 or r[1] > 0.5:
+                pass
+        else:
+            if r[0] > 0.6 or r[1] > 0.6:
+                tmp_right,tmp_error = _score(fact, lines[i])
+                up_right = tmp_right + up_right
+                up_error = tmp_error + up_error
+                up_num = up_num + 1
+            elif r[3] > 0.7 or r[4] > 0.7:
+                if fact[0] == 1:
+                    down_error = down_error + 1
+                    down_right = down_right + 1.12
+                elif fact[1] == 1:
+                    down_error = down_error + 1
+                    down_right = down_right + 1.06
+                elif fact[2] == 1:
+                    down_error = down_error + 0.5
+                    down_right = down_right + 1
+                elif fact[3] == 1:
+                    down_right = down_right + 0.94
+                else:
+                    down_right = down_right + 0.88
+                down_num = down_num + 1
+
+        i = i + 1
+    if up_num == 0:
+        up_num = 1
+    if down_num == 0:
+        down_num = 1
+    print('MIX', up_right, up_num, up_right/up_num, up_error/up_num, down_right/down_num, down_error/down_num)
+    return win_dnn,up_right/up_num,down_right/down_num
+
+
+if __name__ == '__main__':
+    # predict(file_path='D:\\data\\quantization\\stock160_18d_10D_test.log', model_path='160_18d_lstm_5D_ma5_s_seq.h5')
+    # predict(file_path='D:\\data\\quantization\\stock6_test.log', model_path='15m_dnn_seq.h5')
+    mul_predict(name='stock160_18d')
+    # predict_today(20200229, model='11_18d')

+ 187 - 0
mix/mix_kmeans_train_1.py

@@ -0,0 +1,187 @@
+import keras
+# -*- encoding:utf-8 -*-
+import numpy as np
+from keras.models import Sequential
+# use Adam as the optimizer (many alternatives exist, e.g. SGD)
+from keras.optimizers import Adam
+import random
+from keras.models import load_model
+from imblearn.over_sampling import RandomOverSampler
+from keras.utils import np_utils
+# layer imports: Conv2D for convolution, Activation for activation functions,
+# MaxPooling2D for pooling; Flatten collapses multi-dimensional input to 1D;
+# Dense is a fully-connected layer
+from keras.layers import Conv2D, Activation, MaxPool2D, Flatten, Dense,Dropout,Input,MaxPooling2D,BatchNormalization,concatenate
+from keras import regularizers
+from keras.models import Model
+
+epochs= 50
+size = 580000
+file_path = 'D:\\data\\quantization\\stock160_18d_10D_train.log'
+model_path = '160_18d_mix_5D_ma5_s_seq.h5'
+data_dir = 'D:\\data\\quantization\\'
+
+
+def read_data(path):
+    lines = []
+    with open(path) as f:
+        i = 0
+        for line in f.readlines()[:]:
+            lines.append(eval(line.strip()))
+
+    random.shuffle(lines)
+    print('Finished reading data')
+
+    d=int(0.7*len(lines))
+
+    train_x=[s[:-2] for s in lines[0:d]]
+    train_y=[s[-1] for s in lines[0:d]]
+    test_x=[s[:-2] for s in lines[d:]]
+    test_y=[s[-1] for s in lines[d:]]
+
+    print('Finished converting data')
+
+    ros = RandomOverSampler(random_state=0)
+    X_resampled, y_resampled = ros.fit_sample(np.array(train_x), np.array(train_y))
+
+    print('Finished resampling data')
+
+    return X_resampled,y_resampled,np.array(test_x),np.array(test_y)
+
+
+def mul_train(name="10_18d"):
+    for x in range(0, 8):
+        score = train(data_dir + 'kmeans\\' + name + "_trai_" + str(x) + ".log", x) # stock160_18d_trai_0
+
+        with open(data_dir + name + '_mix.log', 'a') as f:
+            f.write(str(x) + ':' + str(score[1]) + '\n')
+
+
+def train(file_path_name, idx):
+    train_x,train_y,test_x,test_y=read_data(file_path_name)
+
+    train_x_a = train_x[:,:18*24]
+    train_x_a = train_x_a.reshape(train_x.shape[0], 18, 24, 1)
+    # train_x_b = train_x[:, 18*18:18*18+2*18]
+    # train_x_b = train_x_b.reshape(train_x.shape[0], 18, 2, 1)
+    train_x_c = train_x[:,18*24:]
+
+    # create the MLP and CNN models
+    mlp = create_mlp(train_x_c.shape[1], regress=False)
+    cnn_0 = create_cnn(18, 24, 1, kernel_size=(6, 6), regress=False, output=256)
+    # cnn_1 = create_cnn(18, 2, 1, kernel_size=(6,2), regress=False, output=36)
+
+    # create the input to our final set of layers as the *output* of both
+    # the MLP and CNN
+    combinedInput = concatenate([mlp.output, cnn_0.output])
+
+    # our final FC layer head will have several dense layers, the final one
+    # being the 5-class softmax head
+    x = Dense(512, activation="relu", kernel_regularizer=regularizers.l1(0.003))(combinedInput)
+    x = Dropout(0.2)(x)
+    x = Dense(512, activation="relu")(x)
+    x = Dense(512, activation="relu")(x)
+    # add one more layer
+    x = Dense(5, activation="softmax")(x)
+
+    # the final model accepts the flat numerical features on the MLP input and
+    # the 18x24 image-like block on the CNN input, outputting 5 class probabilities
+    model = Model(inputs=[mlp.input, cnn_0.input], outputs=x)
+
+
+    print("Starting training ")
+    # h = model.fit(train_x, train_y, batch_size=4096*2, epochs=500, shuffle=True)
+
+    # compile the model with categorical cross-entropy as the loss,
+    # since the labels are one-hot vectors over 5 return buckets
+    opt = Adam(lr=1e-3, decay=1e-3 / 200)
+    model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=['accuracy'])
+
+    # train the model
+    print("[INFO] training model...")
+    model.fit(
+        [train_x_c, train_x_a], train_y,
+        # validation_data=([testAttrX, testImagesX], testY),
+        # epochs=int(3*train_x_a.shape[0]/1300),
+        epochs=epochs,
+        batch_size=2048, shuffle=True)
+
+    test_x_a = test_x[:,:18*24]
+    test_x_a = test_x_a.reshape(test_x.shape[0], 18, 24, 1)
+    # test_x_b = test_x[:, 18*18:18*18+2*18]
+    # test_x_b = test_x_b.reshape(test_x.shape[0], 18, 2, 1)
+    test_x_c = test_x[:,18*24:]
+
+    # evaluate on the held-out testing data
+    print("[INFO] evaluating model...")
+    score  = model.evaluate([test_x_c, test_x_a], test_y)
+
+    print(score)
+    print('Test score:', score[0])
+    print('Test accuracy:', score[1])
+
+    model.save(model_path.split('.')[0] + '_' + str(idx) + '.h5')
+    return score
+
+def create_mlp(dim, regress=False):
+    # define our MLP network
+    model = Sequential()
+    model.add(Dense(64, input_dim=dim, activation="relu"))
+    model.add(Dense(64, activation="relu"))
+
+    # check to see if the regression node should be added
+    if regress:
+        model.add(Dense(1, activation="linear"))
+
+    # return our model
+    return model
+
+
+def create_cnn(width, height, depth, filters=(4, 6), kernel_size=(5, 6), regress=False, output=24):
+    # initialize the input shape and channel dimension, assuming
+    # TensorFlow/channels-last ordering
+    inputShape = (width, height, 1)
+    chanDim = -1
+
+    # define the model input
+    inputs = Input(shape=inputShape)
+
+    x = inputs
+
+    # CONV => RELU => BN => POOL
+    x = Conv2D(32, kernel_size, strides=2, padding="same")(x)
+    x = Activation("relu")(x)
+    x = BatchNormalization(axis=chanDim)(x)
+    # x = MaxPooling2D(pool_size=(2, 2))(x)
+    # if width > 2:
+    #     x = Conv2D(32, (10, 6), padding="same")(x)
+    #     x = Activation("relu")(x)
+    #     x = BatchNormalization(axis=chanDim)(x)
+
+    # flatten the volume, then FC => RELU => BN => DROPOUT
+    x = Flatten()(x)
+    x = Dense(output)(x)
+    x = Activation("relu")(x)
+    x = BatchNormalization(axis=chanDim)(x)
+    x = Dropout(0.2)(x)
+
+    # apply another FC layer, this one to match the number of nodes
+    # coming out of the MLP
+    x = Dense(output)(x)
+    x = Activation("relu")(x)
+
+    # check to see if the regression node should be added
+    if regress:
+        x = Dense(1, activation="linear")(x)
+
+    # construct the CNN
+    model = Model(inputs, x)
+
+    # return the CNN
+    return model
+
+
+if __name__ == '__main__':
+    mul_train('stock160_18d')
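
Note: read_data() balances the 5 classes with RandomOverSampler before training. A toy run showing the effect on an imbalanced label set; recent imbalanced-learn versions rename fit_sample to fit_resample:

import numpy as np
from imblearn.over_sampling import RandomOverSampler

X = np.arange(12).reshape(6, 2)
y = np.array([0, 0, 0, 0, 1, 1])           # imbalanced 4:2
ros = RandomOverSampler(random_state=0)
X_res, y_res = ros.fit_resample(X, y)      # the scripts above use the older fit_sample name
print(sorted(y_res.tolist()))              # [0, 0, 0, 0, 1, 1, 1, 1] -> balanced 4:4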

+ 1 - 1
mix/mix_predict_1.py

@@ -100,7 +100,7 @@ def predict(file_path='', model_path='15min_dnn_seq.h5', idx=-1):
 
 
 if __name__ == '__main__':
-    predict(file_path='D:\\data\\quantization\\stock16_18d_test_close.log', model_path='16_18d_mix_seq.h5')
+    predict(file_path='D:\\data\\quantization\\stock160_18d_train1.log', model_path='160_18d_mix_5D_ma5_s_seq.h5')
     # predict(file_path='D:\\data\\quantization\\stock6_test.log', model_path='15m_dnn_seq.h5')
     # multi_predict(model='15_18d')
     # predict_today(20200229, model='11_18d')

+ 26 - 8
mix/mix_predict_by_day.py

@@ -33,6 +33,11 @@ def predict(file_path='', model_path='15min_dnn_seq'):
         up_num = 0
         down_num = 0
         size = len(lines[0])
+        x0 = 0
+        x1 = 0
+        x2 = 0
+        x3 = 0
+        x4 = 0
 
         for line in lines:
             train_x = np.array([line[:size - 1]])
@@ -44,15 +49,28 @@ def predict(file_path='', model_path='15min_dnn_seq'):
 
             result = model.predict([train_x_c, train_x_a])
 
-            if result[0][3] > 0.5 or result[0][4] > 0.5:
+            if result[0][3] + result[0][4] > 0.5:
                 down_num = down_num + 1
-            elif result[0][1] > 0.5 or result[0][0] > 0.5:
-                up_num = up_num + 1
-            else:
-                up_num = up_num + 0.4 # increase when optimistic, decrease when pessimistic
-                down_num = down_num + 0.6
+            elif result[0][1] + result[0][0] > 0.5:
+                up_num = up_num + 0.6
+            # else:
+            #     up_num = up_num + 0.4 # increase when optimistic, decrease when pessimistic
+            #     down_num = down_num + 0.6
+
+            maxx = max(result[0])
+            if maxx - result[0][0] == 0:
+                x0 = x0 + 1
+            if maxx - result[0][1] == 0:
+                x1 = x1 + 1
+            if maxx - result[0][2] == 0:
+                x2 = x2 + 1
+            if maxx - result[0][3] == 0:
+                x3 = x3 + 1
+            if maxx - result[0][4] == 0:
+                x4 = x4 + 1
 
-        print(key, int(up_num), int(down_num), (down_num*1.2 + 2)/(up_num*1.2 + 2))
+        # print(key, int(up_num), int(down_num), (down_num*1.2 + 2)/(up_num*1.2 + 2), )
+        print(key, x0, x1, x2,x3,x4)
 
 
 if __name__ == '__main__':
@@ -60,5 +78,5 @@ if __name__ == '__main__':
     # predict(file_path='D:\\data\\quantization\\stock9_18_20200220.log', model_path='18d_dnn_seq.h5')
     # predict(file_path='D:\\data\\quantization\\stock9_18_2.log', model_path='18d_dnn_seq.h5')
     # predict(file_path='D:\\data\\quantization\\stock16_18d_20200310.log', model_path='16_18d_mix_seq')
-    predict(file_path='D:\\data\\quantization\\stock16_18d_20191225_20200310_1.log', model_path='16_18d_mix_seq')
+    predict(file_path='D:\\data\\quantization\\stock160_18d_10D_20200313.log', model_path='160_18d_mix_10D_ma5_s_seq')
     # predict(file_path='D:\\data\\quantization\\stock9_18_4.log', model_path='18d_dnn_seq.h5')

+ 51 - 33
mix/mix_predict_everyday.py

@@ -5,15 +5,11 @@ import joblib
 
 
 holder_stock_list = [
-                        '000063.SZ',
-                        '002373.SZ',
-                        '300253.SZ',
-                        '300059.SZ',
-                        '300807.SZ',
-                        '600345.SH',
+                        '000063.SZ', '002093.SZ',
+                        '300253.SZ', '300807.SZ',
+
                          # account B
-                        '300422.SZ',
-                        '300468.SZ',
+
     ]
 
 
@@ -72,9 +68,9 @@ A_concept_code_list = [   'TS2', # 5G
 gainian_map = {}
 hangye_map = {}
 
-def predict_today(day, model='10_18d', log=True):
+def predict_today(file, day, model='10_18d', log=True):
     lines = []
-    with open('D:\\data\\quantization\\stock' + model[:-4] + '_' +  str(day) +'.log') as f:
+    with open(file) as f:
         for line in f.readlines()[:]:
             line = eval(line.strip())
             # if line[-1][0].startswith('0') or line[-1][0].startswith('3'):
@@ -82,7 +78,7 @@ def predict_today(day, model='10_18d', log=True):
 
     size = len(lines[0])
 
-    model=load_model(model + '_seq.h5')
+    model=load_model(model)
 
     for line in lines:
         train_x = np.array([line[:size - 1]])
@@ -96,7 +92,7 @@ def predict_today(day, model='10_18d', log=True):
         # print(result, line[-1])
         stock = code_table.find_one({'ts_code':line[-1][0]})
 
-        if result[0][0] > 0.5 or result[0][1] > 0.5:
+        if result[0][0] + result[0][1] > 0.5:
             if line[-1][0].startswith('688'):
                 continue
             # skip ST stocks
@@ -127,42 +123,46 @@ def predict_today(day, model='10_18d', log=True):
             else:
                 hangye_map[stock['sw_industry']] = 1
 
-            # if len(concept_code_list) > 0:
-            #     for concept in concept_code_list:
-            #         for c in all_concept_code_list:
-            #             if c['code'] == concept['concept_code']:
-            #                 concept_detail_list.append(c['name'])
-            #
-            #                 if c['name'] in gainian_map:
-            #                     g_c = gainian_map[c['name']]
-            #                     gainian_map[c['name']] = g_c + 1
-            #                 else:
-            #                     gainian_map[c['name']] = 1
+            if len(concept_code_list) > 0:
+                for concept in concept_code_list:
+                    for c in all_concept_code_list:
+                        if c['code'] == concept['concept_code']:
+                            concept_detail_list.append(c['name'])
+
+                            if c['name'] in gainian_map:
+                                g_c = gainian_map[c['name']]
+                                gainian_map[c['name']] = g_c + 1
+                            else:
+                                gainian_map[c['name']] = 1
 
             print(line[-1], stock['name'], stock['sw_industry'], str(concept_detail_list), 'buy', k_table_list[0]['pct_chg'])
 
             if log is True:
-                with open('D:\\data\\quantization\\predict\\' + str(day) + '.txt', mode='a', encoding="utf-8") as f:
+                with open('D:\\data\\quantization\\predict\\' + str(day) + '_mix.txt', mode='a', encoding="utf-8") as f:
                     f.write(str(line[-1]) + ' ' + stock['name'] + ' ' + stock['sw_industry'] + ' ' + str(concept_detail_list) + ' buy' + '\n')
 
-
-            # concept_list = list(stock_concept_table.find({'ts_code':stock['ts_code']}))
-            # concept_list = [c['concept_code'] for c in concept_list]
-
         elif result[0][2] > 0.5:
             if stock['ts_code'] in holder_stock_list:
                 print(stock['ts_code'], stock['name'], 'range-bound rating')
 
-        elif result[0][3] > 0.5 or result[0][4] > 0.5:
+        elif result[0][3] + result[0][4] > 0.5:
             if stock['ts_code'] in holder_stock_list:
                 print(stock['ts_code'], stock['name'], 'sell immediately')
         else:
            if stock['ts_code'] in holder_stock_list:
                 print(stock['ts_code'], stock['name'], result[0],)
 
-    print(gainian_map)
-    print(hangye_map)
+    # print(gainian_map)
+    # print(hangye_map)
+
+    gainian_list = [(key, gainian_map[key])for key in gainian_map]
+    gainian_list = sorted(gainian_list, key=lambda x:x[1], reverse=True)
 
+    hangye_list = [(key, hangye_map[key])for key in hangye_map]
+    hangye_list = sorted(hangye_list, key=lambda x:x[1], reverse=True)
+
+    print(gainian_list)
+    print(hangye_list)
 
 def _read_pfile_map(path):
     s_list = []
@@ -181,9 +181,27 @@ def join_two_day(a, b):
                 print(a)
 
 
+def check_everyday(day, today):
+    a_list = _read_pfile_map('D:\\data\\quantization\\predict\\' + str(day) + '.txt')
+    x = 0
+    for a in a_list:
+        print(a[:-1])
+        k_day_list = list(k_table.find({'code':a[2:11], 'tradeDate':{'$lte':int(today)}}).sort('tradeDate', pymongo.DESCENDING).limit(5))
+        if k_day_list is not None and len(k_day_list) > 0:
+            k_day = k_day_list[0]
+            k_day_0 = k_day_list[-1]
+            k_day_last = k_day_list[1]
+            if ((k_day_last['close'] - k_day_0['pre_close'])/k_day_0['pre_close']) < 0.2:
+                print(k_day['open'], k_day['close'], 100*(k_day['close'] - k_day_last['close'])/k_day_last['close'])
+                x = x + 100*(k_day['close'] - k_day_last['close'])/k_day_last['close']
+
+    print(x/len(a_list))
+
+
 if __name__ == '__main__':
     # predict(file_path='D:\\data\\quantization\\stock6_5_test.log', model_path='5d_dnn_seq.h5')
     # predict(file_path='D:\\data\\quantization\\stock6_test.log', model_path='15m_dnn_seq.h5')
     # multi_predict()
-    predict_today(20200310, model='16_18d_mix', log=True)
-    # join_two_day(20200305, 20200305)
+    predict_today("D:\\data\\quantization\\stock160_18d_10D_20200313.log", 20200313, model='160_18d_mix_5D_ma5_s_seq.h5', log=True)
+    # join_two_day(20200305, 20200305)
+    # check_everyday(20200311, 20200312)

+ 4 - 3
mix/mix_train.py

@@ -21,7 +21,8 @@ early_stopping = EarlyStopping(monitor='accuracy', patience=5, verbose=2)
 
 epochs= 120
 size = 380000
-
+model_path="160_18d_mix_5D_ma5_s_seq.h5"
+# deprecated
 
 def read_data(path):
     lines = []
@@ -176,6 +177,6 @@ print(score)
 print('Test score:', score[0])
 print('Test accuracy:', score[1])
 
-path="16_18d_mix_seq.h5"
-model.save(path)
+
+model.save(model_path)
 model=None

+ 10 - 8
mix/mix_train_1.py

@@ -15,15 +15,19 @@ from keras.layers import Conv2D, Activation, MaxPool2D, Flatten, Dense,Dropout,I
 from keras import regularizers
 from keras.models import Model
 
-epochs= 130
-size = 380000
+epochs= 440
+size = 580000
+file_path = 'D:\\data\\quantization\\stock160_18d_train.log'
+model_path = '160_18d_mix_5D_ma5_s_seq.h5'
 
 
 def read_data(path):
     lines = []
     with open(path) as f:
-        for x in range(size): #380000
-            lines.append(eval(f.readline().strip()))
+        i = 0
+        for x in range(size): #610000
+            line = eval(f.readline().strip())
+            lines.append(line)
 
     random.shuffle(lines)
     print('Finished reading data')
@@ -45,7 +49,7 @@ def read_data(path):
     return X_resampled,y_resampled,np.array(test_x),np.array(test_y)
 
 
-train_x,train_y,test_x,test_y=read_data("D:\\data\\quantization\\stock16_18d_train.log")
+train_x,train_y,test_x,test_y=read_data(file_path)
 
 train_x_a = train_x[:,:18*24]
 train_x_a = train_x_a.reshape(train_x.shape[0], 18, 24, 1)
@@ -168,6 +172,4 @@ print(score)
 print('Test score:', score[0])
 print('Test accuracy:', score[1])
 
-path="16_18d_mix_seq.h5"
-model.save(path)
-model=None
+model.save(model_path)

+ 175 - 0
mix/mix_train_2.py

@@ -0,0 +1,175 @@
+import keras
+# -*- encoding:utf-8 -*-
+import numpy as np
+from keras.models import Sequential
+# use Adam as the optimizer (many alternatives exist, e.g. SGD)
+from keras.optimizers import Adam
+import random
+from keras.models import load_model
+from imblearn.over_sampling import RandomOverSampler
+from keras.utils import np_utils
+# layer imports: Conv2D for convolution, Activation for activation functions,
+# MaxPooling2D for pooling; Flatten collapses multi-dimensional input to 1D;
+# Dense is a fully-connected layer
+from keras.layers import Conv2D, Activation, MaxPool2D, Flatten, Dense,Dropout,Input,MaxPooling2D,BatchNormalization,concatenate
+from keras import regularizers
+from keras.models import Model
+
+epochs= 440
+size = 580000
+file_path = 'D:\\data\\quantization\\stock160_18d_train.log'
+model_path = '160_18d_mix_5D_ma5_s_seq.h5'
+
+
+def read_data(path):
+    lines = []
+    with open(path) as f:
+        i = 0
+        for x in range(size): #610000
+            line = eval(f.readline().strip())
+            lines.append(line)
+
+    random.shuffle(lines)
+    print('Finished reading data')
+
+    d=int(0.7*len(lines))
+
+    train_x=[s[:-2] for s in lines[0:d]]
+    train_y=[s[-1] for s in lines[0:d]]
+    test_x=[s[:-2] for s in lines[d:]]
+    test_y=[s[-1] for s in lines[d:]]
+
+    print('Finished converting data')
+
+    ros = RandomOverSampler(random_state=0)
+    X_resampled, y_resampled = ros.fit_sample(np.array(train_x), np.array(train_y))
+
+    print('Finished resampling data')
+
+    return X_resampled,y_resampled,np.array(test_x),np.array(test_y)
+
+
+train_x,train_y,test_x,test_y=read_data(file_path)
+
+train_x_a = train_x[:,:18*24]
+train_x_a = train_x_a.reshape(train_x.shape[0], 18, 24, 1)
+# train_x_b = train_x[:, 18*18:18*18+2*18]
+# train_x_b = train_x_b.reshape(train_x.shape[0], 18, 2, 1)
+train_x_c = train_x[:,18*24:]
+
+
+def create_mlp(dim, regress=False):
+    # define our MLP network
+    model = Sequential()
+    model.add(Dense(64, input_dim=dim, activation="relu"))
+    model.add(Dense(64, activation="relu"))
+
+    # check to see if the regression node should be added
+    if regress:
+        model.add(Dense(1, activation="linear"))
+
+    # return our model
+    return model
+
+
+def create_cnn(width, height, depth, filters=(4, 6), kernel_size=(5, 6), regress=False, output=24):
+    # initialize the input shape and channel dimension, assuming
+    # TensorFlow/channels-last ordering
+    inputShape = (width, height, 1)
+    chanDim = -1
+
+    # define the model input
+    inputs = Input(shape=inputShape)
+
+    x = inputs
+
+    # CONV => RELU => BN => POOL
+    x = Conv2D(32, kernel_size, strides=2, padding="same")(x)
+    x = Activation("relu")(x)
+    x = BatchNormalization(axis=chanDim)(x)
+    # x = MaxPooling2D(pool_size=(2, 2))(x)
+    # if width > 2:
+    #     x = Conv2D(32, (10, 6), padding="same")(x)
+    #     x = Activation("relu")(x)
+    #     x = BatchNormalization(axis=chanDim)(x)
+
+    # flatten the volume, then FC => RELU => BN => DROPOUT
+    x = Flatten()(x)
+    x = Dense(output)(x)
+    x = Activation("relu")(x)
+    x = BatchNormalization(axis=chanDim)(x)
+    x = Dropout(0.2)(x)
+
+    # apply another FC layer, this one to match the number of nodes
+    # coming out of the MLP
+    x = Dense(output)(x)
+    x = Activation("relu")(x)
+
+    # check to see if the regression node should be added
+    if regress:
+        x = Dense(1, activation="linear")(x)
+
+    # construct the CNN
+    model = Model(inputs, x)
+
+    # return the CNN
+    return model
+
+
+# create the MLP and CNN models
+mlp = create_mlp(train_x_c.shape[1], regress=False)
+cnn_0 = create_cnn(18, 24, 1, kernel_size=(6, 6), regress=False, output=256)
+# cnn_1 = create_cnn(18, 2, 1, kernel_size=(6,2), regress=False, output=36)
+
+# create the input to our final set of layers as the *output* of both
+# the MLP and CNN
+combinedInput = concatenate([mlp.output, cnn_0.output])
+
+# our final FC layer head will have several dense layers, the final one
+# being the 5-class softmax head
+x = Dense(512, activation="relu", kernel_regularizer=regularizers.l1(0.003))(combinedInput)
+x = Dropout(0.2)(x)
+x = Dense(512, activation="relu")(x)
+x = Dense(512, activation="relu")(x)
+# add one more layer
+x = Dense(5, activation="softmax")(x)
+
+# the final model accepts the flat numerical features on the MLP input and
+# the 18x24 image-like block on the CNN input, outputting 5 class probabilities
+model = Model(inputs=[mlp.input, cnn_0.input], outputs=x)
+
+
+print("Starting training ")
+# h = model.fit(train_x, train_y, batch_size=4096*2, epochs=500, shuffle=True)
+
+# compile the model with categorical cross-entropy as the loss,
+# since the labels are one-hot vectors over 5 return buckets
+opt = Adam(lr=1e-3, decay=1e-3 / 200)
+model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=['accuracy'])
+
+# train the model
+print("[INFO] training model...")
+model.fit(
+    [train_x_c, train_x_a], train_y,
+    # validation_data=([testAttrX, testImagesX], testY),
+    # epochs=int(3*train_x_a.shape[0]/1300),
+    epochs=epochs,
+    batch_size=2048, shuffle=True)
+
+test_x_a = test_x[:,:18*24]
+test_x_a = test_x_a.reshape(test_x.shape[0], 18, 24, 1)
+# test_x_b = test_x[:, 18*18:18*18+2*18]
+# test_x_b = test_x_b.reshape(test_x.shape[0], 18, 2, 1)
+test_x_c = test_x[:,18*24:]
+
+# evaluate on the held-out testing data
+print("[INFO] evaluating model...")
+score  = model.evaluate([test_x_c, test_x_a], test_y)
+
+print(score)
+print('Test score:', score[0])
+print('Test accuracy:', score[1])
+
+model.save(model_path)
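
Note: once mix_train_2.py has saved 160_18d_mix_5D_ma5_s_seq.h5, inference mirrors the predict scripts above; the input order [flat features, image-like block] must match training. A minimal sketch, where the 7 trailing flat features are an assumption for illustration:

import numpy as np
from keras.models import load_model

model = load_model('160_18d_mix_5D_ma5_s_seq.h5')
sample = np.random.rand(1, 18 * 24 + 7)          # one row shaped like the training data
x_a = sample[:, :18 * 24].reshape(1, 18, 24, 1)  # CNN branch
x_c = sample[:, 18 * 24:]                        # MLP branch
probs = model.predict([x_c, x_a])[0]             # 5-way softmax
print(probs, probs.argmax())                     # classes 0/1 = up buckets, 3/4 = down buckets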