yufeng 4 years ago
parent
commit
6f6d039c06
2 changed files with 232 additions and 0 deletions
  1. 126 0
      stock/cnn_predict_dmi.py
  2. 106 0
      stock/cnn_train_dmi.py

+ 126 - 0
stock/cnn_predict_dmi.py

@@ -0,0 +1,126 @@
1
+# -*- encoding:utf-8 -*-
2
+import numpy as np
3
+from keras.models import load_model
4
+import joblib
5
+
6
+
7
def read_data(path):
    """Load evaluation samples from a text file of Python-literal rows.

    Every non-blank line is evaluated into a sequence. A row is kept only
    when the first element of its second-to-last field starts with ``'0'``
    or ``'3'`` (presumably a stock-code prefix -- TODO confirm). For the
    kept rows, the features are all elements except the last two and the
    label is the last element; the row width is taken from the first kept
    row.

    Args:
        path: Path of the file to read.

    Returns:
        A tuple ``(x, y, rows)`` where ``x`` and ``y`` are numpy arrays of
        features and labels and ``rows`` is the list of kept raw rows.

    Raises:
        ValueError: If no row passes the filter.
    """
    rows = []
    with open(path) as f:
        # Stream the file instead of materialising (and copying) every line.
        for raw in f:
            raw = raw.strip()
            if not raw:
                # A blank or trailing empty line would make eval('') raise.
                continue
            # SECURITY: eval() executes arbitrary code. Only feed this
            # function files produced by a trusted pipeline.
            row = eval(raw)
            if row[-2][0].startswith(('0', '3')):
                rows.append(row)

    if not rows:
        raise ValueError('no usable rows found in %r' % (path,))

    size = len(rows[0])
    train_x = [s[:size - 2] for s in rows]
    train_y = [s[size - 1] for s in rows]
    return np.array(train_x), np.array(train_y), rows
19
+
20
+
21
+def _score(fact, line):
22
+    with open('dnn_predict_dmi_18d.txt', 'a') as f:
23
+        f.write(str([line[-2], line[-1]]) + "\n")
24
+
25
+    up_right = 0
26
+    up_error = 0
27
+
28
+    if fact[0] == 1:
29
+        up_right = up_right + 1.12
30
+    elif fact[1] == 1:
31
+        up_right = up_right + 1.06
32
+    elif fact[2] == 1:
33
+        up_right = up_right + 1
34
+    elif fact[3] == 1:
35
+        up_right = up_right + 0.94
36
+    else:
37
+        up_error = up_error + 1
38
+        up_right = up_right + 0.88
39
+    return up_right,up_error
40
+
41
+
42
def predict(file_path='', model_path='15min_dnn_seq.h5', idx=-1):
    """Evaluate a saved Keras model on a test log and print weighted tallies.

    The samples are loaded with ``read_data``, reshaped to
    ``(n, 1, 6, 77)`` and passed to the model loaded from ``model_path``.
    Each prediction row is then bucketed:

    * "up" when output 0 or output 1 exceeds 0.6 -- scored (and logged)
      through ``_score``;
    * otherwise "down" when output 3 or output 4 exceeds 0.5 -- scored
      inline with the same weights; a sample whose label position 0 is 1
      is additionally counted as a down error.

    When ``idx`` is -2 nothing is tallied at all.

    Args:
        file_path: Test log to read.
        model_path: Path of the saved Keras model.
        idx: Model index; only the value -2 changes behaviour (see above).

    Returns:
        ``(win_dnn, up_ratio, down_ratio)`` where ``win_dnn`` is always an
        empty list and each ratio is the weighted sum divided by the number
        of samples in that bucket (the divisor is 1 when the bucket is
        empty).
    """
    test_x, test_y, lines = read_data(file_path)
    test_x = test_x.reshape(test_x.shape[0], 1, 6, 77)

    model = load_model(model_path)
    score = model.evaluate(test_x, test_y)
    print('DNN', score)

    up_num = up_error = up_right = 0
    down_num = down_error = down_right = 0
    # Never populated; kept so the returned tuple keeps its shape.
    win_dnn = []

    # Weight for a label hit at positions 0-3 in the "down" bucket.
    down_weights = (1.12, 1.06, 1, 0.94)

    result = model.predict(test_x)
    # Walk predictions, labels and raw rows in lock-step instead of
    # maintaining a manual index counter.
    for r, fact, line in zip(result, test_y, lines):
        if idx == -2:
            # Scoring is disabled for this index.
            continue

        if r[0] > 0.6 or r[1] > 0.6:
            tmp_right, tmp_error = _score(fact, line)
            up_right = tmp_right + up_right
            up_error = tmp_error + up_error
            up_num = up_num + 1
        elif r[3] > 0.5 or r[4] > 0.5:
            for position, weight in enumerate(down_weights):
                if fact[position] == 1:
                    if position == 0:
                        # Predicted down while label position 0 is set.
                        down_error = down_error + 1
                    down_right = down_right + weight
                    break
            else:
                down_right = down_right + 0.88
            down_num = down_num + 1

    # Avoid dividing by zero when a bucket received no samples.
    if up_num == 0:
        up_num = 1
    if down_num == 0:
        down_num = 1
    print('DNN', up_right, up_num, up_right/up_num, up_error/up_num, down_right/down_num, down_error/down_num)
    return win_dnn, up_right/up_num, down_right/down_num
104
+
105
+
106
def multi_predict(model='14_18d'):
    """Run ``predict`` for cluster indices 0..11 and print the summed ratios.

    For each index ``x`` the test log
    ``D:\\data\\quantization\\kmeans\\stock<model>_test_<x>.log`` is scored
    with the weights file ``<model>_dnn_seq_<x>.h5``. The index is printed
    before each run, and the accumulated up and down ratios are printed at
    the end.

    Args:
        model: Model tag inserted into both file names.
    """
    # Notes kept from earlier experiments with index subsets:
    #   range(0, 12): 0,2,3,4,6,8,9,10,11
    #   [5,9,11,0,3,4,8]: 10_18 -- index 0 had no data and needs to be
    #       recomputed; [0,2,3,4,5,9,10,11]
    #   [0,1,10]
    #   [2,4,7,10]: 2 performed best; 0 and 8 were correct contrarian
    #       indicators (9 was a wrong contrarian indicator, too few samples)
    #   [0,2,5,6,7]: 5 performed best
    log_template = 'D:\\data\\quantization\\kmeans\\stock{}_test_{}.log'
    weights_template = '{}_dnn_seq_{}.h5'

    up_total = 0
    down_total = 0
    for x in range(0, 12):
        print(x)
        _, up_ratio, down_ratio = predict(
            file_path=log_template.format(model, x),
            model_path=weights_template.format(model, x),
            idx=x,
        )
        up_total = up_total + up_ratio
        down_total = down_total + down_ratio
    print(up_total, down_total)
120
+
121
+
122
if __name__ == '__main__':
    # Score the saved 16_18d CNN on its test log.
    predict(
        model_path='16_18d_cnn_seq.h5',
        file_path='D:\\data\\quantization\\stock16_18d_test.log',
    )
    # Earlier invocations, kept for reference:
    # predict(file_path='D:\\data\\quantization\\stock6_test.log', model_path='15m_dnn_seq.h5')
    # multi_predict(model='15_18d')
    # predict_today(20200229, model='11_18d')

+ 106 - 0
stock/cnn_train_dmi.py

@@ -0,0 +1,106 @@
1
+import keras
2
+# -*- encoding:utf-8 -*-
3
+import numpy as np
4
+from keras.models import Sequential
5
+# The optimizer used is Adam (there are many alternatives, e.g. SGD)
6
+from keras.optimizers import Adam
7
+import random
8
+from keras.models import load_model
9
+from imblearn.over_sampling import RandomOverSampler
10
+from keras.utils import np_utils
11
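+# Used to build the model: Conv2D is the convolution layer, Activation the activation function, MaxPool2D the pooling layer
12
+# Flatten collapses a multi-dimensional input into one dimension
13
+# Dense is the fully connected layer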
14
+from keras.layers import Conv2D, Activation, MaxPool2D, Flatten, Dense,Dropout
15
+from keras import regularizers
16
+
17
+
18
def read_data(path, max_lines=200000):
    """Load, shuffle, split and oversample training samples from a log file.

    At most ``max_lines`` lines are read from ``path``; every non-blank
    line is evaluated into a sequence. The rows are shuffled in place and
    split 75% / 25% into train and test parts. Features are all elements
    except the last two and the label is the last element. Only the
    training part is passed through ``RandomOverSampler(random_state=0)``.

    Args:
        path: Path of the training log.
        max_lines: Maximum number of lines to read from the start of the
            file. Files shorter than this are read to the end.

    Returns:
        ``(train_x, train_y, test_x, test_y)``: the resampled training
        features and labels followed by the test features and labels as
        numpy arrays.

    Raises:
        ValueError: If the file yields no samples.
    """
    from itertools import islice

    lines = []
    with open(path) as f:
        # islice stops at end of file, so a short file no longer reaches
        # eval('') the way a fixed-count readline() loop did.
        for raw in islice(f, max_lines):
            raw = raw.strip()
            if not raw:
                continue
            # SECURITY: eval() executes arbitrary code. Only feed this
            # function files produced by a trusted pipeline.
            lines.append(eval(raw))

    if not lines:
        raise ValueError('no samples could be read from %r' % (path,))

    random.shuffle(lines)
    print('读取数据完毕')

    d = int(0.75 * len(lines))
    train_rows, test_rows = lines[:d], lines[d:]

    train_x = [s[:-2] for s in train_rows]
    train_y = [s[-1] for s in train_rows]
    test_x = [s[:-2] for s in test_rows]
    test_y = [s[-1] for s in test_rows]

    print('转换数据完毕')

    ros = RandomOverSampler(random_state=0)
    # fit_resample is the current imbalanced-learn name; fall back to the
    # legacy fit_sample on releases that only provide the old spelling.
    resample = getattr(ros, 'fit_resample', None) or ros.fit_sample
    X_resampled, y_resampled = resample(np.array(train_x), np.array(train_y))

    print('数据重采样完毕')

    return X_resampled, y_resampled, np.array(test_x), np.array(test_y)
42
+
43
+
44
# Training script: load the data, build the CNN, train it, evaluate it,
# save it, then reload the saved file and print its predictions.
train_x, train_y, test_x, test_y = read_data("D:\\data\\quantization\\stock16_18d_train.log")
# Every flat feature row is reshaped to (1, 6, 77) to match input_shape below.
train_x = train_x.reshape(train_x.shape[0], 1, 6, 77)
test_x = test_x.reshape(test_x.shape[0], 1, 6, 77)


model = Sequential()

# Convolution layer. The Keras 2 keywords (filters / kernel_size) replace
# the Keras 1 spellings nb_filter / nb_row / nb_col, which were mixed here
# with the Keras 2 keyword `padding`.
model.add(Conv2D(
    filters=32,  # 32 filters in the first layer
    kernel_size=(10, 6),  # formerly nb_row=10, nb_col=6
    padding='same',  # pad the borders so the edges are covered
    input_shape=(1, 6, 77),  # shape of one input sample
    # batch_input_shape=(64, 1, 28, 28),
))
# Activation function.
model.add(Activation('relu'))

# Down-sampling (pooling) layer.
model.add(MaxPool2D(
    pool_size=(4, 1),  # pooling window
    strides=(2, 2),  # step along each of the two pooled axes
    padding='same',  # padding mode is 'same'
))

# Flatten the multi-dimensional feature maps into one vector for the
# fully connected layers.
model.add(Flatten())
model.add(Dense(units=777, activation='relu', kernel_regularizer=regularizers.l1(0.003)))
model.add(Dropout(0.2))
model.add(Dense(units=777, activation='relu'))
model.add(Dense(units=777, activation='relu'))
model.add(Dense(units=777, activation='relu', kernel_regularizer=regularizers.l1(0.002)))
model.add(Dropout(0.2))
model.add(Dense(units=1024, activation='relu'))

# Output layer: 5 units followed by softmax for classification.
model.add(Dense(5))
model.add(Activation('softmax'))

# No learning rate is passed, so Adam runs with its library defaults.
adam = Adam()

model.compile(optimizer=adam,
              loss='categorical_crossentropy',
              metrics=['accuracy'])

print("Starting training ")
h = model.fit(train_x, train_y, batch_size=4096*2, epochs=150, shuffle=True)
score = model.evaluate(test_x, test_y)
print(score)
print('Test score:', score[0])
print('Test accuracy:', score[1])

path = "16_18d_cnn_seq.h5"
model.save(path)
# Drop the in-memory model so the predictions below come from the saved file.
model = None


model = load_model(path)
result = model.predict(test_x)
print(result)
print(test_y)