yufeng0528 4 years ago
parent
commit
5b9c05337b
4 changed files with 156 additions and 13 deletions
  1. 36 2
      stock/compont_predict.py
  2. 17 5
      stock/dnn_predict.py
  3. 7 6
      stock/dnn_train.py
  4. 96 0
      stock/kmeans.py

+ 36 - 2
stock/compont_predict.py

@@ -1,5 +1,39 @@
1 1
 from stock import cnn_predict
2 2
 from stock import dnn_predict
3
+from stock import kmeans
4
+
5
+
6
def dnn_and_kmeans():
    """Cross-check DNN predictions against the kmeans/annoy similarity picks.

    Runs both predictors, then for every kmeans pick looks for the same
    (stock, date) pair among the DNN picks; matches are logged to
    'dnn_and_kmeans.txt' and scored by the DNN's predicted class.
    Prints the accumulated score, the match count and their ratio.
    """
    # cnn_result = cnn_predict.predict()
    dnn_result = dnn_predict.predict(file_path='D:\\data\\quantization\\stock6_5_test.log', model_path='5d_dnn_seq.h5')
    # NOTE(review): this is the kmeans result, not a CNN result — renamed locally.
    kmeans_result = kmeans.class_fic(file_path="D:\\data\\quantization\\stock2_20.log")
    print('计算完成')

    up_num = 0      # number of picks present in both result sets
    up_right = 0    # score weighted by the DNN's predicted class
    with open('dnn_and_kmeans.txt', 'a') as f:
        for m in kmeans_result:  # 3 fields; date is a string
            # print('find', m)
            for n in dnn_result:  # 5 fields; date is an int
                # match on (stock id, date); kmeans dates need int() conversion
                if m[0][0] == n[0][0] and int(m[0][1]) == n[0][1]:
                    print('AND', m)
                    f.write(str(m) + '\n')

                    # one-hot class of the DNN label: strong up / up / flat / down
                    if n[1][0] == 1:
                        up_right = up_right + 1.13
                    elif n[1][1] == 1:
                        up_right = up_right + 1.05
                    elif n[1][2] == 1:
                        up_right = up_right + 1
                    else:
                        up_right = up_right - 0.15
                    up_num = up_num + 1

                    break

    # guard against ZeroDivisionError when the two result sets never overlap
    if up_num:
        print(up_right, up_num, up_right / up_num)
    else:
        print(up_right, up_num)
3 37
 
4 38
 
5 39
 def and_predict():
@@ -30,6 +64,6 @@ def and_predict():
30 64
 
31 65
     print(up_right, up_num, up_right / up_num)
32 66
 
33
-
34 67
 if __name__ == '__main__':
35
-    and_predict()
68
+    # and_predict()
69
+    dnn_and_kmeans()

+ 17 - 5
stock/dnn_predict.py

@@ -1,9 +1,5 @@
1
-import keras
2 1
 # -*- encoding:utf-8 -*-
3 2
 import numpy as np
4
-from keras.models import Sequential
5
-from keras.layers import Dense,Dropout
6
-import random
7 3
 from keras.models import load_model
8 4
 
9 5
 
@@ -38,9 +34,25 @@ def predict(file_path='', model_path='15min_dnn_seq.h5'):
38 34
                 f.write(str([lines[i][-2], lines[i][-1]]) + "\n")
39 35
                 win_dnn.append([lines[i][-2], lines[i][-1]])
40 36
                 if fact[0] == 1:
37
+                    up_right = up_right + 1.15
38
+                elif fact[1] == 1:
39
+                    up_right = up_right + 1.05
40
+                elif fact[2] == 1:
41 41
                     up_right = up_right + 1
42
+                else:
43
+                    up_right = up_right - 0.15
44
+                up_num = up_num + 1
45
+            elif r[1] > 0.5:
46
+                f.write(str([lines[i][-2], lines[i][-1]]) + "\n")
47
+                win_dnn.append([lines[i][-2], lines[i][-1]])
48
+                if fact[0] == 1:
49
+                    up_right = up_right + 1.15
42 50
                 elif fact[1] == 1:
43
-                    up_right = up_right + 0.2
51
+                    up_right = up_right + 1.05
52
+                elif fact[2] == 1:
53
+                    up_right = up_right + 1
54
+                else:
55
+                    up_right = up_right - 0.15
44 56
                 up_num = up_num + 1
45 57
 
46 58
             i = i + 1

+ 7 - 6
stock/dnn_train.py

@@ -12,7 +12,7 @@ from imblearn.over_sampling import RandomOverSampler
12 12
 def read_data(path):
13 13
     lines = []
14 14
     with open(path) as f:
15
-        for x in range(60000):
15
+        for x in range(30000):
16 16
             lines.append(eval(f.readline().strip()))
17 17
 
18 18
     random.shuffle(lines)
@@ -41,19 +41,20 @@ def train(input_dim=400, result_class=3, file_path="D:\\data\\quantization\\stoc
41 41
 
42 42
     model = Sequential()
43 43
     model.add(Dense(units=120+input_dim, input_dim=input_dim,  activation='relu'))
44
-    # model.add(Dense(units=60+int(input_dim/2), activation='relu'))
45 44
     model.add(Dense(units=120+input_dim, activation='relu',kernel_regularizer=regularizers.l2(0.001)))
45
+    model.add(Dense(units=120+input_dim, activation='relu'))
46 46
     model.add(Dropout(0.2))
47
-    model.add(Dense(units=60+input_dim, activation='relu'))
47
+    model.add(Dense(units=60+input_dim, activation='selu'))
48 48
     model.add(Dropout(0.2))
49 49
     model.add(Dense(units=60+input_dim, activation='selu'))
50 50
     # model.add(Dropout(0.2))
51
-    model.add(Dense(units=512, activation='selu'))
51
+    model.add(Dense(units=512, activation='relu'))
52
+
52 53
     model.add(Dense(units=result_class, activation='softmax'))
53 54
     model.compile(loss='categorical_crossentropy', optimizer="adam",metrics=['accuracy'])
54 55
 
55 56
     print("Starting training ")
56
-    h=model.fit(train_x, train_y, batch_size=32, epochs=126, shuffle=True)
57
+    model.fit(train_x, train_y, batch_size=32, epochs=56, shuffle=True)
57 58
     score = model.evaluate(test_x, test_y)
58 59
     print(score)
59 60
     print('Test score:', score[0])
@@ -69,5 +70,5 @@ def train(input_dim=400, result_class=3, file_path="D:\\data\\quantization\\stoc
69 70
 
70 71
 
71 72
 if __name__ == '__main__':
72
-    train(input_dim=46, result_class=5, file_path="D:\\data\\quantization\\stock6_5.log", model_name='5d_dnn_seq.h5')
73
+    train(input_dim=86, result_class=5, file_path="D:\\data\\quantization\\stock6_5.log", model_name='5d_dnn_seq.h5')
73 74
     # train(input_dim=400, result_class=3, file_path="D:\\data\\quantization\\stock6.log", model_name='15m_dnn_seq.h5')

+ 96 - 0
stock/kmeans.py

@@ -0,0 +1,96 @@
1
+# -*- encoding:utf-8 -*-
2
+from sklearn.cluster import KMeans
3
+import numpy as np
4
+from annoy import AnnoyIndex
5
+
6
+
7
def read_data(path, limit=160000):
    """Read up to *limit* sample rows from the log file at *path*.

    Each line of the file is a Python-literal row (list), parsed with
    ast.literal_eval — safer than eval() for data files, and equivalent
    for literal rows.  Stops cleanly at end-of-file instead of crashing
    when the file has fewer than *limit* lines (the original eval('')
    raised SyntaxError there).

    Returns the list of parsed rows.
    """
    import ast

    lines = []
    with open(path) as f:
        for _ in range(limit):
            raw = f.readline()
            if not raw:  # EOF before limit — stop instead of eval('') crash
                break
            line = ast.literal_eval(raw.strip())
            # if line[-1][0] == 1 or line[-1][1] == 1:
            lines.append(line)

    return lines
16
+
17
length = 20  # number of leading rows (days) of each sample used as features


def class_fic(file_path=''):
    """Load the sample log and return the annoy-matched picks.

    Splits every record into a (length, 4) feature slab, a label (last
    field) and a stock id (second-to-last field), then hands labels and
    ids to find_annoy against the previously saved index.
    """
    samples = read_data(file_path)
    print('读取数据完毕')
    record_len = len(samples[0])
    feature_x = np.array([row[:length] for row in samples])
    label_y = [row[record_len - 1] for row in samples]
    # flattened to (n, 4*length); kept even though the annoy_sim call
    # below is deliberately commented out (index already built on disk)
    flat_x = feature_x.reshape(feature_x.shape[0], 4 * length)
    stock_ids = [row[record_len - 2] for row in samples]

    # annoy_sim(flat_x)
    print('save数据完毕')
    return find_annoy(label_y, stock_ids)
30
+
31
def annoy_sim(lines):
    """Build an Annoy index over the flattened feature vectors and save it.

    One item per row of *lines*; the index is persisted to
    'stock_20d.ann' for find_annoy to load later.
    """
    tree = 30
    # vector dimension is length*4 (= 80 with length=20), matching the
    # reshape in class_fic — the old comment claiming 24 was stale
    t = AnnoyIndex(length*4, metric="angular")

    # enumerate replaces the original manual i = i + 1 counter
    for i, stock in enumerate(lines):
        t.add_item(i, stock)

    t.build(tree)
    t.save('stock_20d.ann')
42
+
43
+
44
def find_annoy(lines, stock_list):
    """Query the saved annoy index and return the qualifying picks.

    For every item i, fetch its 10 nearest neighbours; among neighbours
    closer than 0.4, score their labels and keep item i when more than
    one neighbour qualifies and the score ratio exceeds 0.38.  Picks
    dated after 20181101 are printed and returned as
    [[stock_id_record, label], ...].
    """
    t = AnnoyIndex(length*4, metric="angular")
    t.load('stock_20d.ann')
    num = 0
    right = 0

    win_dnn = []
    for i in range(len(lines)):
        index, distance = t.get_nns_by_item(i, 10, include_distances=True)
        # print(index, distance)

        # 预测 — score the close neighbours' labels
        total = 0
        g = 0
        # j == 0 is item i itself, so start at 1
        for j in range(1, len(index)):
            if distance[j] < 0.4:
                total = total + 1
                # BUG FIX: the neighbour's row is lines[index[j]];
                # the original indexed lines[j] with the loop counter
                neighbour = lines[index[j]]
                if neighbour[0] == 1:
                    g = g + 1
                elif neighbour[1] == 1:
                    g = g + 1
                elif neighbour[2] == 1:
                    g = g + 0.5
        if total > 1 and g / total > 0.38:
            right = right + 1
            # only surface recent picks; assumes stock_list[i][1] is a
            # yyyymmdd int — TODO confirm against the log format
            if stock_list[i][1] > 20181101:
                print(stock_list[i])
                win_dnn.append([stock_list[i], lines[i]])

    # NOTE(review): num is never incremented anywhere, so it always
    # prints 0 — kept for output compatibility
    print(right, num)
    print('find数据完毕')
    return win_dnn
93
+
94
+
95
if __name__ == '__main__':
    # Manual entry point: classify against the saved 20-day sample log.
    class_fic(file_path="D:\\data\\quantization\\stock2_20.log")