|
@@ -0,0 +1,212 @@
|
|
1
|
+import keras
|
|
2
|
+# -*- encoding:utf-8 -*-
|
|
3
|
+import numpy as np
|
|
4
|
+from keras.models import Sequential
|
|
5
|
+# 优化方法选用Adam(其实可选项有很多,如SGD)
|
|
6
|
+from keras.optimizers import Adam
|
|
7
|
+import random
|
|
8
|
+from keras.models import load_model
|
|
9
|
+from imblearn.over_sampling import RandomOverSampler
|
|
10
|
+from keras.utils import np_utils
|
|
11
|
+# 用于模型初始化,Conv2D模型初始化、Activation激活函数,MaxPooling2D是池化层
|
|
12
|
+# Flatten作用是将多位输入进行一维化
|
|
13
|
+# Dense是全连接层
|
|
14
|
+from keras.layers import Conv2D, Activation, MaxPool2D, Flatten, Dense,Dropout,Input,MaxPooling2D,BatchNormalization,concatenate
|
|
15
|
+from keras import regularizers
|
|
16
|
+from keras.models import Model
|
|
17
|
+from keras.callbacks import EarlyStopping
|
|
18
|
+
|
|
19
|
# Stop training when 'accuracy' has not improved for 5 consecutive epochs.
early_stopping = EarlyStopping(monitor='accuracy', patience=5, verbose=2)

# Training configuration and data file locations.
epochs= 30
size = 470000 # number of training lines to read (comment said ~680K lines total available)
file_path = 'D:\\data\\quantization\\stock517_28d_train2.log'
model_path = '517_28d_mix_3D_ma5_s_seq.h5'
file_path1='D:\\data\\quantization\\stock517_28d_test.log'
# Each sample's leading row*col values form a 2-D indicator grid fed to the CNN branch.
row = 28
col = 16
# Experiment log kept verbatim below (feature-set variants and their observed scores).
'''
0 dmi 28*20 38,98,51/5 下跌预判非常准
1 macd 28*19 41,98,53/8
2 dmi-对大盘对比 28*20 35,99,46/17
3 5d-dmi-对大盘对比 28*20 42,99,39/10
4 3d-dmi-对大盘对比 28*20 40,99,39/07
5 3d-beta1 55,99,52/07 当前用这个
6 3d-ma20 40,99,41/07
7 3d-macd 28*19 55,99,40/07
8 3d-市值>30 28*20 57,101,36/14 最高价 用这个!
9 3d-市值>30 28*20 57,99,31/08 收盘最高价
10 5d-市值>30 28*20 收盘最高价
11 5d-市值>30 28*20 ma5
12 5d-极简 28*16 有ma5,ma20 46,102,16/26
13 3d-最高价 28*16 57,101,39,16
14 5d-极简-最高价 28*16 40,102,30-34
15 5d+dmi+最高价 28*20 40,102,31-34
16 同12,14,参数11,10 28*16 38,102,29-36
17 同上参数11,6 28*16 39,102,30-35 !

'''
|
|
50
|
def read_data(path, path1=file_path1):
    """Load, shuffle, split and oversample the training data.

    Each line of the input files is a Python literal (a list) whose last
    element is the label; the leading ``len - 2`` elements are the
    features (the second-to-last element is dropped).

    Args:
        path: training log file; up to ``size`` lines are read.
        path1: additional log file; up to 30000 lines are read.

    Returns:
        Tuple ``(train_x, train_y, test_x, test_y)`` of float32 numpy
        arrays; the 85% training split is rebalanced with random
        oversampling, the remaining 15% is returned untouched.
    """
    import ast  # local import: only needed for parsing the data lines

    def _load_lines(file_name, limit):
        # Read at most `limit` literal-encoded lines; stop early at EOF
        # instead of crashing on an empty readline().
        rows = []
        with open(file_name) as f:
            for _ in range(limit):
                raw = f.readline().strip()
                if not raw:
                    break
                # literal_eval accepts only data literals — safer than eval()
                # on file contents (assumes each line is a plain Python
                # list of numbers — TODO confirm against the log writer).
                rows.append(ast.literal_eval(raw))
        return rows

    lines = _load_lines(path, size) + _load_lines(path1, 30000)
    random.shuffle(lines)
    print('读取数据完毕')

    d = int(0.85 * len(lines))
    length = len(lines[0])

    train_x = [s[:length - 2] for s in lines[0:d]]
    train_y = [s[-1] for s in lines[0:d]]
    test_x = [s[:length - 2] for s in lines[d:]]
    test_y = [s[-1] for s in lines[d:]]

    print('转换数据完毕')

    # Rebalance class frequencies in the training split.
    # NOTE: fit_sample() was deprecated and removed in imbalanced-learn 0.6;
    # fit_resample() is the supported name with identical behavior.
    ros = RandomOverSampler(random_state=0)
    X_resampled, y_resampled = ros.fit_resample(
        np.array(train_x, dtype=np.float32),
        np.array(train_y, dtype=np.float32),
    )

    print('数据重采样完毕')

    return X_resampled, y_resampled, np.array(test_x, dtype=np.float32), np.array(test_y, dtype=np.float32)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
train_x,train_y,test_x,test_y=read_data(file_path)

# The first row*col columns are the 2-D indicator grid for the CNN branch;
# reshape to (samples, row, col, 1) for channels-last Conv2D input.
train_x_a = train_x[:,:row*col]
train_x_a = train_x_a.reshape(train_x.shape[0], row, col, 1)
# train_x_b = train_x[:, 9*26:18*26]
# train_x_b = train_x_b.reshape(train_x.shape[0], 9, 26, 1)
# The remaining columns are flat tabular features for the MLP branch.
train_x_c = train_x[:,row*col:]
|
|
90
|
+
|
|
91
|
+
|
|
92
|
def create_mlp(dim, regress=False):
    """Build the dense (MLP) branch for the flat tabular features.

    Args:
        dim: number of input features.
        regress: when True, append a single linear output node.

    Returns:
        An uncompiled Sequential model (256-256-256-128 ReLU stack with
        one Dropout after the first layer).
    """
    layer_stack = [
        Dense(256, input_dim=dim, activation="relu"),
        Dropout(0.2),
        Dense(256, activation="relu"),
        Dense(256, activation="relu"),
        Dense(128, activation="relu"),
    ]
    # Optional single-node regression head.
    if regress:
        layer_stack.append(Dense(1, activation="linear"))

    net = Sequential()
    for layer in layer_stack:
        net.add(layer)
    return net
|
|
107
|
+
|
|
108
|
+
|
|
109
|
def create_cnn(width, height, depth, size=48, kernel_size=(5, 6), regress=False, output=24):
    """Build the convolutional branch for the (width, height) indicator grid.

    Assumes TensorFlow / channels-last ordering with a single channel.
    ``depth`` is accepted for interface compatibility but is not used —
    the input shape is always (width, height, 1).

    Args:
        width: grid rows.
        height: grid columns.
        depth: unused (kept for caller compatibility).
        size: number of convolution filters.
        kernel_size: convolution kernel shape.
        regress: when True, append a single linear output node.
        output: width of the two fully connected layers.

    Returns:
        An uncompiled functional Model.
    """
    channel_axis = -1  # channels-last ordering
    img_input = Input(shape=(width, height, 1))

    # CONV => RELU => BN
    net = Conv2D(size, kernel_size, strides=2, padding="same")(img_input)
    net = Activation("relu")(net)
    net = BatchNormalization(axis=channel_axis)(net)

    # FLATTEN => FC => RELU => BN => DROPOUT
    net = Flatten()(net)
    net = Dense(output)(net)
    net = Activation("relu")(net)
    net = BatchNormalization(axis=channel_axis)(net)
    net = Dropout(0.2)(net)

    # Second FC layer so the branch width matches the MLP branch.
    net = Dense(output)(net)
    net = Activation("relu")(net)

    # Optional single-node regression head.
    if regress:
        net = Dense(1, activation="linear")(net)

    return Model(img_input, net)
|
|
148
|
+
|
|
149
|
+
|
|
150
|
# Build the two branches: MLP over the tabular features, CNN over the
# row x col indicator grid. Commented-out calls are earlier experiments
# (trailing numbers are their recorded scores).
mlp = create_mlp(train_x_c.shape[1], regress=False)
# cnn_0 = create_cnn(18, 20, 1, kernel_size=(3, 3), size=90, regress=False, output=96) # 31 97 46
cnn_0 = create_cnn(row, col, 1, kernel_size=(6, col), size=96, regress=False, output=96) # 29 98 47
# cnn_0 = create_cnn(18, 20, 1, kernel_size=(9, 9), size=90, regress=False, output=96) # 28 97 53
# cnn_0 = create_cnn(18, 20, 1, kernel_size=(3, 20), size=90, regress=False, output=96)
# cnn_1 = create_cnn(18, 20, 1, kernel_size=(18, 10), size=80, regress=False, output=96)
# cnn_1 = create_cnn(9, 26, 1, kernel_size=(2, 14), size=36, regress=False, output=64)

# Feed the concatenated branch outputs into a shared FC head.
combinedInput = concatenate([mlp.output, cnn_0.output, ])

# FC head: three 1024-wide ReLU layers (L1 regularization on the first,
# dropout after it), ending in a 4-way softmax classifier.
x = Dense(1024, activation="relu", kernel_regularizer=regularizers.l1(0.003))(combinedInput)
x = Dropout(0.2)(x)
x = Dense(1024, activation="relu")(x)
x = Dense(1024, activation="relu")(x)
# Final classification layer (4 classes).
x = Dense(4, activation="softmax")(x)

# The combined model takes [tabular features, indicator grid] as inputs.
# NOTE(review): the "house price" wording in nearby comments/prints is a
# leftover from the tutorial this was adapted from; the model is a 4-class
# classifier, not a regressor.
model = Model(inputs=[mlp.input, cnn_0.input, ], outputs=x)


print("Starting training ")
# h = model.fit(train_x, train_y, batch_size=4096*2, epochs=500, shuffle=True)

# Compile with Adam and a simple learning-rate decay schedule.
# NOTE(review): train_y appears to be integer class labels (last element of
# each data line), while categorical_crossentropy expects one-hot targets —
# np_utils is imported but to_categorical is never applied; verify, or use
# sparse_categorical_crossentropy.
opt = Adam(lr=1e-3, decay=1e-3 / 200)
model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=['accuracy'])

# Train the model; inputs must be ordered [MLP input, CNN input] to match
# the Model(inputs=...) declaration above.
print("[INFO] training model...")
model.fit(
    [train_x_c, train_x_a, ], train_y,
    # validation_data=([testAttrX, testImagesX], testY),
    # epochs=int(3*train_x_a.shape[0]/1300),
    epochs=epochs,
    batch_size=2048, shuffle=True,
    callbacks=[early_stopping]
)

model.save(model_path)

# Split the held-out 15% the same way as the training data:
# grid columns for the CNN branch, remainder for the MLP branch.
test_x_a = test_x[:,:row*col]
test_x_a = test_x_a.reshape(test_x.shape[0], row, col, 1)
# test_x_b = test_x[:, 9*26:9*26+9*26]
# test_x_b = test_x_b.reshape(test_x.shape[0], 9, 26, 1)
test_x_c = test_x[:,row*col:]

# Evaluate on the held-out split.
print("[INFO] predicting house prices...")
score = model.evaluate([test_x_c, test_x_a,], test_y)

print(score)
print('Test score:', score[0])
print('Test accuracy:', score[1])
|