|
@@ -0,0 +1,194 @@
|
|
1
|
+import keras
|
|
2
|
+# -*- encoding:utf-8 -*-
|
|
3
|
+import numpy as np
|
|
4
|
+from keras.models import Sequential
|
|
5
|
+# 优化方法选用Adam(其实可选项有很多,如SGD)
|
|
6
|
+from keras.optimizers import Adam
|
|
7
|
+import random
|
|
8
|
+from keras.models import load_model
|
|
9
|
+from imblearn.over_sampling import RandomOverSampler
|
|
10
|
+from keras.utils import np_utils
|
|
11
|
+# 用于模型初始化,Conv2D模型初始化、Activation激活函数,MaxPooling2D是池化层
|
|
12
|
+# Flatten作用是将多位输入进行一维化
|
|
13
|
+# Dense是全连接层
|
|
14
|
+from keras.layers import Conv2D, Activation, MaxPool2D, Flatten, Dense,Dropout,Input,MaxPooling2D,BatchNormalization,concatenate
|
|
15
|
+from keras import regularizers
|
|
16
|
+from keras.models import Model
|
|
17
|
+from keras.callbacks import EarlyStopping
|
|
18
|
+
|
|
19
|
# Stop training once the monitored metric has not improved for 5 epochs.
early_stopping = EarlyStopping(monitor='accuracy', patience=5, verbose=2)

# Training configuration.
epochs = 68
size = 400000  # rows of training data to read (18W / 60W variants also used)

# Input/output locations (Windows paths on the D: drive).
file_path = 'D:\\data\\quantization\\stock200_18d_train2.log'
file_path1 = 'D:\\data\\quantization\\stock200_18d_test.log'
model_path = '200_18d_mix_6D_ma5_s_seq.h5'

'''
ROC
1大盘涨停 涨停/跌停/创业板涨停跌停
2大盘涨停 涨停/跌停
3beta函数修正
4用两个kernel

'''
|
|
34
|
+
|
|
35
|
def read_data(path, path1=file_path1, train_rows=size, test_rows=30000):
    """Read training and test rows, shuffle, split, and oversample.

    Each line of both files is a Python literal (a list) whose last element
    is the class label.  The combined rows are shuffled, split 85/15 into
    train/test, and the training portion is rebalanced with random
    oversampling.

    Parameters:
        path:       training log file, read for ``train_rows`` lines.
        path1:      test log file, read for ``test_rows`` lines.
        train_rows: number of lines to read from ``path`` (default: module
                    constant ``size`` — keeps old call sites working).
        test_rows:  number of lines to read from ``path1`` (default 30000,
                    same as the previous hard-coded value).

    Returns:
        (X_resampled, y_resampled, test_x, test_y) as numpy arrays.
    """
    lines = []
    # SECURITY: eval() executes arbitrary code from the data files — safe
    # only because these logs are produced locally.  Prefer
    # ast.literal_eval() if the files could ever be untrusted.
    with open(path) as f:
        for _ in range(train_rows):
            lines.append(eval(f.readline().strip()))

    with open(path1) as f:
        for _ in range(test_rows):
            lines.append(eval(f.readline().strip()))

    random.shuffle(lines)
    print('读取数据完毕')

    d = int(0.85 * len(lines))
    length = len(lines[0])

    # NOTE(review): s[:length-2] drops the last TWO columns but only s[-1]
    # is used as the label, so column length-2 is silently discarded —
    # confirm this is intentional.
    train_x = [s[:length - 2] for s in lines[:d]]
    train_y = [s[-1] for s in lines[:d]]
    test_x = [s[:length - 2] for s in lines[d:]]
    test_y = [s[-1] for s in lines[d:]]

    print('转换数据完毕')

    ros = RandomOverSampler(random_state=0)
    # imbalanced-learn renamed fit_sample -> fit_resample (the old name was
    # removed in 0.8); dispatch so the script runs on both old and new
    # versions instead of crashing with AttributeError.
    resample = getattr(ros, 'fit_resample', None) or ros.fit_sample
    X_resampled, y_resampled = resample(np.array(train_x), np.array(train_y))

    print('数据重采样完毕')

    return X_resampled, y_resampled, np.array(test_x), np.array(test_y)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
# Load the dataset and split each row into the two model inputs:
# the first 18*20 columns form an 18x20 single-channel "image" for the
# CNN branch; the remaining columns feed the MLP branch.
train_x, train_y, test_x, test_y = read_data(file_path)

train_x_a = train_x[:, :18 * 20].reshape(train_x.shape[0], 18, 20, 1)
train_x_c = train_x[:, 18 * 20:]
|
|
75
|
+
|
|
76
|
+
|
|
77
|
def create_mlp(dim, regress=False):
    """Build the MLP branch: two 96-unit ReLU dense layers.

    Parameters:
        dim:     number of input features.
        regress: when True, append a single linear output node.

    Returns:
        An uncompiled keras Sequential model.
    """
    layers = [
        Dense(96, input_dim=dim, activation="relu"),
        Dense(96, activation="relu"),
    ]
    # Optional regression head.
    if regress:
        layers.append(Dense(1, activation="linear"))
    return Sequential(layers)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
def create_cnn(width, height, depth, size=48, kernel_size=(5, 6), regress=False, output=24):
    """Build the CNN branch over a (width, height, depth) input.

    Fix: the input shape previously hard-coded a single channel and
    silently ignored the ``depth`` argument; it now honours it.  All
    visible call sites pass depth=1, so their behaviour is unchanged.

    Parameters:
        width, height, depth: input "image" dimensions (channels-last).
        size:        number of convolution filters.
        kernel_size: convolution kernel shape.
        regress:     when True, append a single linear output node.
        output:      width of the fully-connected layers (chosen to match
                     the MLP branch for later concatenation).

    Returns:
        An uncompiled keras functional Model.
    """
    # TensorFlow / channels-last ordering.
    inputShape = (width, height, depth)
    chanDim = -1

    inputs = Input(shape=inputShape)

    # CONV => RELU => BN
    x = Conv2D(size, kernel_size, strides=2, padding="same")(inputs)
    x = Activation("relu")(x)
    x = BatchNormalization(axis=chanDim)(x)

    # Flatten, then FC => RELU => BN => DROPOUT
    x = Flatten()(x)
    x = Dense(output)(x)
    x = Activation("relu")(x)
    x = BatchNormalization(axis=chanDim)(x)
    x = Dropout(0.2)(x)

    # Second FC layer so the branch output width matches the MLP branch.
    x = Dense(output)(x)
    x = Activation("relu")(x)

    # Optional regression head.
    if regress:
        x = Dense(1, activation="linear")(x)

    return Model(inputs, x)
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+# create the MLP and CNN models
|
|
133
|
# Build the two branches: an MLP over the flat features and a CNN over
# the 18x20 "image" features (full-width 3x20 kernel, 96 filters).
mlp = create_mlp(train_x_c.shape[1], regress=False)
cnn_0 = create_cnn(18, 20, 1, kernel_size=(3, 20), size=96, regress=False, output=96)

# Fuse both branch outputs into one feature vector.
merged = concatenate([mlp.output, cnn_0.output])

# Classification head: three wide dense layers (the first L1-regularized),
# ending in a 3-way softmax.
head = Dense(1024, activation="relu", kernel_regularizer=regularizers.l1(0.003))(merged)
head = Dropout(0.2)(head)
head = Dense(1024, activation="relu")(head)
head = Dense(1024, activation="relu")(head)
head = Dense(3, activation="softmax")(head)

# The final model takes the MLP features and the CNN "image" as inputs.
model = Model(inputs=[mlp.input, cnn_0.input], outputs=head)
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+print("Starting training ")
|
|
161
|
+# h = model.fit(train_x, train_y, batch_size=4096*2, epochs=500, shuffle=True)
|
|
162
|
+
|
|
163
|
+# compile the model using mean absolute percentage error as our loss,
|
|
164
|
+# implying that we seek to minimize the absolute percentage difference
|
|
165
|
+# between our price *predictions* and the *actual prices*
|
|
166
|
+opt = Adam(lr=1e-3, decay=1e-3 / 200)
|
|
167
|
+model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=['accuracy'])
|
|
168
|
+
|
|
169
|
+# train the model
|
|
170
|
+print("[INFO] training model...")
|
|
171
|
+model.fit(
|
|
172
|
+ [train_x_c, train_x_a,], train_y,
|
|
173
|
+ # validation_data=([testAttrX, testImagesX], testY),
|
|
174
|
+ # epochs=int(3*train_x_a.shape[0]/1300),
|
|
175
|
+ epochs=epochs,
|
|
176
|
+ batch_size=2048, shuffle=True,
|
|
177
|
+ callbacks=[early_stopping]
|
|
178
|
+)
|
|
179
|
+
|
|
180
|
+model.save(model_path)
|
|
181
|
+
|
|
182
|
# Slice the held-out data exactly like the training data: image-shaped
# columns for the CNN branch, the rest for the MLP branch.
test_img = test_x[:, :18 * 20].reshape(test_x.shape[0], 18, 20, 1)
test_flat = test_x[:, 18 * 20:]

# NOTE: message text is leftover from the house-price tutorial this
# script was adapted from; kept verbatim.
print("[INFO] predicting house prices...")
result = model.evaluate([test_flat, test_img], test_y)

print(result)
print('Test score:', result[0])
print('Test accuracy:', result[1])
|