# -*- encoding: utf-8 -*-
import ast
import random

import numpy as np
# RandomOverSampler balances the training classes by random oversampling
from imblearn.over_sampling import RandomOverSampler

import keras
from keras.models import Sequential, Model, load_model
# Adam is used as the optimizer (there are many alternatives, e.g. SGD)
from keras.optimizers import Adam
# np_utils.to_categorical converts integer class labels to one-hot vectors
from keras.utils import np_utils
# Layers used to build the model: Conv2D is the convolution layer, Activation
# applies the activation function, MaxPooling2D is the pooling layer,
# Flatten collapses a multi-dimensional input into 1-D, and Dense is a
# fully connected layer
from keras.layers import (Conv2D, Activation, MaxPooling2D, Flatten, Dense,
                          Dropout, Input, BatchNormalization, concatenate)
from keras.layers import LSTM
from keras import regularizers
from keras.callbacks import EarlyStopping

# Stop training once the monitored metric has stopped improving for 5 epochs.
# NOTE: fit() below receives no validation data, so this watches the
# *training* accuracy; on older Keras versions the metric key is 'acc'
# rather than 'accuracy'.
early_stopping = EarlyStopping(monitor='accuracy', patience=5, verbose=2)

epochs = 40
size = 80000  # number of lines to read from the data file (it holds ~380000)


def read_data(path):
    lines = []
    with open(path) as f:
        for x in range(size):
            # each line is a Python-literal record; ast.literal_eval is a
            # safer replacement for the original eval()
            lines.append(ast.literal_eval(f.readline().strip()))

    random.shuffle(lines)
    print('Finished reading the data')

    d = int(0.7 * len(lines))  # 70/30 train/test split

    # the last field of each record is the class label;
    # the second-to-last field is dropped
    train_x = [s[:-2] for s in lines[0:d]]
    train_y = [s[-1] for s in lines[0:d]]
    test_x = [s[:-2] for s in lines[d:]]
    test_y = [s[-1] for s in lines[d:]]

    print('Finished converting the data')

    # balance the training classes by random oversampling
    # (fit_sample was renamed to fit_resample in recent imblearn releases)
    ros = RandomOverSampler(random_state=0)
    X_resampled, y_resampled = ros.fit_resample(np.array(train_x), np.array(train_y))

    print('Finished resampling the data')

    return X_resampled, y_resampled, np.array(test_x), np.array(test_y)
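
# Each line consumed by read_data is expected to be a Python-literal list,
# e.g. (hypothetical):
#   [f1, f2, ..., fN, extra_field, label]
# where `label` is an integer class id used as the target and `extra_field`
# is the dropped second-to-last entry.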


train_x, train_y, test_x, test_y = read_data("D:\\data\\quantization\\stock19_18d_train_1.log")

# Split each flat feature vector into three branches:
#   a) the first 18*16 values, reshaped to (18, 16) for the LSTM branch
#   b) the next 18*10 values, reshaped to (18, 10, 1) for the CNN branch
#   c) the remaining values, fed unchanged to the MLP branch
train_x_a = train_x[:, :18*16]
train_x_a = train_x_a.reshape(train_x.shape[0], 18, 16)
train_x_b = train_x[:, 18*16:18*16 + 10*18]
train_x_b = train_x_b.reshape(train_x.shape[0], 18, 10, 1)
train_x_c = train_x[:, 18*16 + 10*18:]
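
# The head of the network below is a 5-way softmax trained with
# categorical_crossentropy, so the integer labels are one-hot encoded here
# (assuming the class ids lie in [0, 5); alternatively, keep integer labels
# and compile with sparse_categorical_crossentropy instead).
train_y = np_utils.to_categorical(train_y, 5)
test_y = np_utils.to_categorical(test_y, 5)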
60
|
+
|
|
61
|
+
|
|
62
|
+def create_mlp(dim, regress=False):
|
|
63
|
+ # define our MLP network
|
|
64
|
+ model = Sequential()
|
|
65
|
+ model.add(Dense(16, input_dim=dim, activation="relu"))
|
|
66
|
+ model.add(Dense(16, activation="relu"))
|
|
67
|
+
|
|
68
|
+ # check to see if the regression node should be added
|
|
69
|
+ if regress:
|
|
70
|
+ model.add(Dense(1, activation="linear"))
|
|
71
|
+
|
|
72
|
+ # return our model
|
|
73
|
+ return model
|
|


def create_cnn(width, height, depth, filters=32, kernel_size=(5, 6), regress=False, output=24):
    # initialize the input shape and channel dimension, assuming
    # TensorFlow/channels-last ordering
    inputShape = (width, height, depth)
    chanDim = -1

    # define the model input
    inputs = Input(shape=inputShape)

    x = inputs

    # CONV => RELU => BN => POOL
    x = Conv2D(filters, kernel_size, strides=(2, 2), padding="same",
               # data_format='channels_first'
               )(x)
    x = Activation("relu")(x)
    x = BatchNormalization(axis=chanDim)(x)
    # x = MaxPooling2D(pool_size=(2, 2))(x)
    # if width > 2:
    #     x = Conv2D(32, (10, 6), padding="same")(x)
    #     x = Activation("relu")(x)
    #     x = BatchNormalization(axis=chanDim)(x)

    # flatten the volume, then FC => RELU => BN => DROPOUT
    x = Flatten()(x)
    x = Dense(output)(x)
    x = Activation("relu")(x)
    x = BatchNormalization(axis=chanDim)(x)
    x = Dropout(0.2)(x)

    # apply another FC layer, this one to match the number of nodes
    # coming out of the MLP
    x = Dense(output)(x)
    x = Activation("relu")(x)

    # check to see if the regression node should be added
    if regress:
        x = Dense(1, activation="linear")(x)

    # construct the CNN
    model = Model(inputs, x)

    # return the CNN
    return model
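
# With the (18, 10, 1) input used below, the stride-2 "same" convolution
# yields a (9, 5, filters) volume, so with filters=32 the Flatten layer feeds
# 9 * 5 * 32 = 1440 units into the first Dense(output) layer.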


def create_lstm(sample, timesteps, input_dim):
    # `sample` (the number of training rows) is unused; it is kept only for
    # call-site compatibility
    inputShape = (timesteps, input_dim)

    # define the model input
    inputs = Input(shape=inputShape)

    x = inputs

    # the Input layer above already fixes the shape, so no input_shape
    # argument is needed on the LSTM itself
    x = LSTM(units=32, dropout=0.2)(x)
    # x = LSTM(16*16, return_sequences=False)
    # x = Activation("relu")(x)
    x = Dense(64)(x)
    x = Dropout(0.2)(x)
    x = Activation("relu")(x)

    # construct the LSTM branch
    model = Model(inputs, x)

    # return the LSTM branch
    return model


# create the MLP, LSTM, and CNN branch models
mlp = create_mlp(train_x_c.shape[1], regress=False)
cnn_0 = create_lstm(train_x_a.shape[1], 18, 16)  # LSTM branch, despite the name
cnn_1 = create_cnn(18, 10, 1, kernel_size=(3, 5), filters=32, regress=False, output=120)

# create the input to our final set of layers as the *output* of the
# MLP, LSTM, and CNN branches
combinedInput = concatenate([mlp.output, cnn_0.output, cnn_1.output])

# our final FC head: three wide dense layers followed by a 5-way softmax
# classification layer
x = Dense(888, activation="relu", kernel_regularizer=regularizers.l1(0.003))(combinedInput)
x = Dropout(0.2)(x)
x = Dense(888, activation="relu")(x)
x = Dense(888, activation="relu")(x)
# add one more layer
x = Dense(5, activation="softmax")(x)

# the final model accepts the flat numerical features on the MLP input, the
# (18, 16) sequences on the LSTM input, and the (18, 10, 1) grids on the CNN
# input, and outputs a 5-class probability distribution
model = Model(inputs=[mlp.input, cnn_0.input, cnn_1.input], outputs=x)
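
# Quick sanity check that the branch inputs match the slices prepared above;
# model.summary() would print the full architecture.
assert train_x_a.shape[1:] == (18, 16)
assert train_x_b.shape[1:] == (18, 10, 1)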


print("Starting training ")
# h = model.fit(train_x, train_y, batch_size=4096*2, epochs=500, shuffle=True)

# compile the model with categorical cross-entropy, the standard loss for
# one-hot multi-class classification, and track accuracy
# (newer Keras versions spell the argument `learning_rate` instead of `lr`)
opt = Adam(lr=1e-3, decay=1e-3 / 200)
model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=['accuracy'])

# train the model
print("[INFO] training model...")
model.fit(
    [train_x_c, train_x_a, train_x_b], train_y,
    # validation_data=([testAttrX, testImagesX], testY),
    # epochs=int(3*train_x_a.shape[0]/1300),
    epochs=epochs,
    batch_size=2048, shuffle=True,
    callbacks=[early_stopping]
)

# slice the test features into the same three branches as the training data
test_x_a = test_x[:, :18*16]
test_x_a = test_x_a.reshape(test_x.shape[0], 18, 16)
test_x_b = test_x[:, 18*16:18*16 + 10*18]
test_x_b = test_x_b.reshape(test_x.shape[0], 18, 10, 1)
test_x_c = test_x[:, 18*16 + 10*18:]

# evaluate on the held-out test data
print("[INFO] evaluating on test data...")
score = model.evaluate([test_x_c, test_x_a, test_x_b], test_y)

print(score)
print('Test score:', score[0])
print('Test accuracy:', score[1])

path = "19_18d_lstm_seq.h5"
model.save(path)
model = None
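
# To reuse the trained network later, reload it from the saved file
# (this is what load_model is imported for), e.g.:
# model = load_model(path)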