# -*- encoding:utf-8 -*-
"""Train per-cluster dense classifiers on line-oriented sample files.

Each input file holds one Python-literal sequence per line.  ``resample``
routes samples into per-cluster files using a pre-fitted estimator, and
``mul_train`` trains one Keras model per cluster file.
"""
import contextlib
import itertools
import random

import joblib
import numpy as np
from imblearn.over_sampling import RandomOverSampler
from keras import regularizers
from keras.layers import Dense, Dropout
from keras.models import Sequential


def _load_records(path, limit=None):
    """Parse up to ``limit`` lines of ``path`` into Python objects.

    Blank lines are skipped.  ``limit=None`` reads the whole file; reading
    stops quietly at end of file when fewer than ``limit`` lines exist.
    """
    records = []
    with open(path) as f:
        for raw in itertools.islice(f, limit):
            text = raw.strip()
            if not text:
                continue
            # SECURITY: eval() executes arbitrary code found in the file.
            # Only use this on trusted, locally generated data.
            records.append(eval(text))
    return records


def read_data(path):
    """Load, shuffle and split the samples in ``path``.

    The first 95% of the shuffled records form the training set, which is
    then balanced with random over-sampling; the rest is the test set.
    Features are every column except the last two; the label is the last
    column.
    NOTE(review): the second-to-last column is neither a feature nor the
    label -- confirm it is meant to be dropped.

    Returns:
        (train_x, train_y, test_x, test_y) as numpy arrays.

    Raises:
        ValueError: if the file contains no records.
    """
    records = _load_records(path)
    if not records:
        raise ValueError('no records found in ' + str(path))
    random.shuffle(records)
    print('读取数据完毕')

    split = int(0.95 * len(records))
    size = len(records[0])
    train_rows = records[:split]
    test_rows = records[split:]
    train_x = [row[:size - 2] for row in train_rows]
    train_y = [row[size - 1] for row in train_rows]
    test_x = [row[:size - 2] for row in test_rows]
    test_y = [row[size - 1] for row in test_rows]
    print('转换数据完毕')

    ros = RandomOverSampler(random_state=0)
    # Newer imbalanced-learn releases expose fit_resample instead of
    # fit_sample; prefer it and fall back for older installations.
    fit = getattr(ros, 'fit_resample', None) or ros.fit_sample
    X_resampled, y_resampled = fit(np.array(train_x), np.array(train_y))
    print('数据重采样完毕')
    return X_resampled, y_resampled, np.array(test_x), np.array(test_y)


def resample(path):
    """Split the samples in ``path`` into 12 per-cluster training files.

    Reads at most 110000 lines, predicts a cluster index for each sample
    with the estimator stored in ``km_dmi_18.pkl`` and appends the sample
    to the output file with that index.
    """
    records = _load_records(path, limit=110000)
    estimator = joblib.load('km_dmi_18.pkl')

    items_per_step = 21  # number of items in each data entry
    period = 18  # period (number of entries per sample)

    # ExitStack closes every output file, even if prediction fails midway.
    with contextlib.ExitStack() as stack:
        outputs = [
            stack.enter_context(
                open('D:\\data\\quantization\\kmeans\\stock9_18_train_' + str(idx) + '.log', 'a'))
            for idx in range(0, 12)
        ]
        for record in records:
            # Skip element 0, view the next period*items_per_step values as
            # a (period, items_per_step) grid and keep columns 4..7 only.
            grid = np.array(record[1:items_per_step * period + 1])
            grid = grid.reshape(period, items_per_step)
            features = grid[:, 4:8].reshape(1, 4 * period)
            cluster = estimator.predict(features)
            outputs[cluster[0]].write(str(record) + '\n')


def mul_train():
    """Train one model per selected cluster file and log its test accuracy."""
    # Other cluster selections used previously: range(0, 12), [2, 4, 7, 10]
    for x in [11, 0, 1, 3, 8, 9]:
        score = train(input_dim=384, result_class=5,
                      file_path="D:\\data\\quantization\\kmeans\\stock9_18_train_" + str(x) + ".log",
                      model_name='18d_dnn_seq_' + str(x) + '.h5')
        with open('D:\\data\\quantization\\kmeans\\stock9_18_dmi.log', 'a') as f:
            f.write(str(x) + ':' + str(score[1]) + '\n')


def train(input_dim=400, result_class=3, file_path="D:\\data\\quantization\\stock6.log", model_name=''):
    """Build, fit and evaluate a dense softmax classifier.

    Args:
        input_dim: number of input features per sample.
        result_class: number of units in the softmax output layer.
        file_path: sample file passed to ``read_data``.
        model_name: path to save the trained model to; when empty the
            model is not saved.

    Returns:
        The list returned by ``model.evaluate`` (loss, then accuracy).
    """
    train_x, train_y, test_x, test_y = read_data(file_path)
    width = 120 + input_dim

    model = Sequential()
    model.add(Dense(units=width, input_dim=input_dim, activation='relu'))
    model.add(Dense(units=width, activation='relu', kernel_regularizer=regularizers.l1(0.002)))
    model.add(Dropout(0.2))
    model.add(Dense(units=width, activation='relu'))
    model.add(Dense(units=width, activation='relu'))
    model.add(Dense(units=width, activation='relu', kernel_regularizer=regularizers.l1(0.002)))
    model.add(Dropout(0.2))
    model.add(Dense(units=width, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(units=width, activation='selu'))
    model.add(Dropout(0.2))
    model.add(Dense(units=width, activation='selu'))
    model.add(Dense(units=512, activation='relu'))
    model.add(Dense(units=result_class, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer="adam", metrics=['accuracy'])

    print("Starting training ")
    # The epoch count grows with the size of the (over-sampled) training set.
    model.fit(train_x, train_y, batch_size=4096,
              epochs=900 + 6 * int(len(train_x) / 600), shuffle=True)

    score = model.evaluate(test_x, test_y)
    print(score)
    print('Test score:', score[0])
    print('Test accuracy:', score[1])
    # The default model_name is '', which cannot be saved to; skip saving
    # in that case instead of failing after a long training run.
    if model_name:
        model.save(model_name)
    return score


if __name__ == '__main__':
    # Run this first to generate the per-cluster files read by mul_train():
    # resample('D:\\data\\quantization\\stock9_18_1.log')
    mul_train()