# -*- encoding:utf-8 -*-
"""Train dense Keras classifiers on row-per-line log files.

Each input file holds one Python-literal sequence per line.  ``read_data``
turns such a file into oversampled train / held-out test arrays, ``train``
fits and saves a dense softmax network on them, ``mul_train`` runs ``train``
for one or more per-cluster files, and ``resample`` splits a log file into
per-cluster files using a previously fitted estimator.
"""
import random
from collections import defaultdict
from itertools import islice

# NOTE: keras, load_model and tensorflow are not referenced below; the imports
# are kept because other code may rely on them being loaded.
import keras
import numpy as np
from keras.models import Sequential
from keras.layers import Dense,Dropout
from keras import regularizers
from keras.models import load_model
from imblearn.over_sampling import RandomOverSampler
import joblib
import tensorflow

# Layout of the slice of each row that ``resample`` feeds to the estimator:
# elements 1 .. _GROUPS * _GROUP_WIDTH are viewed as a (_GROUPS, _GROUP_WIDTH)
# table and only the first _KEPT_COLUMNS columns of that table are used.
_GROUPS = 10
_GROUP_WIDTH = 17
_KEPT_COLUMNS = 4


def _parse_rows(handle, limit=None):
    """Return the evaluated non-blank lines of *handle*.

    At most *limit* physical lines are consumed (all of them when *limit* is
    ``None``).  Blank lines are skipped instead of being passed to ``eval``.
    """
    rows = []
    for raw in islice(handle, limit):
        text = raw.strip()
        if not text:
            continue
        # SECURITY: eval() executes whatever expression the line contains, so
        # only feed this trusted files.  NOTE(review): ast.literal_eval would
        # be safer if every row is a plain literal -- confirm before switching.
        rows.append(eval(text))
    return rows


def read_data(path):
    """Load, shuffle, split and oversample the rows stored in *path*.

    The rows are shuffled in place, the first 95% become the training split
    and the remainder the test split.  For every row the features are all
    elements except the last two and the label is the last element; the row
    length is taken from the first row after shuffling.
    NOTE(review): the second-to-last element is used neither as a feature nor
    as the label -- confirm that this is intended.

    Only the training split is passed through ``RandomOverSampler``.

    Returns:
        ``(train_x, train_y, test_x, test_y)`` as numpy arrays.

    Raises:
        ValueError: if *path* contains no rows.
    """
    with open(path) as f:
        rows = _parse_rows(f)
    if not rows:
        raise ValueError('no data rows found in %r' % (path,))
    random.shuffle(rows)
    print('读取数据完毕')

    split = int(0.95 * len(rows))
    size = len(rows[0])
    train_rows, test_rows = rows[:split], rows[split:]
    train_x = [row[:size - 2] for row in train_rows]
    train_y = [row[size - 1] for row in train_rows]
    test_x = [row[:size - 2] for row in test_rows]
    test_y = [row[size - 1] for row in test_rows]
    print('转换数据完毕')

    sampler = RandomOverSampler(random_state=0)
    # fit_sample() was deprecated in favour of fit_resample() and later
    # removed from imbalanced-learn; fall back only for old releases.
    oversample = getattr(sampler, 'fit_resample', None) or sampler.fit_sample
    X_resampled, y_resampled = oversample(np.array(train_x), np.array(train_y))
    print('数据重采样完毕')
    return X_resampled, y_resampled, np.array(test_x), np.array(test_y)


def resample(path, max_lines=330000, estimator_path='km.pkl',
             output_prefix='D:\\data\\quantization\\kmeans\\stock8_14_train_'):
    """Split the rows of *path* into one log file per predicted cluster.

    Up to *max_lines* lines are read from *path* (reading stops early at end
    of file).  For every row, elements ``1 .. _GROUPS * _GROUP_WIDTH`` are
    reshaped to ``(_GROUPS, _GROUP_WIDTH)``, the first ``_KEPT_COLUMNS``
    columns are flattened, and the estimator loaded from *estimator_path*
    predicts a label for the result.  ``str(row)`` is then appended to
    ``output_prefix + str(label) + '.log'``.

    Args:
        path: input file with one evaluable row per line.
        max_lines: maximum number of lines to consume from *path*.
        estimator_path: joblib file holding a fitted estimator with a
            ``predict`` method.
        output_prefix: path prefix of the per-label output files.

    Raises:
        ValueError: if a row is too short to supply the expected slice.
    """
    with open(path) as f:
        rows = _parse_rows(f, max_lines)
    if not rows:
        return

    estimator = joblib.load(estimator_path)

    span = _GROUPS * _GROUP_WIDTH
    matrix = np.array([row[1:span + 1] for row in rows])
    # Guard explicitly: without it uniformly short rows could be silently
    # merged into each other by the reshape below.
    if matrix.ndim != 2 or matrix.shape[1] != span:
        raise ValueError(
            'every row must provide %d values after its first element' % span)
    features = (
        matrix.reshape(-1, _GROUPS, _GROUP_WIDTH)[:, :, :_KEPT_COLUMNS]
        .reshape(-1, _GROUPS * _KEPT_COLUMNS)
    )

    # One predict call for all rows instead of one call per row.
    labels = estimator.predict(features)

    # Group first so that each output file is opened once rather than once
    # per row; the relative order of rows inside a file is unchanged.
    grouped = defaultdict(list)
    for row, label in zip(rows, labels):
        grouped[str(label)].append(str(row) + '\n')
    for label, chunk in grouped.items():
        with open(output_prefix + label + '.log', 'a') as out:
            out.writelines(chunk)


def mul_train(cluster_ids=(0,)):
    """Train one model per cluster id in *cluster_ids*.

    For each id ``x`` the data file ``stock2_10_<x>.log`` is used and the
    model is saved as ``5d_dnn_seq_<x>.h5``.  The default trains cluster 0
    only; pass e.g. ``range(0, 16)`` to train more.
    """
    for x in cluster_ids:
        train(input_dim=176, result_class=5,
              file_path="D:\\data\\quantization\\kmeans\\stock2_10_" + str(x) + ".log",
              model_name='5d_dnn_seq_' + str(x) + '.h5')


def train(input_dim=400, result_class=3, file_path="D:\\data\\quantization\\stock6.log", model_name=''):
    """Fit a dense softmax classifier on *file_path* and save it.

    Args:
        input_dim: number of input features per sample.
        result_class: number of units of the softmax output layer.
        file_path: data file passed to ``read_data``.
        model_name: destination passed to ``model.save``.  When empty the
            model is not saved (saving to an empty path would only fail after
            the whole training run has finished).

    The evaluation result on the held-out split is printed.
    """
    train_x, train_y, test_x, test_y = read_data(file_path)

    hidden_units = 120 + input_dim
    model = Sequential()
    model.add(Dense(units=hidden_units, input_dim=input_dim, activation='relu'))
    model.add(Dense(units=hidden_units, activation='relu',
                    kernel_regularizer=regularizers.l1(0.001)))
    model.add(Dense(units=hidden_units, activation='relu'))
    model.add(Dropout(0.1))
    model.add(Dense(units=hidden_units, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(units=512, activation='relu'))
    model.add(Dense(units=result_class, activation='softmax'))

    # NOTE(review): categorical_crossentropy expects one-hot targets; if the
    # labels in the data files are integer class ids this should be
    # sparse_categorical_crossentropy -- confirm against the data format.
    model.compile(loss='categorical_crossentropy', optimizer="adam", metrics=['accuracy'])

    print("Starting training ")
    # The epoch count grows with the size of the (oversampled) training set.
    epochs = 900 + 9 * (len(train_x) // 700)
    model.fit(train_x, train_y, batch_size=2048, epochs=epochs, shuffle=True)

    score = model.evaluate(test_x, test_y)
    print(score)
    print('Test score:', score[0])
    print('Test accuracy:', score[1])

    if model_name:
        model.save(model_name)
    else:
        print('model_name is empty; the trained model was not saved')


if __name__ == '__main__':
    # Alternative entry points:
    # train(input_dim=176, result_class=5, file_path="D:\\data\\quantization\\stock6_5.log", model_name='5d_dnn_seq.h5')
    # train(input_dim=400, result_class=3, file_path="D:\\data\\quantization\\stock6.log", model_name='15m_dnn_seq.h5')
    # mul_train()
    resample('D:\\data\\quantization\\stock8_14.log')