# -*- encoding:utf-8 -*-
"""Split record logs by cluster and train dense Keras classifiers on them.

Each input file holds one Python-literal sequence per line.  ``resample``
routes records into per-cluster log files using a pre-fitted estimator,
``read_data`` loads and oversamples one such file, and ``train`` /
``mul_train`` fit and evaluate a feed-forward network on it.
"""
import random
from contextlib import ExitStack
from itertools import islice

import joblib
import numpy as np
from imblearn.over_sampling import RandomOverSampler
from keras import regularizers
from keras.layers import Dense, Dropout
from keras.models import Sequential


def _parse_records(rows):
    """Evaluate every non-blank text row and return the resulting objects.

    Blank rows are skipped instead of being passed to ``eval`` (which would
    raise ``SyntaxError`` on an empty string).

    NOTE(review): ``eval`` executes arbitrary expressions.  This is only
    acceptable because the logs are produced locally; if every row is a
    plain literal, ``ast.literal_eval`` would be the safer choice -- confirm
    the rows never contain names such as ``nan`` before switching.
    """
    records = []
    for row in rows:
        text = row.strip()
        if text:
            records.append(eval(text))
    return records


def read_data(path):
    """Load a record file, split it into train/test and oversample the train part.

    Args:
        path: Text file with one evaluable sequence per line.

    Returns:
        ``(train_x, train_y, test_x, test_y)`` as numpy arrays.  The records
        are shuffled, the first 81% form the training set and the rest the
        test set.  Only the training set is passed through
        ``RandomOverSampler``; the test set is returned as read.

    Raises:
        IndexError: If the file contains no records.
    """
    with open(path) as f:
        lines = _parse_records(f)
    random.shuffle(lines)
    print('读取数据完毕')

    d = int(0.81 * len(lines))
    size = len(lines[0])
    # Features are every column except the last two; the label is the last
    # column.  NOTE(review): the second-to-last column is used for neither --
    # presumably intentional (e.g. an alternative label); confirm.
    train_x = [s[:size - 2] for s in lines[:d]]
    train_y = [s[size - 1] for s in lines[:d]]
    test_x = [s[:size - 2] for s in lines[d:]]
    test_y = [s[size - 1] for s in lines[d:]]
    print('转换数据完毕')

    ros = RandomOverSampler(random_state=0)
    # ``fit_sample`` was deprecated and later removed from imbalanced-learn;
    # prefer ``fit_resample`` and fall back only on old releases.
    oversample = getattr(ros, 'fit_resample', None) or ros.fit_sample
    X_resampled, y_resampled = oversample(np.array(train_x), np.array(train_y))
    print('数据重采样完毕')
    return X_resampled, y_resampled, np.array(test_x), np.array(test_y)


data_dir = 'D:\\data\\quantization\\'


def resample(path, suffix='test'):
    """Distribute the records of one log file over 12 per-cluster log files.

    Reads up to 64000 lines of ``data_dir + path + '.log'``, asks the
    estimator stored in ``km_dmi_18.pkl`` (loaded from the current working
    directory) for a cluster index per record, and appends the record to
    ``data_dir + 'kmeans\\' + path[:17] + '_<index>.log'``.

    Args:
        path: Log file name without directory and without the ``.log``
            extension.
        suffix: Unused; kept so existing callers keep working.

    NOTE(review): ``joblib.load`` unpickles the estimator, so the ``.pkl``
    file must come from a trusted source.
    """
    max_lines = 64000  # original note listed 560k, 110k and 64k as sizes
    items_per_step = 16  # number of items per data entry
    periods = 18  # period (number of entries per record)

    with open(data_dir + path + '.log') as f:
        # islice stops at EOF, so files shorter than max_lines are fine.
        lines = _parse_records(islice(f, max_lines))

    estimator = joblib.load('km_dmi_18.pkl')

    # ExitStack guarantees all 12 output files are flushed and closed, even
    # if prediction or writing fails part-way through.
    with ExitStack() as stack:
        file_list = [
            stack.enter_context(
                # path[:17] keeps a prefix such as 'stock14_18d_train'.
                open(data_dir + 'kmeans\\' + path[:17] + '_' + str(x) + '.log', 'a')
            )
            for x in range(0, 12)
        ]
        for line in lines:
            v = np.array(line[0:items_per_step * periods])
            v = v.reshape(periods, items_per_step)
            # Only columns 6..9 of every period are fed to the estimator.
            v = v[:, 6:10].reshape(1, 4 * periods)
            r = estimator.predict(v)
            file_list[r[0]].write(str(line) + '\n')


def mul_train(name="10_18d"):
    """Train one model per selected cluster file and log its test accuracy.

    For each cluster index in ``[8, 5]`` this trains on
    ``data_dir + 'kmeans\\stock<name>_train_<index>.log'``, saves the model as
    ``<name>_dnn_seq_<index>.h5`` and appends ``<index>:<accuracy>`` to
    ``data_dir + 'stock<name>_dmi.log'``.

    Args:
        name: Dataset tag embedded in the input, model and score file names.
    """
    for x in [8, 5]:
        score = train(
            input_dim=480,
            result_class=5,
            file_path=data_dir + "kmeans\\stock" + name + "_train_" + str(x) + ".log",
            model_name=name + '_dnn_seq_' + str(x) + '.h5',
        )
        with open(data_dir + 'stock' + name + '_dmi.log', 'a') as f:
            f.write(str(x) + ':' + str(score[1]) + '\n')


def train(input_dim=400, result_class=3,
          file_path="D:\\data\\quantization\\stock6.log", model_name=''):
    """Build, fit, evaluate and optionally save a dense softmax classifier.

    Args:
        input_dim: Number of input features; also added to the width of most
            hidden layers.
        result_class: Number of units in the softmax output layer.
        file_path: Record file handed to ``read_data``.
        model_name: Path the trained model is saved to.  When empty, the
            model is not saved (previously the save call failed after the
            whole training run had completed).

    Returns:
        The list returned by ``model.evaluate`` on the test split:
        ``[loss, accuracy]``.
    """
    train_x, train_y, test_x, test_y = read_data(file_path)

    model = Sequential()
    model.add(Dense(units=320 + input_dim, input_dim=input_dim, activation='relu',
                    kernel_regularizer=regularizers.l1(0.003)))
    model.add(Dense(units=300 + input_dim, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(units=220 + input_dim, activation='relu'))
    model.add(Dense(units=220 + input_dim, activation='relu'))
    model.add(Dense(units=220 + input_dim, activation='relu',
                    kernel_regularizer=regularizers.l1(0.002)))
    model.add(Dropout(0.2))
    model.add(Dense(units=320 + input_dim, activation='relu'))
    model.add(Dense(units=666, activation='relu'))
    model.add(Dense(units=result_class, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer="adam",
                  metrics=['accuracy'])

    print("Starting training ")
    # The epoch count grows with the training-set size: 4 extra epochs per
    # 1600 training rows on top of a base of 400.
    epochs = 400 + 4 * int(len(train_x) / 1600)
    model.fit(train_x, train_y, batch_size=4096 * 4, epochs=epochs, shuffle=True)

    score = model.evaluate(test_x, test_y)
    print(score)
    print('Test score:', score[0])
    print('Test accuracy:', score[1])

    if model_name:
        model.save(model_name)
    return score


if __name__ == '__main__':
    # To regenerate the per-cluster files first, run e.g.:
    #   resample('stock19_18d_test', suffix='test')
    mul_train('19_18d')