123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110 |
- import keras
- # -*- encoding:utf-8 -*-
- import numpy as np
- from keras.models import Sequential
- from keras.layers import Dense,Dropout
- import random
- from keras import regularizers
- from keras.models import load_model
- from imblearn.over_sampling import RandomOverSampler
- import joblib
- import tensorflow
def read_data(path):
    """Load samples from *path*, split them 95/5 and oversample the training part.

    Every non-blank line of the file is evaluated as a Python expression and
    is expected to yield an indexable sequence. The samples are shuffled with
    the global ``random`` state before the split.

    Args:
        path: Path of the text file holding one sample per line.

    Returns:
        A tuple ``(train_x, train_y, test_x, test_y)``. The training arrays
        are the output of ``RandomOverSampler(random_state=0)``; the test
        arrays are plain ``np.array`` conversions of the held-out 5%.

    Raises:
        ValueError: If the file contains no samples.
    """
    # NOTE(security): eval() executes whatever expression a line contains.
    # Only feed this function files you produced yourself.
    with open(path) as source:
        samples = [eval(row.strip()) for row in source if row.strip()]
    if not samples:
        raise ValueError('no samples found in ' + str(path))

    random.shuffle(samples)
    print('读取数据完毕')

    split_at = int(0.95 * len(samples))
    # The record width is taken from the first (shuffled) sample.
    size = len(samples[0])
    train_part = samples[:split_at]
    test_part = samples[split_at:]

    # Features are everything before index size - 2; the label is the last
    # element.
    # NOTE(review): the element at index size - 2 is used as neither feature
    # nor label -- confirm that this is intended.
    train_x = [s[:size - 2] for s in train_part]
    train_y = [s[size - 1] for s in train_part]
    test_x = [s[:size - 2] for s in test_part]
    test_y = [s[size - 1] for s in test_part]
    print('转换数据完毕')

    sampler = RandomOverSampler(random_state=0)
    # Newer imbalanced-learn releases only provide fit_resample; fall back to
    # the legacy fit_sample name on old installations.
    fit = getattr(sampler, 'fit_resample', None) or sampler.fit_sample
    X_resampled, y_resampled = fit(np.array(train_x), np.array(train_y))
    print('数据重采样完毕')
    return X_resampled, y_resampled, np.array(test_x), np.array(test_y)
def resample(path, max_lines=330000):
    """Split the records of *path* into per-cluster log files.

    Each non-blank line is evaluated as a Python expression. For every
    record, elements ``1 .. 170`` are reshaped to ``(10, 17)``, the first
    four columns of each row are flattened to a ``(1, 40)`` vector, and the
    estimator loaded from ``km.pkl`` predicts a cluster id for it. The record
    is then appended to the log file named after that cluster id.

    Args:
        path: Path of the text file holding one record per line.
        max_lines: Maximum number of lines read from the file. Shorter files
            are read to the end instead of failing.
    """
    # NOTE(security): eval() executes whatever expression a line contains.
    # Only feed this function files you produced yourself.
    records = []
    with open(path) as source:
        for index, raw in enumerate(source):
            if index >= max_lines:
                break
            text = raw.strip()
            if text:
                records.append(eval(text))

    estimator = joblib.load('km.pkl')
    step_width = 17

    # Keep one append-mode handle per cluster instead of reopening the file
    # for every single record.
    outputs = {}
    try:
        for record in records:
            window = np.array(record[1:step_width * 10 + 1]).reshape(10, step_width)
            features = window[:, 0:4].reshape(1, 40)
            cluster = str(estimator.predict(features)[0])
            sink = outputs.get(cluster)
            if sink is None:
                sink = open('D:\\data\\quantization\\kmeans\\stock8_14_train_' + cluster + '.log', 'a')
                outputs[cluster] = sink
            sink.write(str(record) + '\n')
    finally:
        for sink in outputs.values():
            sink.close()
def mul_train():
    """Train one 5-class model per k-means cluster file.

    Only cluster 0 is currently processed.
    """
    cluster_ids = [0]
    for cluster_id in cluster_ids:
        suffix = str(cluster_id)
        data_file = "D:\\data\\quantization\\kmeans\\stock2_10_" + suffix + ".log"
        target_model = '5d_dnn_seq_' + suffix + '.h5'
        train(input_dim=176, result_class=5, file_path=data_file, model_name=target_model)
def train(input_dim=400, result_class=3, file_path="D:\\data\\quantization\\stock6.log", model_name=''):
    """Fit a dense softmax classifier on the data in *file_path* and save it.

    Args:
        input_dim: Number of input features per sample.
        result_class: Number of units in the softmax output layer.
        file_path: Sample file handed to ``read_data``.
        model_name: Path passed to ``model.save`` after training.
    """
    train_x, train_y, test_x, test_y = read_data(file_path)

    # All wide hidden layers share the same width.
    hidden_units = 120 + input_dim

    model = Sequential()
    layers = [
        Dense(units=hidden_units, input_dim=input_dim, activation='relu'),
        Dense(units=hidden_units, activation='relu', kernel_regularizer=regularizers.l1(0.001)),
        Dense(units=hidden_units, activation='relu'),
        Dropout(0.1),
        Dense(units=hidden_units, activation='relu'),
        Dropout(0.2),
        Dense(units=512, activation='relu'),
        Dense(units=result_class, activation='softmax'),
    ]
    for layer in layers:
        model.add(layer)

    model.compile(loss='categorical_crossentropy', optimizer="adam", metrics=['accuracy'])

    print("Starting training ")
    # The epoch count grows with the size of the training set.
    epoch_count = 900 + 9 * int(len(train_x) / 700)
    model.fit(train_x, train_y, batch_size=2048, epochs=epoch_count, shuffle=True)

    score = model.evaluate(test_x, test_y)
    print(score)
    print('Test score:', score[0])
    print('Test accuracy:', score[1])

    model.save(model_name)
if __name__ == '__main__':
    # Script entry point: split the raw log into per-cluster files.
    # Alternative entry points kept for reference:
    # train(input_dim=176, result_class=5, file_path="D:\\data\\quantization\\stock6_5.log", model_name='5d_dnn_seq.h5')
    # train(input_dim=400, result_class=3, file_path="D:\\data\\quantization\\stock6.log", model_name='15m_dnn_seq.h5')
    # mul_train()
    raw_log = 'D:\\data\\quantization\\stock8_14.log'
    resample(raw_log)
|