123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155 |
- import keras
- # -*- encoding:utf-8 -*-
- import numpy as np
- from keras.models import Sequential
- from keras.layers import Dense,Dropout
- import random
- from keras import regularizers
- from keras.callbacks import EarlyStopping
- from imblearn.over_sampling import RandomOverSampler
- import joblib
- import tensorflow
# Shared Keras callback handed to model.fit() in train(): it watches the
# 'accuracy' metric and stops training after 5 epochs without improvement.
early_stopping = EarlyStopping(
    monitor='accuracy',
    patience=5,
    verbose=2,
)
def _load_rows(path):
    """Parse every non-blank line of the text file ``path`` into an object."""
    rows = []
    with open(path) as f:
        for raw in f:
            text = raw.strip()
            if not text:
                # A blank line would make eval('') raise SyntaxError.
                continue
            # SECURITY: eval() executes whatever expression the file contains.
            # Only feed this trusted, locally generated logs; consider
            # ast.literal_eval if every row is a plain literal.
            rows.append(eval(text))
    return rows


def read_data(path, extra_paths=("D:\\data\\quantization\\stock578N_12d_train3.log",)):
    """Load sample rows, split them into train/test and oversample the train part.

    Rows are read from ``path`` and from every file in ``extra_paths``, one
    Python expression per line, then shuffled with ``random.shuffle``.  The
    first 95% of the shuffled rows become the training split and the rest the
    test split.  Only the training split is passed through
    ``RandomOverSampler(random_state=0)``.

    Args:
        path: Primary data file.
        extra_paths: Additional data files appended after ``path``.  The
            default keeps the file that used to be hard-coded here; pass
            ``()`` to read ``path`` only.

    Returns:
        A tuple ``(train_x, train_y, test_x, test_y)`` where the first two
        items are the oversampled training arrays and the last two are
        ``numpy`` arrays built from the untouched test split.

    Raises:
        ValueError: If no rows could be read from any of the files.
    """
    lines = _load_rows(path)
    for extra in extra_paths:
        lines.extend(_load_rows(extra))
    if not lines:
        raise ValueError('no data rows found in %r or %r' % (path, extra_paths))

    random.shuffle(lines)
    print('读取数据完毕')

    d = int(0.95 * len(lines))
    size = len(lines[0])
    # Features are every element except the last two; the label is the last
    # element.
    # NOTE(review): the element at index size - 2 is used neither as a feature
    # nor as the label -- confirm that dropping it is intentional.
    train_x = [s[:size - 2] for s in lines[:d]]
    train_y = [s[size - 1] for s in lines[:d]]
    test_x = [s[:size - 2] for s in lines[d:]]
    test_y = [s[size - 1] for s in lines[d:]]
    print('转换数据完毕')

    ros = RandomOverSampler(random_state=0)
    # imbalanced-learn renamed fit_sample() to fit_resample() and later removed
    # the old name; prefer the new one and fall back for old installations.
    oversample = getattr(ros, 'fit_resample', None) or ros.fit_sample
    X_resampled, y_resampled = oversample(np.array(train_x), np.array(train_y))
    print('数据重采样完毕')
    return X_resampled, y_resampled, np.array(test_x), np.array(test_y)
def resample(path, max_lines=330000):
    """Route rows of ``path`` into per-cluster files using a saved estimator.

    At most the first ``max_lines`` lines of ``path`` are parsed (one Python
    expression per line).  For each row, elements ``1 .. 170`` are viewed as a
    10 x 17 matrix, the first 4 columns are kept and flattened to shape
    ``(1, 40)``, and that vector is passed to the estimator loaded from
    ``km.pkl``.  The full row is then appended to
    ``D:\\data\\quantization\\kmeans\\stock8_14_train_<label>.log`` where
    ``<label>`` is the first predicted value.

    Args:
        path: Input data file.
        max_lines: Upper bound on the number of lines read from ``path``.
            Files shorter than this are read to the end instead of failing.
    """
    lines = []
    with open(path) as src:
        for idx, raw in enumerate(src):
            if idx >= max_lines:
                break
            text = raw.strip()
            if not text:
                # eval('') raises SyntaxError; skip blank lines.
                continue
            # SECURITY: eval() executes whatever the file contains; use only
            # with trusted, locally generated logs.
            lines.append(eval(text))

    estimator = joblib.load('km.pkl')
    n_rows = 10
    row_width = 17
    for line in lines:
        # Skip element 0, take n_rows * row_width values, keep the first four
        # columns of every row and flatten them into one 40-value sample.
        v = np.array(line[1:row_width * n_rows + 1])
        v = v.reshape(n_rows, row_width)
        v = v[:, 0:4]
        v = v.reshape(1, 40)
        r = estimator.predict(v)
        # Append mode: repeated runs keep adding to the same cluster files.
        with open('D:\\data\\quantization\\kmeans\\stock8_14_train_' + str(r[0]) + '.log', 'a') as out:
            out.write(str(line) + '\n')
def mul_train():
    """Train one 5-class model per cluster data file.

    For each cluster index ``x`` this calls ``train`` on
    ``D:\\data\\quantization\\kmeans\\stock2_10_<x>.log`` and saves the model
    as ``5d_dnn_seq_<x>.h5``.
    """
    # Only index 0 is trained at the moment; an earlier version of this loop
    # iterated over range(0, 16).
    for x in [0]:
        # train() derives the input dimension from the data itself and has no
        # ``input_dim`` parameter, so passing one raised TypeError.
        train(result_class=5,
              file_path="D:\\data\\quantization\\kmeans\\stock2_10_" + str(x) + ".log",
              model_name='5d_dnn_seq_' + str(x) + '.h5')
def train(result_class=3, file_path="D:\\data\\quantization\\stock6.log", model_name=''):
    """Build, fit, evaluate and optionally save a dense softmax classifier.

    The data comes from ``read_data(file_path)``; the network input width is
    taken from the number of columns of the training matrix.  The model is a
    stack of fully connected layers (five of width ``320 + input_dim``, one of
    width 512) ending in a ``result_class``-way softmax, trained with Adam for
    up to 128 epochs using the module-level ``early_stopping`` callback.  The
    test loss and accuracy are printed after training.

    Args:
        result_class: Number of output units of the final softmax layer.
        file_path: Data file handed to ``read_data``.
        model_name: Path the trained model is saved to.  When empty, the
            model is not saved (previously ``model.save('')`` failed only
            after the whole training run had finished).
    """
    train_x, train_y, test_x, test_y = read_data(file_path)
    input_dim = train_x.shape[1]
    hidden_units = 320 + input_dim

    model = Sequential()
    model.add(Dense(units=hidden_units, input_dim=input_dim, activation='relu'))
    model.add(Dense(units=hidden_units, activation='relu',
                    kernel_regularizer=regularizers.l1(0.001)))
    model.add(Dense(units=hidden_units, activation='relu'))
    model.add(Dropout(0.1))
    model.add(Dense(units=hidden_units, activation='selu'))
    model.add(Dropout(0.1))
    model.add(Dense(units=hidden_units, activation='selu'))
    model.add(Dense(units=512, activation='relu'))
    model.add(Dense(units=result_class, activation='softmax'))

    # NOTE(review): categorical_crossentropy expects one-hot targets with
    # ``result_class`` columns -- confirm the labels returned by read_data are
    # in that form (integer labels would need sparse_categorical_crossentropy).
    model.compile(loss='categorical_crossentropy', optimizer="adam", metrics=['accuracy'])

    print("Starting training ")
    model.fit(train_x, train_y, batch_size=4096 * 2, epochs=128, shuffle=True,
              callbacks=[early_stopping])

    score = model.evaluate(test_x, test_y)
    print(score)
    print('Test score:', score[0])
    print('Test accuracy:', score[1])

    if model_name:
        model.save(model_name)
    else:
        print('model_name is empty; the trained model was not saved')
- '''
- 559 input_dim=53 24,101,47
- 560 input_dim=63 23,101,44
- 561 6d input_dim=63 21,100,50
- 562 3d input_dim=35 23,100,48
- 563 6d input_dim=62 22,101,46
- 564 6d input_dim=26 29,100,47
- 565 6d input_dim=26
- 566 6d input_dim=26 42,98,57 随机44,97,60
- 567 2d input_dim=70 >0.9 30,100,51 随机44,97,60
- 568 3d-3 input_dim= >0.7 35,97,64
- 569 4d-3 input_dim= >0.7 33,100,52 >0.9 33,100.3,50
- 571 6d-3 >0.7 37,100,48
- 572 5d-3 + macd+roc 37,101,46
- 573 +index 32,101,44
- 574 modify 35,101.9,43
- 575 modify 34,101,45
- 576 去掉index 35,101,47
- 577 加上r0_ratio 34,100,48
- 578 570修正 30,101,47
- 578A 38,101.7 44
- 578B 4class
- 578C 加上指数 38,99,52
- 578D 指数涨幅 34,100,50
- 578E 涨停数 39,100,51
- 578F 加上是否最高最低 34,,100.9,48
- 579G 换成dmi 32 100.9 47
- 579H 回退 去掉roc等 39,101.5,45
- 579I 加上是否最高最低 30,100,50
- 579J 加上DMI 34,101,47
- 574A >0.8 34,98,59 36,1.86,25
- 570 5d-3 input_dim= >0.9 32,102,42 随机45,99,56
- 578K 用上日收盘价 39,101,46
- 578L 仅ROC 41,101.6,43 41,2.036,29
- 578M 仅macd 41,101.7,43 41,2.035,28
- 578N 仅DMI 42,2.079,33|全数据50,2.67,72 随机45,1.82,20
- 578O 都去掉 41,2.013,29
- 580 去掉Low,High
- '''
if __name__ == '__main__':
    # Script entry point: train the 3-class model on the "580" data set.
    # Other entry points that have been run from here in the past are
    # resample('D:\\data\\quantization\\stock8_14.log') and mul_train().
    data_file = "D:\\data\\quantization\\stock580_12d_train2.log"
    output_model = '5d_580_dnn_seq.h5'
    train(result_class=3, file_path=data_file, model_name=output_model)
|