cnn_train_dmi.py 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104
  1. import keras
  2. # -*- encoding:utf-8 -*-
  3. import numpy as np
  4. from keras.models import Sequential
# Adam is used as the optimizer (many alternatives exist, e.g. SGD).
  6. from keras.optimizers import Adam
  7. import random
  8. from keras.models import load_model
  9. from imblearn.over_sampling import RandomOverSampler
  10. from keras.utils import np_utils
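# Layers used to build the model: Conv2D (convolution), Activation
# (activation function) and MaxPool2D (pooling).
# Flatten collapses a multi-dimensional input into one dimension.
# Dense is a fully connected layer.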
  14. from keras.layers import Conv2D, Activation, MaxPool2D, Flatten, Dense,Dropout
  15. from keras import regularizers
  16. def read_data(path):
  17. lines = []
  18. with open(path) as f:
  19. for x in range(20000):
  20. lines.append(eval(f.readline().strip()))
  21. random.shuffle(lines)
  22. print('读取数据完毕')
  23. d=int(0.75*len(lines))
  24. train_x=[s[:-2] for s in lines[0:d]]
  25. train_y=[s[-1] for s in lines[0:d]]
  26. test_x=[s[:-2] for s in lines[d:]]
  27. test_y=[s[-1] for s in lines[d:]]
  28. print('转换数据完毕')
  29. ros = RandomOverSampler(random_state=0)
  30. X_resampled, y_resampled = ros.fit_sample(np.array(train_x), np.array(train_y))
  31. print('数据重采样完毕')
  32. return X_resampled,y_resampled,np.array(test_x),np.array(test_y)
  33. train_x,train_y,test_x,test_y=read_data("D:\\data\\quantization\\stock16_18d_train.log")
  34. train_x = train_x.reshape(train_x.shape[0], 1,77,6)
  35. test_x = test_x.reshape(test_x.shape[0], 1,77, 6)
  36. model = Sequential()
  37. # 模型卷积层设计
  38. model.add(Conv2D(
  39. kernel_size=(5, 6), filters=64,
  40. padding='same', # 选择滤波器的扫描方式,即是否考虑边缘
  41. input_shape=(1,77,6), # 设置输入的形状
  42. # batch_input_shape=(64, 1, 28, 28),
  43. ))
  44. # 选择激活函数
  45. model.add(Activation('relu'))
  46. # # 设置下采样(池化层)
  47. # model.add(MaxPool2D(
  48. # pool_size=(4,1), # 下采样格为2*2
  49. # strides=(2,2), # 向右向下的步长
  50. # padding='same', # padding mode is 'same'
  51. # ))
  52. # 使用Flatten函数,将输入数据扁平化(因为输入数据是一个多维的形式,需要将其扁平化)
  53. model.add(Flatten()) # 将多维的输入一维化
  54. model.add(Dense(units=777, activation='relu', kernel_regularizer=regularizers.l1(0.003)))
  55. model.add(Dropout(0.2))
  56. model.add(Dense(units=777, activation='relu'))
  57. model.add(Dense(units=777, activation='relu'))
  58. model.add(Dense(units=777, activation='relu',kernel_regularizer=regularizers.l1(0.002)))
  59. model.add(Dropout(0.2))
  60. model.add(Dense(units=1024, activation='relu'))
  61. # 在建设一层
  62. model.add(Dense(5)) # 输入是个类别
  63. model.add(Activation('softmax')) # 用于分类的softmax函数
  64. adam = Adam() # 学习速率lr=0.0001
  65. model.compile(optimizer=adam,
  66. loss='categorical_crossentropy',
  67. metrics=['accuracy'])
  68. print("Starting training ")
  69. h=model.fit(train_x, train_y, batch_size=4096*2, epochs=50, shuffle=True)
  70. score = model.evaluate(test_x, test_y)
  71. print(score)
  72. print('Test score:', score[0])
  73. print('Test accuracy:', score[1])
  74. path="16_18d_cnn_seq.h5"
  75. model.save(path)
  76. model=None
  77. model=load_model(path)
  78. result=model.predict(test_x)
  79. print(result)
  80. print(test_y)