cnn_train_dmi.py

# -*- coding: utf-8 -*-
import random

import numpy as np
import keras
from keras.models import Sequential, load_model
# Adam is used as the optimizer (there are many alternatives, e.g. SGD)
from keras.optimizers import Adam
from imblearn.over_sampling import RandomOverSampler
from keras.utils import np_utils
# Conv2D is the convolution layer, Activation the activation function,
# MaxPool2D the pooling layer; Flatten turns the multi-dimensional input
# into a 1-D vector and Dense is a fully connected layer
from keras.layers import Conv2D, Activation, MaxPool2D, Flatten, Dense, Dropout
from keras import regularizers
def read_data(path):
    lines = []
    with open(path) as f:
        # each line is expected to hold a Python list literal: the feature
        # values, one unused trailing field, and the integer class label last
        for x in range(200000):
            lines.append(eval(f.readline().strip()))
    random.shuffle(lines)
    print('Finished reading data')
    d = int(0.75 * len(lines))  # 75/25 train/test split
    train_x = [s[:-2] for s in lines[0:d]]
    train_y = [s[-1] for s in lines[0:d]]
    test_x = [s[:-2] for s in lines[d:]]
    test_y = [s[-1] for s in lines[d:]]
    print('Finished converting data')
    # balance the classes in the training set by random over-sampling
    ros = RandomOverSampler(random_state=0)
    # recent imbalanced-learn releases use fit_resample (the old name was fit_sample)
    X_resampled, y_resampled = ros.fit_resample(np.array(train_x), np.array(train_y))
    print('Finished resampling data')
    return X_resampled, y_resampled, np.array(test_x), np.array(test_y)
train_x, train_y, test_x, test_y = read_data("D:\\data\\quantization\\stock16_18d_train.log")
# reshape to (samples, channels=1, height=6, width=77), i.e. channels-first layout
train_x = train_x.reshape(train_x.shape[0], 1, 6, 77)
test_x = test_x.reshape(test_x.shape[0], 1, 6, 77)
# one-hot encode the integer labels to match the 5-way softmax and
# categorical_crossentropy loss used below
train_y = np_utils.to_categorical(train_y, 5)
test_y = np_utils.to_categorical(test_y, 5)
model = Sequential()
# convolution layer
model.add(Conv2D(
    filters=32,                    # 32 filters in the first layer
    kernel_size=(10, 6),           # filter size 10x6
    padding='same',                # 'same' padding keeps the borders
    input_shape=(1, 6, 77),        # input shape (channels, height, width)
    data_format='channels_first',  # the reshape above puts the channel axis first
    # batch_input_shape=(64, 1, 28, 28),
))
# activation function
model.add(Activation('relu'))
# down-sampling (pooling) layer
model.add(MaxPool2D(
    pool_size=(4, 1),              # 4x1 pooling window
    strides=(2, 2),                # stride to the right and downwards
    padding='same',                # padding mode is 'same'
    data_format='channels_first',
))
# Flatten the multi-dimensional feature maps into a 1-D vector
model.add(Flatten())
model.add(Dense(units=777, activation='relu', kernel_regularizer=regularizers.l1(0.003)))
model.add(Dropout(0.2))
model.add(Dense(units=777, activation='relu'))
model.add(Dense(units=777, activation='relu'))
model.add(Dense(units=777, activation='relu', kernel_regularizer=regularizers.l1(0.002)))
model.add(Dropout(0.2))
model.add(Dense(units=1024, activation='relu'))
# output layer: 5 classes
model.add(Dense(5))
model.add(Activation('softmax'))  # softmax for classification
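# (not in the original script) print the layer-by-layer output shapes and
# parameter counts as a quick sanity check of the architecture defined above
model.summary()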
adam = Adam()  # Adam optimizer with its default learning rate
model.compile(optimizer=adam,
              loss='categorical_crossentropy',
              metrics=['accuracy'])
print("Starting training")
h = model.fit(train_x, train_y, batch_size=4096 * 2, epochs=150, shuffle=True)
score = model.evaluate(test_x, test_y)
print(score)
print('Test score:', score[0])
print('Test accuracy:', score[1])
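# (not in the original script) the History object returned by fit() stores the
# per-epoch training metrics; depending on the Keras version the accuracy key
# is 'acc' or 'accuracy'
acc_key = 'acc' if 'acc' in h.history else 'accuracy'
print('Final training accuracy:', h.history[acc_key][-1])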
path = "16_18d_cnn_seq.h5"
model.save(path)
model = None
model = load_model(path)
result = model.predict(test_x)
print(result)
print(test_y)
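# (not in the original script) a minimal sketch of turning the softmax
# probabilities into class labels; assumes the one-hot encoding applied to
# test_y above
predicted_classes = np.argmax(result, axis=1)
true_classes = np.argmax(test_y, axis=1)
print('Predicted classes:', predicted_classes[:20])
print('True classes:     ', true_classes[:20])
print('Recomputed test accuracy:', np.mean(predicted_classes == true_classes))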