123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156 |
- #!/usr/bin/python
- # -*- coding: UTF-8 -*-
'''
The simplest MSE example.
'''
- import sys
- import os
- sys.path.append(os.path.abspath('..'))
- from util.config import config
- import numpy as np
- from keras.layers import Conv2D, Activation, MaxPool2D, Flatten, Dense,Dropout,Input,MaxPooling2D,BatchNormalization,concatenate
- from keras.layers import LSTM
- from sklearn import metrics
- from sklearn.model_selection import train_test_split
- from sklearn.metrics import accuracy_score
- from imblearn.over_sampling import RandomOverSampler
- import joblib
- from keras.models import Sequential
- from keras.models import Model
- from keras.optimizers import Adam
- from keras import regularizers
- from keras.callbacks import EarlyStopping
# Upper bound on the number of training epochs passed to model.fit.
epochs = 330

# Halt training once the monitored 'accuracy' value has gone 30 epochs
# without improving.
# NOTE(review): 'accuracy' looks like the training metric rather than the
# validation one ('val_accuracy') -- confirm which is intended.
early_stopping = EarlyStopping(
    monitor='accuracy',
    patience=30,
    verbose=2,
)
def curce_data(x, y, y_pred):
    """Format inputs, targets and predictions as comma-separated text rows.

    Each argument must provide ``tolist()`` (e.g. a NumPy array). The three
    sequences are walked in lockstep; iteration stops at the shortest one.

    Args:
        x: Input values; each element is rendered as-is.
        y: Targets; only the first entry of each element is rendered.
        y_pred: Predictions; only the first entry of each element is rendered.

    Returns:
        A list of strings of the form ``"<x>,<y[0]>,<y_pred[0]>"``.
    """
    inputs = x.tolist()
    targets = y.tolist()
    predictions = y_pred.tolist()

    rows = []
    for value, target, prediction in zip(inputs, targets, predictions):
        rows.append("{},{},{}".format(value, target[0], prediction[0]))
    return rows
def read_data(path):
    """Load samples from a text file holding one Python literal per line.

    Every non-blank line is evaluated and must yield a 3-item sequence
    ``(X, z, y)``. The middle item is ignored. Blank lines (for instance a
    stray trailing newline) are skipped instead of crashing the parser.

    Args:
        path: Path of the text file to read.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays built from the first and third
        item of every record, in file order.

    Raises:
        ValueError: If the file contains no records.
    """
    records = []
    with open(path) as f:
        for line in f:
            text = line.strip()
            if not text:
                continue
            # SECURITY: eval() executes whatever the line contains. Only feed
            # this function files from a trusted source; consider
            # ast.literal_eval if the records are plain literals.
            records.append(eval(text))

    if not records:
        raise ValueError("no records found in {!r}".format(path))

    X, _, y = zip(*records)
    return np.array(X), np.array(y)
def create_mlp(dim, regress=False):
    """Build an uncompiled MLP with two 64-unit ReLU hidden layers.

    Args:
        dim: Number of input features (passed as ``input_dim``).
        regress: When true, append a single linear output unit.

    Returns:
        The assembled ``Sequential`` model.
    """
    layers = [
        Dense(64, input_dim=dim, activation="relu"),
        Dense(64, activation="relu"),
    ]
    if regress:
        # Optional regression head: one linear unit.
        layers.append(Dense(1, activation="linear"))
    return Sequential(layers)
def create_lstm(sample, timesteps, input_dim):
    """Build an uncompiled LSTM feature extractor.

    The network is: Input -> LSTM(64, dropout 0.2) -> Dense(64) ->
    Dropout(0.2) -> ReLU.

    Args:
        sample: Unused; kept so existing callers keep working.
        timesteps: Length of each input sequence.
        input_dim: Number of features per timestep.

    Returns:
        A functional ``Model`` mapping the sequence input to the activated
        64-unit feature vector.
    """
    seq_shape = (timesteps, input_dim)
    inputs = Input(shape=seq_shape)

    encoded = LSTM(units=64, input_shape=seq_shape, dropout=0.2)(inputs)
    hidden = Dense(64)(encoded)
    hidden = Dropout(0.2)(hidden)
    features = Activation("relu")(hidden)

    return Model(inputs, features)
def demo_1(file, model_file):
    """Train a dense two-output classifier on windowed samples and save it.

    Loads the samples with ``read_data``, holds out 10% of them, reshapes
    every sample to ``(windows, x_lenth)`` (module-level globals), trains
    with binary cross-entropy and the module-level ``early_stopping``
    callback, saves the model in HDF5 format and prints the held-out score.
    The held-out split serves both as validation data during training and
    as the final evaluation set.

    Args:
        file: Path of the sample file understood by ``read_data``.
        model_file: Path used to derive the output name; its extension is
            replaced by ``_.h5`` (``foo.pkl`` -> ``foo_.h5``).
    """
    X_train, y_train = read_data(file)
    Xtrain, Xtest, Ytrain, Ytest = train_test_split(X_train, y_train, test_size=0.1)

    # Each flat sample becomes a (windows, x_lenth) sequence.
    train_x_a = Xtrain.reshape(Xtrain.shape[0], windows, x_lenth)
    test_x_a = Xtest.reshape(Xtest.shape[0], windows, x_lenth)

    cnn_0 = create_lstm(train_x_a.shape[1], windows, x_lenth)

    # NOTE(review): the head below is attached to cnn_0.input, so the LSTM
    # layers built by create_lstm are not part of the trained model; only its
    # Input tensor is reused. If the LSTM was meant to feed the head, this
    # should be cnn_0.output -- confirm before changing, as it alters the
    # architecture.
    x = Dense(128, activation="relu", kernel_regularizer=regularizers.l1(0.003))(cnn_0.input)
    x = Dropout(0.1)(x)
    x = Dense(56, activation="relu")(x)
    x = Dense(56, activation="relu")(x)
    x = Dense(56, activation="relu")(x)
    x = Dense(56, activation="relu")(x)
    # The Dense layers act on the last axis only, so collapse the remaining
    # axes before the output layer.
    x = Flatten()(x)
    # Two sigmoid outputs.
    # Assumes each label row has two entries to match -- TODO confirm.
    x = Dense(2, activation="sigmoid")(x)

    model = Model(inputs=[cnn_0.input,], outputs=x)
    print("Starting training ")

    # NOTE(review): newer Keras releases spell `lr` as `learning_rate` and no
    # longer accept `decay` -- confirm against the installed version.
    opt = Adam(lr=1e-3, decay=1e-3 / 200)
    model.compile(loss="binary_crossentropy", optimizer=opt, metrics=['accuracy'])

    print("[INFO] training model...")
    model.fit(
        [train_x_a], Ytrain,
        validation_data=([test_x_a], Ytest),
        epochs=epochs,
        batch_size=1024, shuffle=True,
        callbacks=[early_stopping]
    )

    # splitext strips only the final extension; splitting on the first '.'
    # would truncate paths such as './models/x.pkl' or ones with dotted
    # directory names.
    model.save(os.path.splitext(model_file)[0] + '_.h5')

    score = model.evaluate([test_x_a], Ytest)
    print('MIX', score)
# Shape that demo_1 reshapes every flat sample to: `windows` rows of
# `x_lenth` values each.
windows = 5
x_lenth = 19

if __name__ == '__main__':
    # Hard-coded local data locations (Windows paths).
    root_dir = 'D:\\data\\quantization\\jqxx2\\'
    model_dir = 'D:\\data\\quantization\\jqxx2_svm_model\\'
    m = '000007.SH.log'  # 12

    sample_path = root_dir + m
    # The model name is derived from the first six characters of the log name.
    model_path = model_dir + str(m)[:6] + '.pkl'
    demo_1(sample_path, model_path)
|