#!/usr/bin/python
# -*- coding: UTF-8 -*-
# Source repository: yufeng/machine_learn
'''
The simplest MSE.
'''
import sys
import os
sys.path.append(os.path.abspath('..'))
from util.config import config
import numpy as np
from keras.layers import Conv2D, Activation, MaxPool2D, Flatten, Dense,Dropout,Input,MaxPooling2D,BatchNormalization,concatenate
from keras.layers import LSTM
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from imblearn.over_sampling import RandomOverSampler
import joblib
from keras.models import Sequential
from keras.models import Model
from keras.optimizers import Adam
from keras import regularizers
from keras.callbacks import EarlyStopping

# Upper bound on the number of training epochs handed to model.fit() in demo_1.
epochs= 330
# Shared early-stopping callback: watches the 'accuracy' metric with a
# patience of 30 epochs.
# NOTE(review): 'accuracy' is the training-set metric; demo_1 also passes
# validation_data, so 'val_accuracy' may have been intended -- confirm.
early_stopping = EarlyStopping(monitor='accuracy', patience=30, verbose=2)


def curce_data(x, y, y_pred):
    """Format inputs, labels and predictions as comma-separated text rows.

    All three arguments must provide ``.tolist()`` (e.g. NumPy arrays).
    Row *i* of the result is ``"<x[i]>,<y[i][0]>,<y_pred[i][0]>"``: the whole
    input entry, followed by the first component of the label and of the
    prediction. The result is as long as the shortest argument.

    Returns:
        A list of strings, one per sample.
    """
    rows = []
    for features, label, prediction in zip(x.tolist(), y.tolist(), y_pred.tolist()):
        rows.append("{},{},{}".format(features, label[0], prediction[0]))
    return rows


def read_data(path):
    """Load features and labels from a line-oriented text file.

    Every non-blank line of ``path`` is evaluated as a Python expression that
    must yield a 3-item sequence ``(X, z, y)``. The first and last items are
    collected; the middle item is ignored.

    Args:
        path: Path of the file to read.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays with one entry per record.

    Raises:
        ValueError: If the file contains no records, or the records cannot be
            unpacked into exactly three columns.
    """
    records = []
    with open(path) as f:
        for line in f:
            text = line.strip()
            if not text:
                # Blank lines (e.g. an empty line at the end of the file)
                # would make eval() fail with a SyntaxError; skip them.
                continue
            # SECURITY(review): eval() executes arbitrary code from the file.
            # Only use this on trusted data; if every record is a plain
            # literal, ast.literal_eval would be the safe replacement.
            records.append(eval(text))

    if not records:
        raise ValueError("no records found in {!r}".format(path))

    # Transpose rows into columns; the middle column is not used.
    X, _, y = zip(*records)
    return np.array(X), np.array(y)


def create_mlp(dim, regress=False):
    """Build a small fully connected network.

    Args:
        dim: Number of input features, passed as ``input_dim`` to the first
            layer.
        regress: When true, a single linear output unit is appended.

    Returns:
        An uncompiled ``Sequential`` model made of two 64-unit ReLU layers,
        followed by the optional 1-unit linear layer.
    """
    layers = [
        Dense(64, input_dim=dim, activation="relu"),
        Dense(64, activation="relu"),
    ]
    if regress:
        # Optional regression head.
        layers.append(Dense(1, activation="linear"))

    model = Sequential()
    for layer in layers:
        model.add(layer)
    return model


def create_lstm(sample, timesteps, input_dim):
    """Build an LSTM feature extractor with the functional API.

    Args:
        sample: Not used by this function; kept in the signature for callers.
        timesteps: Number of time steps in each input sequence.
        input_dim: Number of features per time step.

    Returns:
        An uncompiled ``Model`` that takes a ``(timesteps, input_dim)`` input
        and applies LSTM(64, dropout=0.2) -> Dense(64) -> Dropout(0.2) -> ReLU.
    """
    sequence_shape = (timesteps, input_dim)
    inputs = Input(shape=sequence_shape)

    features = LSTM(units=64, input_shape=sequence_shape, dropout=0.2)(inputs)
    features = Dense(64)(features)
    features = Dropout(0.2)(features)
    features = Activation("relu")(features)

    return Model(inputs, features)


def demo_1(file, model_file):
    """Train a two-output sigmoid classifier on windowed data and save it.

    Steps: load ``(X, y)`` with ``read_data``, hold out a random 10 % of the
    samples, reshape every sample to ``(windows, x_lenth)`` (module-level
    globals), train with Adam and binary cross-entropy, save the model in
    HDF5 format and print the ``model.evaluate`` result on the held-out set.

    Args:
        file: Path of the data file understood by ``read_data``.
        model_file: Path whose extension is replaced by ``_.h5`` to form the
            output path (``dir/000007.pkl`` -> ``dir/000007_.h5``).

    NOTE(review): presumably ``y`` has two columns to match the 2-unit output
    layer -- confirm against the data files.
    """
    X_train, y_train = read_data(file)
    Xtrain, Xtest, Ytrain, Ytest = train_test_split(X_train, y_train, test_size=0.1)
    # View each flat sample as `windows` rows of `x_lenth` values.
    train_x_a = Xtrain.reshape(Xtrain.shape[0], windows, x_lenth)
    test_x_a = Xtest.reshape(Xtest.shape[0], windows, x_lenth)

    cnn_0 = create_lstm(train_x_a.shape[1], windows, x_lenth)

    # NOTE(review): the head below is attached to `cnn_0.input`, not
    # `cnn_0.output`, so the LSTM layers built by create_lstm are NOT part of
    # the trained model; only its Input tensor is reused. Left unchanged
    # because switching to `cnn_0.output` alters the architecture -- confirm
    # which one is intended.
    x = Dense(128, activation="relu", kernel_regularizer=regularizers.l1(0.003))(cnn_0.input)
    x = Dropout(0.1)(x)
    for _ in range(4):
        x = Dense(56, activation="relu")(x)
    # The tensor is still 3-D here (Dense acts on the last axis), so flatten
    # it before the output layer.
    x = Flatten()(x)
    x = Dense(2, activation="sigmoid")(x)

    model = Model(inputs=[cnn_0.input,], outputs=x)

    print("Starting training ")

    opt = Adam(lr=1e-3, decay=1e-3 / 200)
    model.compile(loss="binary_crossentropy", optimizer=opt, metrics=['accuracy'])

    print("[INFO] training model...")
    model.fit(
        [train_x_a], Ytrain,
        validation_data=([test_x_a], Ytest),
        epochs=epochs,
        batch_size=1024, shuffle=True,
        callbacks=[early_stopping]
    )

    # splitext only strips the final extension; the previous
    # `model_file.split('.')[0]` cut the path at the first dot anywhere in it
    # (e.g. '../models/a.pkl' became '').
    model.save(os.path.splitext(model_file)[0] + '_' + '.h5')

    score = model.evaluate([test_x_a], Ytest)
    print('MIX', score)


# Shape of one sample after demo_1 reshapes it: (windows, x_lenth).
# These are also passed to create_lstm as (timesteps, input_dim).
windows = 5
x_lenth = 19

if __name__ == '__main__':
    # Script entry point: train on a single hard-coded log file.
    root_dir = 'D:\\data\\quantization\\jqxx2\\'
    model_dir = 'D:\\data\\quantization\\jqxx2_svm_model\\'
    m = '000007.SH.log'  # 12
    data_file = root_dir + m
    # The first six characters of the log name form the model file stem.
    model_file = model_dir + m[:6] + '.pkl'
    demo_1(data_file, model_file)