#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""Compare two ways of adding a second feature to a linear regression.

Each sample's first feature is extended either with a copy of itself
(``repeat``) or with uniform random noise (``random``).  For each variant a
``LinearRegression`` model is fitted on ``train_data`` and the train MSE,
test MSE and their difference (test - train) are printed.
"""
import sys

if sys.version_info[0] < 3:
    # Python 2 only: keep the original default-encoding override.  Python 3
    # has neither the ``reload`` builtin nor ``sys.setdefaultencoding``.
    reload(sys)  # noqa: F821
    sys.setdefaultencoding('utf-8')

import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn import metrics


def extend_feature_repeat(x):
    """Return a two-feature row made of ``x[0]`` duplicated.

    Args:
        x: Indexable sample; only ``x[0]`` is read.

    Returns:
        The list ``[x[0], x[0]]``.
    """
    return [x[0], x[0]]


def extend_feature_random(x):
    """Return a two-feature row made of ``x[0]`` plus a random value.

    Args:
        x: Indexable sample; only ``x[0]`` is read.

    Returns:
        The list ``[x[0], r]`` where ``r`` is drawn from
        ``random.uniform(-10, 10)`` on every call.
    """
    # Imported locally on purpose: this module defines a function called
    # ``random`` further down, which would rebind a top-level ``import random``.
    import random
    return [x[0], random.uniform(-10, 10)]


def read_data(path, fun):
    """Load samples from ``path`` and build the feature matrix and targets.

    Every non-blank line of the file is evaluated as a Python expression that
    must yield a ``(x, y)`` pair.  ``fun`` is applied to each ``x`` to produce
    the feature row for that sample.

    Args:
        path: Path of the text file to read.
        fun: Callable mapping a raw ``x`` to a list of feature values.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays: the transformed features and the
        targets, in file order.

    Raises:
        ValueError: If the file contains no samples.
    """
    samples = []
    with open(path) as f:
        for line in f:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline) instead of
                # failing inside eval('').
                continue
            # NOTE(security): eval() executes arbitrary code found in the
            # file.  Only use this with trusted data files; if every line is
            # a plain literal, ast.literal_eval is the safe replacement.
            samples.append(eval(line))

    if not samples:
        raise ValueError("no samples found in %r" % (path,))

    raw_X, y = zip(*samples)
    X = np.array([fun(x) for x in raw_X])
    y = np.array(y)
    return X, y


def _run_experiment(extend, title):
    """Fit on ``train_data``, evaluate on ``test_data`` and print the report.

    Args:
        extend: Feature-extension callable passed to ``read_data``.
        title: Heading printed before the MSE figures.
    """
    X_train, y_train = read_data("train_data", extend)
    X_test, y_test = read_data("test_data", extend)

    model = LinearRegression()
    model.fit(X_train, y_train)
    print(model.coef_)
    print(model.intercept_)

    y_pred_train = model.predict(X_train)
    train_mse = metrics.mean_squared_error(y_train, y_pred_train)
    print(title)
    # Single-argument print with %-formatting yields the same text as the
    # original ``print a, b`` statements and is valid on Python 2 and 3.
    print("MSE: %s" % (train_mse,))

    y_pred_test = model.predict(X_test)
    test_mse = metrics.mean_squared_error(y_test, y_pred_test)
    print("MSE: %s" % (test_mse,))
    # Difference between test and train error (test - train).
    print("推广mse差 %s" % (test_mse - train_mse,))


def repeat():
    """Run the experiment with the first feature duplicated."""
    _run_experiment(extend_feature_repeat, "重复特征")


def random():
    """Run the experiment with an added uniform random feature."""
    _run_experiment(extend_feature_random, "+随机特征")


if __name__ == '__main__':
    repeat()
    print('-------------------')
    random()