#!/usr/bin/python # -*- coding: UTF-8 -*- import sys import matplotlib.pyplot as plt import numpy as np reload(sys) sys.setdefaultencoding('utf-8') import numpy as np from sklearn.linear_model import LinearRegression from sklearn import metrics ''' lesson4 把特征性扩展平方 ''' def extend_feature(x): return [x[0], x[0] * x[0]] def read_data(path): with open(path) as f: lines = f.readlines() lines = [eval(line.strip()) for line in lines] X, y = zip(*lines) X = np.array(X) y = np.array(y) return X, y def read_data2(path): with open(path) as f: lines = f.readlines() lines = [eval(line.strip()) for line in lines] X, y = zip(*lines) X = [extend_feature(x) for x in X] X = np.array(X) y = np.array(y) return X, y def drawScatterAndLine(p, q, w, b): plt.scatter(p, q) plt.xlabel('p') plt.ylabel('q') plt.title('line regesion') x = np.arange(-11, 11) y = w * x + b plt.plot(x, y, color='red') plt.show() def drawScatterAndLine2(p, q, w, b): plt.scatter(p, q) plt.xlabel('p') plt.ylabel('q') plt.title('line regesion') x = np.arange(-11, 11) y = w[0] * x + w[1]*x*x + b plt.plot(x, y, color='red') plt.show() def test1(): X_train, y_train = read_data("train_paracurve_data") X_test, y_test = read_data("test_paracurve_data") model = LinearRegression() model.fit(X_train, y_train) print model.coef_ print model.intercept_ y_pred_train = model.predict(X_train) train_mse = metrics.mean_squared_error(y_train, y_pred_train) print "特征+平方非线性" print "MSE:", train_mse y_pred_test = model.predict(X_test) test_mse = metrics.mean_squared_error(y_test, y_pred_test) print "MSE:", test_mse print "推广mse差", test_mse - train_mse return X_train, y_train, model.coef_, model.intercept_ def test2(): print("---------特征性修改平方------------") X_train, y_train = read_data2("train_paracurve_data") X_test, y_test = read_data2("test_paracurve_data") model = LinearRegression() model.fit(X_train, y_train) print model.coef_ print model.intercept_ y_pred_train = model.predict(X_train) train_mse = metrics.mean_squared_error(y_train, y_pred_train) print "特征+平方非线性" print "MSE:", train_mse y_pred_test = model.predict(X_test) test_mse = metrics.mean_squared_error(y_test, y_pred_test) print "MSE:", test_mse print "推广mse差", test_mse - train_mse return X_train, y_train, model.coef_, model.intercept_ if __name__ == '__main__': p,q,w,b = test1() p = [i[0] for i in p.tolist()] q = [i[0] for i in q.tolist()] w = w[0] b = b[0] drawScatterAndLine(p, q, w, b) p,q,w,b = test2() p = [i[0] for i in p.tolist()] q = [i[0] for i in q.tolist()] w = w[0] b = b[0] drawScatterAndLine2(p, q, w, b)