123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124 |
- #!/usr/bin/python
- # -*- coding: UTF-8 -*-
- import sys
- import matplotlib.pyplot as plt
- import numpy as np
- reload(sys)
- sys.setdefaultencoding('utf-8')
- import numpy as np
- from sklearn.linear_model import LinearRegression
- from sklearn import metrics
- '''
- lesson4
- 把特征性扩展平方
- '''
def extend_feature(x):
    """Return the first feature of ``x`` together with its square.

    Only ``x[0]`` is read; any further elements of ``x`` are ignored.

    Args:
        x: Indexable feature container whose element 0 supports ``*``.

    Returns:
        The two-element list ``[x[0], x[0] * x[0]]``.
    """
    base = x[0]
    return [base, base * base]
def read_data(path):
    """Load a file of per-line Python records into two NumPy arrays.

    Every non-blank line is stripped and evaluated as a Python expression.
    Each evaluated record is expected to be a two-item ``(features, target)``
    pair; the records are transposed so that all first items form ``X`` and
    all second items form ``y``.

    Args:
        path: Path of the text file to read.

    Returns:
        A tuple ``(X, y)`` of ``numpy.ndarray`` objects, in file order.

    Raises:
        ValueError: If the file contains no records (empty or blank-only).
    """
    with open(path) as f:
        # Blank lines carry no record; skipping them avoids eval("") raising
        # a SyntaxError on an otherwise valid file.
        texts = [line.strip() for line in f]
    # SECURITY: eval() executes each line as arbitrary Python code, so this
    # must only be pointed at trusted data files.
    # NOTE(review): if the records are plain literals, ast.literal_eval would
    # be a safer parser -- confirm the file format before switching.
    records = [eval(text) for text in texts if text]
    if not records:
        raise ValueError("no records found in %r" % (path,))
    X, y = zip(*records)
    return np.array(X), np.array(y)
def read_data2(path):
    """Load per-line Python records and expand each feature row with its square.

    Every non-blank line is stripped and evaluated as a Python expression.
    Each evaluated record is expected to be a two-item ``(features, target)``
    pair. Each feature entry is passed through ``extend_feature`` before the
    rows are stacked into ``X``.

    Args:
        path: Path of the text file to read.

    Returns:
        A tuple ``(X, y)`` of ``numpy.ndarray`` objects, in file order, where
        every row of ``X`` is the output of ``extend_feature``.

    Raises:
        ValueError: If the file contains no records (empty or blank-only).
    """
    with open(path) as f:
        # Blank lines carry no record; skipping them avoids eval("") raising
        # a SyntaxError on an otherwise valid file.
        texts = [line.strip() for line in f]
    # SECURITY: eval() executes each line as arbitrary Python code, so this
    # must only be pointed at trusted data files.
    # NOTE(review): if the records are plain literals, ast.literal_eval would
    # be a safer parser -- confirm the file format before switching.
    records = [eval(text) for text in texts if text]
    if not records:
        raise ValueError("no records found in %r" % (path,))
    X, y = zip(*records)
    X = [extend_feature(x) for x in X]
    return np.array(X), np.array(y)
def drawScatterAndLine(p, q, w, b):
    """Scatter-plot ``q`` against ``p`` and overlay the line ``w * x + b``.

    The line is evaluated at the integer x positions produced by
    ``np.arange(-11, 11)`` and drawn in red. The figure is then displayed
    with ``plt.show()``.

    Args:
        p: x coordinates of the scattered points.
        q: y coordinates of the scattered points.
        w: Slope of the overlaid line.
        b: Intercept of the overlaid line.
    """
    # Sample the fitted line on a fixed integer grid.
    line_x = np.arange(-11, 11)
    line_y = w * line_x + b

    plt.scatter(p, q)
    plt.plot(line_x, line_y, color='red')

    plt.xlabel('p')
    plt.ylabel('q')
    plt.title('line regesion')
    plt.show()
def drawScatterAndLine2(p, q, w, b):
    """Scatter-plot ``q`` against ``p`` and overlay a quadratic curve.

    The curve ``w[0] * x + w[1] * x * x + b`` is evaluated at the integer x
    positions produced by ``np.arange(-11, 11)`` and drawn in red. The figure
    is then displayed with ``plt.show()``.

    Args:
        p: x coordinates of the scattered points.
        q: y coordinates of the scattered points.
        w: Indexable pair of coefficients; ``w[0]`` multiplies ``x`` and
            ``w[1]`` multiplies ``x * x``.
        b: Constant term of the curve.
    """
    # Sample the fitted curve on a fixed integer grid.
    curve_x = np.arange(-11, 11)
    linear_term = w[0] * curve_x
    quadratic_term = w[1] * curve_x * curve_x
    curve_y = linear_term + quadratic_term + b

    plt.scatter(p, q)
    plt.plot(curve_x, curve_y, color='red')

    plt.xlabel('p')
    plt.ylabel('q')
    plt.title('line regesion')
    plt.show()
def test1():
    """Fit a LinearRegression on the data from ``read_data`` and report MSE.

    Loads "train_paracurve_data" and "test_paracurve_data" with ``read_data``,
    fits the model on the training set, and prints the coefficients, the
    intercept, the training MSE, the test MSE, and test MSE minus training MSE.

    Returns:
        The tuple ``(X_train, y_train, model.coef_, model.intercept_)``.
    """
    X_train, y_train = read_data("train_paracurve_data")
    X_test, y_test = read_data("test_paracurve_data")

    model = LinearRegression()
    model.fit(X_train, y_train)

    # Single-argument print(...) calls with %-formatting emit the same text
    # under Python 2 and Python 3; the former print statements were a
    # SyntaxError on Python 3.
    print(model.coef_)
    print(model.intercept_)

    train_mse = metrics.mean_squared_error(y_train, model.predict(X_train))
    # NOTE(review): this label is identical to the one printed by test2 and
    # mentions squared features, while this function loads data via
    # read_data -- confirm whether the label is intended here.
    print("特征+平方非线性")
    print("MSE: %s" % (train_mse,))

    test_mse = metrics.mean_squared_error(y_test, model.predict(X_test))
    print("MSE: %s" % (test_mse,))
    # Gap between test and training error.
    print("推广mse差 %s" % (test_mse - train_mse,))

    return X_train, y_train, model.coef_, model.intercept_
def test2():
    """Fit a LinearRegression on the data from ``read_data2`` and report MSE.

    Prints a banner, loads "train_paracurve_data" and "test_paracurve_data"
    with ``read_data2``, and fits the model on the training set. It then
    prints the coefficients, the intercept, the training MSE, the test MSE,
    and test MSE minus training MSE.

    Returns:
        The tuple ``(X_train, y_train, model.coef_, model.intercept_)``.
    """
    print("---------特征性修改平方------------")
    X_train, y_train = read_data2("train_paracurve_data")
    X_test, y_test = read_data2("test_paracurve_data")

    model = LinearRegression()
    model.fit(X_train, y_train)

    # Single-argument print(...) calls with %-formatting emit the same text
    # under Python 2 and Python 3; the former print statements were a
    # SyntaxError on Python 3.
    print(model.coef_)
    print(model.intercept_)

    train_mse = metrics.mean_squared_error(y_train, model.predict(X_train))
    print("特征+平方非线性")
    print("MSE: %s" % (train_mse,))

    test_mse = metrics.mean_squared_error(y_test, model.predict(X_test))
    print("MSE: %s" % (test_mse,))
    # Gap between test and training error.
    print("推广mse差 %s" % (test_mse - train_mse,))

    return X_train, y_train, model.coef_, model.intercept_
if __name__ == '__main__':
    # Run each experiment in turn and hand its results to the matching
    # plotting routine: test1 -> drawScatterAndLine, then
    # test2 -> drawScatterAndLine2.
    for run_experiment, draw in ((test1, drawScatterAndLine),
                                 (test2, drawScatterAndLine2)):
        X_train, y_train, coef, intercept = run_experiment()
        # Only the first element of every row is plotted.
        xs = [row[0] for row in X_train.tolist()]
        ys = [row[0] for row in y_train.tolist()]
        # The plot functions receive the first entry of the fitted
        # coefficient and intercept arrays.
        draw(xs, ys, coef[0], intercept[0])
|