123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475 |
- #!/usr/bin/python
- # -*- coding: UTF-8 -*-
- import sys
# Python 2 only: force the implicit str<->unicode codec to UTF-8 so the
# non-ASCII report labels print without UnicodeDecodeError.  ``reload`` is
# not a builtin on Python 3 (and its default encoding is already UTF-8),
# so the hack is skipped there instead of raising NameError at import time.
if sys.version_info[0] < 3:
    reload(sys)
    sys.setdefaultencoding('utf-8')
- import numpy as np
- from sklearn.linear_model import LinearRegression
- from sklearn import metrics
def extend_feature_repeat(x):
    """Return a two-feature row that repeats the first feature of ``x``.

    The second column is an exact copy of the first one.

    Args:
        x: Indexable sample; only ``x[0]`` is read.

    Returns:
        list: ``[x[0], x[0]]``.
    """
    first = x[0]
    return [first, first]
def extend_feature_random(x):
    """Return a two-feature row: the first feature of ``x`` plus random noise.

    Args:
        x: Indexable sample; only ``x[0]`` is read.

    Returns:
        list: ``[x[0], r]`` where ``r`` comes from ``random.uniform(-10, 10)``.
        The generator is not seeded here, so results differ between calls.
    """
    # Imported locally on purpose: this file also defines a module-level
    # function named ``random``, which would shadow a top-of-file import.
    import random
    return [x[0], random.uniform(-10, 10)]
def read_data(path, fun):
    """Load ``(x, y)`` samples from a text file and build NumPy arrays.

    Every non-blank line of the file is evaluated as a Python expression that
    must yield a two-item pair ``(x, y)``.  ``fun`` is applied to each ``x``
    to produce the final feature row.

    Args:
        path: Path of the data file to read.
        fun: Callable mapping a raw ``x`` to the feature row to use.

    Returns:
        tuple: ``(X, y)`` where ``X`` is ``np.array`` of the transformed
        feature rows and ``y`` is ``np.array`` of the targets.

    Raises:
        ValueError: If the file has no samples or a line does not evaluate
            to a two-item pair (the ``zip`` unpacking fails).
    """
    samples = []
    with open(path) as f:
        # Stream the file instead of materialising it with readlines().
        for line in f:
            text = line.strip()
            if not text:
                # A blank line (e.g. a trailing newline) would make eval()
                # raise SyntaxError, so skip it.
                continue
            # SECURITY: eval() executes arbitrary code found in the file.
            # Only use this on trusted data; consider ast.literal_eval if
            # the lines are known to be plain literals.
            samples.append(eval(text))
    X, y = zip(*samples)
    X = np.array([fun(x) for x in X])
    y = np.array(y)
    return X, y
def repeat():
    """Fit a linear regression on duplicated-feature data and print a report.

    Reads the ``train_data`` and ``test_data`` files via ``read_data`` with
    ``extend_feature_repeat``, fits ``LinearRegression`` on the training set,
    then prints, in order: the coefficients, the intercept, the experiment
    label, the training MSE, the test MSE, and the difference
    ``test_mse - train_mse``.

    Every ``print`` is written as a single-argument call so the emitted text
    is the same under the Python 2 print statement and the Python 3 print
    function (the original print statements are a SyntaxError on Python 3).

    Returns:
        None
    """
    X_train, y_train = read_data("train_data", extend_feature_repeat)
    X_test, y_test = read_data("test_data", extend_feature_repeat)

    model = LinearRegression()
    model.fit(X_train, y_train)
    print(model.coef_)
    print(model.intercept_)

    y_pred_train = model.predict(X_train)
    train_mse = metrics.mean_squared_error(y_train, y_pred_train)
    print("重复特征")
    # "%s" formatting reproduces the "label value" layout of `print a, b`.
    print("MSE: %s" % (train_mse,))

    y_pred_test = model.predict(X_test)
    test_mse = metrics.mean_squared_error(y_test, y_pred_test)
    print("MSE: %s" % (test_mse,))
    print("推广mse差 %s" % (test_mse - train_mse,))
def random():
    """Fit a linear regression on data with an added random feature and report.

    Reads the ``train_data`` and ``test_data`` files via ``read_data`` with
    ``extend_feature_random``, fits ``LinearRegression`` on the training set,
    then prints, in order: the coefficients, the intercept, the experiment
    label, the training MSE, the test MSE, and the difference
    ``test_mse - train_mse``.

    Every ``print`` is written as a single-argument call so the emitted text
    is the same under the Python 2 print statement and the Python 3 print
    function (the original print statements are a SyntaxError on Python 3).

    Returns:
        None
    """
    X_train, y_train = read_data("train_data", extend_feature_random)
    X_test, y_test = read_data("test_data", extend_feature_random)

    model = LinearRegression()
    model.fit(X_train, y_train)
    print(model.coef_)
    print(model.intercept_)

    y_pred_train = model.predict(X_train)
    train_mse = metrics.mean_squared_error(y_train, y_pred_train)
    print("+随机特征")
    # "%s" formatting reproduces the "label value" layout of `print a, b`.
    print("MSE: %s" % (train_mse,))

    y_pred_test = model.predict(X_test)
    test_mse = metrics.mean_squared_error(y_test, y_pred_test)
    print("MSE: %s" % (test_mse,))
    print("推广mse差 %s" % (test_mse - train_mse,))
if __name__ == '__main__':
    # Run both experiments in order, printing a divider between their reports.
    experiments = (repeat, random)
    for position, run_experiment in enumerate(experiments):
        if position:
            print('-------------------')
        run_experiment()
|