train_xsquare.py 1.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445
  1. #!/usr/bin/python
  2. # -*- coding: UTF-8 -*-
  3. import sys
  4. reload(sys)
  5. sys.setdefaultencoding('utf-8')
  6. import numpy as np
  7. from sklearn.linear_model import LinearRegression
  8. from sklearn import metrics
  9. def extend_feature(x):
  10. # return [x[0]]
  11. return [x[0], x[0] * x[0]]
  12. def read_data(path):
  13. with open(path) as f:
  14. lines = f.readlines()
  15. lines = [eval(line.strip()) for line in lines]
  16. X, y = zip(*lines)
  17. X = [extend_feature(x) for x in X]
  18. X = np.array(X)
  19. y = np.array(y)
  20. return X, y
  21. if __name__ == '__main__':
  22. X_train, y_train = read_data("train_paracurve_data")
  23. X_test, y_test = read_data("test_paracurve_data")
  24. model = LinearRegression()
  25. model.fit(X_train, y_train)
  26. print model.coef_
  27. print model.intercept_
  28. y_pred_train = model.predict(X_train)
  29. train_mse = metrics.mean_squared_error(y_train, y_pred_train)
  30. print "特征+平方非线性"
  31. print "MSE:", train_mse
  32. y_pred_test = model.predict(X_test)
  33. test_mse = metrics.mean_squared_error(y_test, y_pred_test)
  34. print "MSE:", test_mse
  35. print "推广mse差", test_mse - train_mse