cancer_train_l1l2.py 1.2 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647
  1. # -*- encoding:utf-8 -*-
  2. from sklearn import datasets
  3. from sklearn.linear_model import LogisticRegression
  4. from numpy import shape
  5. from sklearn import metrics
  6. import numpy as np
  def read_data(path):
      """Load a dataset stored as one Python expression per line.

      Every non-blank line of the file is evaluated and must yield a two-item
      ``(features, label)`` pair. Blank or whitespace-only lines are skipped.

      Args:
          path: Path of the text file to read.

      Returns:
          A tuple ``(X, y)`` of NumPy arrays: ``X`` is built from the first
          item of every line and ``y`` from the second.

      Raises:
          ValueError: If the file contains no data lines.
      """
      samples = []
      with open(path) as f:
          for raw_line in f:
              text = raw_line.strip()
              if not text:
                  # eval("") raises SyntaxError, so a stray blank line would
                  # otherwise abort the whole load.
                  continue
              # NOTE(security): eval() executes whatever the file contains.
              # Only use this on trusted data files; if every line is a plain
              # literal, ast.literal_eval is the safe replacement.
              samples.append(eval(text))
      if not samples:
          raise ValueError("no data rows found in {!r}".format(path))
      # Transpose the list of (features, label) pairs into two columns.
      X, y = zip(*samples)
      return np.array(X), np.array(y)
  # Read the training split, then the test split, from files given by relative
  # path. Each read_data() call returns an (X, y) pair.
  (X_train, y_train), (X_test, y_test) = (
      read_data(name) for name in ("cancer_train_data", "cancer_test_data")
  )
  def train_model(reg):
      """Fit a logistic regression with the given penalty and print its metrics.

      The model is trained on the module-level ``X_train``/``y_train`` and then
      scored on both that data and the module-level ``X_test``/``y_test``.
      Nothing is returned; the penalty name, the learned coefficients, the
      mean squared error and log loss on each split, and the test-minus-train
      error gap are printed.

      Args:
          reg: Penalty name forwarded to ``LogisticRegression(penalty=...)``,
              e.g. ``"l1"`` or ``"l2"``.
      """
      # Every print below is a single-argument print(...), which produces the
      # same output whether it is parsed as a Python 2 print statement or as
      # the Python 3 print function.
      print(reg)

      # liblinear supports both "l1" and "l2". Pinning it stops the call from
      # depending on the installed scikit-learn's default solver, which newer
      # releases switched to one that rejects the "l1" penalty.
      model = LogisticRegression(penalty=reg, solver="liblinear")
      model.fit(X_train, y_train)
      print("w {}".format(model.coef_))

      # Squared error is computed on the hard class predictions.
      y_pred_train = model.predict(X_train)
      y_pred_test = model.predict(X_test)
      e_train = metrics.mean_squared_error(y_train, y_pred_train)
      e_test = metrics.mean_squared_error(y_test, y_pred_test)

      # log_loss is defined on predicted probabilities; feeding it hard 0/1
      # labels only measures clipped extremes, so use predict_proba here.
      kl_train = metrics.log_loss(y_train, model.predict_proba(X_train))
      kl_test = metrics.log_loss(y_test, model.predict_proba(X_test))

      # Output labels: training-set MSE / KL, test-set MSE / KL, and the
      # difference between test and training error.
      print("训练集MSE:{}, KL:{}".format(e_train, kl_train))
      print("测试集MSE:{}, KL:{}".format(e_test, kl_test))
      print("训练测试差异{}".format(e_test - e_train))
      # Blank separator line between successive runs.
      print("")
  # Run the experiment once per penalty, L1 first and then L2. The additional
  # reg="None" run remains disabled.
  for penalty in ("l1", "l2"):
      train_model(reg=penalty)