# cancer_train.py (872 B)
  1. # -*- encoding:utf-8 -*-
  2. from sklearn import datasets
  3. from sklearn.model_selection import train_test_split
  4. from sklearn.linear_model import LogisticRegression
  5. from sklearn.model_selection import cross_val_predict
  6. from numpy import shape
  7. from sklearn import metrics
  8. from sklearn.metrics import log_loss
  9. import numpy as np
  10. def read_data(path):
  11. with open(path) as f:
  12. lines = f.readlines()
  13. lines = [eval(line.strip()) for line in lines]
  14. X, y = zip(*lines)
  15. X = np.array(X)
  16. y = np.array(y)
  17. return X, y
  18. X_train, y_train = read_data("cancer_train_data")
  19. X_test, y_test = read_data("cancer_test_data")
  20. model = LogisticRegression()
  21. model.fit(X_train, y_train)
  22. print (model.coef_)
  23. print (model.intercept_)
  24. y_pred = model.predict(X_test)
  25. y_pred = model.predict_proba(X_test)
  26. print y_pred
  27. loss = log_loss(y_test, y_pred)
  28. print "KL_loss:", loss