1234567891011121314151617181920212223242526272829303132333435363738 |
- # -*- encoding:utf-8 -*-
- from sklearn import datasets
- from sklearn.model_selection import train_test_split
- from sklearn.linear_model import LogisticRegression
- from sklearn.model_selection import cross_val_predict
- from numpy import shape
- from sklearn import metrics
- from sklearn.metrics import log_loss
- import numpy as np
def read_data(path):
    """Load a labelled dataset from a text file of Python expressions.

    Every non-blank line of the file is evaluated as a Python expression and
    is expected to yield a ``(features, label)`` pair. The pairs are then
    transposed into one feature array and one label array.

    Args:
        path: Path of the text file to read.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays holding, in file order, the first
        and the second element of every parsed line.

    Raises:
        ValueError: If the file contains no data lines.
    """
    with open(path) as f:
        # Stream the file instead of materialising it with readlines(), and
        # drop blank lines (e.g. a trailing newline at end of file), which
        # would otherwise make eval() fail with a SyntaxError.
        stripped = (line.strip() for line in f)
        # SECURITY NOTE: eval() executes arbitrary code, so this must only be
        # used on trusted data files. If the lines are plain literals,
        # ast.literal_eval is a safe replacement.
        records = [eval(text) for text in stripped if text]

    if not records:
        # zip(*[]) yields nothing, so unpacking it into X and y would fail
        # with an opaque "not enough values to unpack" message.
        raise ValueError("no data lines found in %r" % (path,))

    X, y = zip(*records)
    return np.array(X), np.array(y)
# Load the training and evaluation splits from their data files.
X_train, y_train = read_data("cancer_train_data")
X_test, y_test = read_data("cancer_test_data")

# Fit a logistic-regression classifier with the estimator's default settings
# and show the learned parameters.
model = LogisticRegression()
model.fit(X_train, y_train)
print(model.coef_)
print(model.intercept_)

# log_loss is computed from predicted class probabilities, so only
# predict_proba is needed; the earlier predict() call was discarded unused.
y_pred = model.predict_proba(X_test)
print(y_pred)

loss = log_loss(y_test, y_pred)
# A single %-formatted argument prints the same "KL_loss: <value>" line under
# both Python 2 and Python 3.
print("KL_loss: %s" % loss)
|