# -*- encoding:utf-8 -*-
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_predict
from numpy import shape
from sklearn import metrics
from sklearn.metrics import log_loss
import numpy as np


def read_data(path):
    """Load a feature matrix and a label vector from a text file.

    Every non-blank line of the file is evaluated as a Python expression
    that is expected to yield a two-item ``(features, label)`` sequence.

    Args:
        path: Path of the file to read.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays: ``X`` is built from the first
        item of every line and ``y`` from the second, in file order.

    Raises:
        ValueError: If the file contains no samples (empty or only blank
            lines).
    """
    samples = []
    with open(path) as f:
        # Iterate the file directly instead of materialising it first.
        for line in f:
            text = line.strip()
            if not text:
                # Blank lines (e.g. a trailing newline) carry no sample and
                # would make eval() fail with a SyntaxError.
                continue
            # NOTE(security): eval() executes arbitrary code found in the
            # file. Only use this on trusted data; if every line is a plain
            # literal, ast.literal_eval is the safe replacement.
            samples.append(eval(text))

    if not samples:
        # Fail with an explicit message rather than an opaque unpacking error.
        raise ValueError("no samples found in %r" % (path,))

    X, y = zip(*samples)
    return np.array(X), np.array(y)


# Load the training and evaluation sets; both paths are resolved relative to
# the current working directory.
X_train, y_train = read_data("cancer_train_data")
X_test, y_test = read_data("cancer_test_data")

# Fit a logistic-regression classifier with scikit-learn's default settings.
model = LogisticRegression()
model.fit(X_train, y_train)

# Show the learned weights and bias.
print(model.coef_)
print(model.intercept_)

# Per-class probabilities for the evaluation set (one column per entry of
# model.classes_). The hard-label predict() call was dropped: its result was
# overwritten immediately and never used.
y_pred = model.predict_proba(X_test)
print(y_pred)

# Pass the model's class list explicitly so the probability columns are
# matched to the right labels even when the evaluation set happens to lack
# one of the classes seen during training.
loss = log_loss(y_test, y_pred, labels=model.classes_)

# Single-argument print calls produce the same output on Python 2 and 3;
# the bare `print x` statement form is a syntax error on Python 3.
print("KL_loss: %s" % (loss,))