1234567891011121314151617181920212223242526272829303132333435363738394041424344454647 |
- # -*- encoding:utf-8 -*-
- from sklearn import datasets
- from sklearn.linear_model import LogisticRegression
- from numpy import shape
- from sklearn import metrics
- import numpy as np
def read_data(path):
    """Load a labelled data set from a text file.

    Every non-blank line of the file is evaluated as a Python expression
    and must yield a two-element ``(features, label)`` pair, for example
    ``([1.0, 2.0], 0)``.

    Args:
        path: Path of the file to read.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays: ``X`` stacks the first element
        of every pair and ``y`` holds the second element of every pair.

    Raises:
        ValueError: If the file contains no samples at all.
    """
    samples = []
    with open(path) as f:
        # Stream the file instead of materialising it with readlines().
        for raw_line in f:
            text = raw_line.strip()
            if not text:
                # Blank lines (e.g. an extra newline at EOF) carry no sample
                # and would make eval("") raise SyntaxError.
                continue
            # NOTE(review): eval() executes arbitrary code, so this is only
            # safe for trusted data files. If every line is a plain literal,
            # ast.literal_eval is the safe replacement -- confirm the data
            # format before switching.
            samples.append(eval(text))
    if not samples:
        # zip(*[]) yields nothing, so unpacking it would fail with an
        # unhelpful "not enough values to unpack" ValueError.
        raise ValueError("no samples found in {!r}".format(path))
    X, y = zip(*samples)
    return np.array(X), np.array(y)
# Read the training split first, then the test split. The four resulting
# arrays are module-level globals that train_model() reads directly.
(X_train, y_train), (X_test, y_test) = map(
    read_data, ("cancer_train_data", "cancer_test_data")
)
def train_model(reg):
    """Fit a logistic-regression classifier and print train/test errors.

    The model is fitted on the module-level ``X_train``/``y_train`` and
    evaluated on both that split and ``X_test``/``y_test``. The penalty
    name, the learned weights, the MSE and log-loss of each split, and the
    test-minus-train MSE gap are printed.

    Args:
        reg: Penalty name forwarded to ``LogisticRegression(penalty=...)``;
            this script calls it with ``"l1"`` and ``"l2"``.

    Returns:
        None. All results are written to standard output.
    """
    # Single-argument print(...) produces the same output as the Python 2
    # print statement and is also valid Python 3.
    print(reg)

    # Pin the solver: liblinear supports both "l1" and "l2", whereas the
    # lbfgs default of newer scikit-learn releases rejects "l1".
    model = LogisticRegression(penalty=reg, solver="liblinear")
    model.fit(X_train, y_train)
    print("w {}".format(model.coef_))
    # print (model.intercept_)

    # Hard class predictions are used for the MSE.
    y_pred_train = model.predict(X_train)
    y_pred_test = model.predict(X_test)
    e_train = metrics.mean_squared_error(y_train, y_pred_train)
    e_test = metrics.mean_squared_error(y_test, y_pred_test)

    # log_loss expects predicted probabilities, not hard labels; feeding it
    # the output of predict() only measures clipped 0/1 mistakes.
    kl_train = metrics.log_loss(y_train, model.predict_proba(X_train))
    kl_test = metrics.log_loss(y_test, model.predict_proba(X_test))

    print("训练集MSE:{}, KL:{}".format(e_train, kl_train))
    print("测试集MSE:{}, KL:{}".format(e_test, kl_test))
    print("训练测试差异{}".format(e_test-e_train))
    print("")
# Run the experiment once per penalty: L1 first, then L2.
# An additional run, train_model(reg="None"), is currently disabled.
for _penalty in ("l1", "l2"):
    train_model(reg=_penalty)
|