# -*- encoding:utf-8 -*-
"""Binary logistic regression trained with batch gradient descent.

The script reads samples from the text files ``train_data`` and
``test_data``, plots the training points, fits a weight vector with
``fit_1`` and prints the base-2 cross-entropy loss on the test set.
"""
from math import exp
from math import log2

import numpy as np

try:
    # Only referenced by the disabled reference code at the end of main().
    from sklearn.linear_model import LogisticRegression  # noqa: F401
except ImportError:
    LogisticRegression = None

try:
    import matplotlib.pyplot as plt
except ImportError:
    # Plotting is optional; fitting and scoring do not use matplotlib.
    plt = None


# Default number of gradient-descent iterations used by fit_1.
max_iter = 100
last_weights = []

# Default step size used by fit_1.
_LEARNING_RATE = 0.8

# Probabilities are clipped into [_PROB_MIN, _PROB_MAX] before taking logs so
# that a saturated sigmoid (exactly 0.0 or 1.0) cannot trigger log2(0).
# The bounds are the extreme representable values, so every probability for
# which both logarithms were already defined is left untouched.
_PROB_MIN = float(np.finfo(float).tiny)
_PROB_MAX = 1.0 - float(np.finfo(float).epsneg)


def read_data(path):
    """Load samples from a text file with one ``(features, label)`` per line.

    Blank lines are skipped.

    Args:
        path: Path of the file to read.

    Returns:
        A tuple ``(X, y)`` of numpy arrays built from the first and second
        element of every parsed line.

    Raises:
        ValueError: If the file contains no samples.
    """
    samples = []
    with open(path) as f:
        for line in f:
            text = line.strip()
            if not text:
                continue
            # SECURITY: eval() executes arbitrary Python found in the file.
            # Only use this on trusted data; if every line is a plain
            # literal, ast.literal_eval is the safe replacement.
            samples.append(eval(text))
    if not samples:
        raise ValueError("no samples found in {!r}".format(path))
    X, y = zip(*samples)
    return np.array(X), np.array(y)


def curve(x_train, w, w0):
    """Return the training points plus 100 decision-boundary points as text.

    The boundary points solve ``w[0] * x1 + w[1] * x2 + w0 = 0`` for
    ``x1 = 0.0, 0.1, ..., 9.9``.

    Args:
        x_train: numpy array of two-column samples (``.tolist()`` is called).
        w: Sequence holding the two feature weights; ``w[1]`` must be
            non-zero because it is used as a divisor.
        w0: Intercept.

    Returns:
        A list of ``"x1,x2"`` strings: the training points first, followed
        by the boundary points.
    """
    points = x_train.tolist()
    for step in range(100):
        x1 = 1.0 * step / 10
        x2 = -1 * (w[0] * x1 + w0) / w[1]
        points.append([x1, x2])
    return ["{},{}".format(x1, x2) for x1, x2 in points]


def _first_column(labels):
    """Return labels as a 1-D float array (first column of 2-D input)."""
    arr = np.asarray(labels, dtype=float)
    if arr.ndim > 1:
        arr = arr[:, 0]
    return arr


def drawScatterAndLine(p, q):
    """Show a scatter plot of the points in ``p`` grouped by label.

    Points whose label equals 0 form one series and all remaining points
    form a second one. Despite the name, only scatter points are drawn.

    Args:
        p: Sequence of points; element 0 is used as x and element 1 as y.
        q: Labels aligned with ``p``; either scalars or one-element rows.

    Raises:
        ImportError: If matplotlib is not installed.
    """
    if plt is None:
        raise ImportError("matplotlib is required for drawScatterAndLine")
    labels = _first_column(q)
    zeros = [point for point, label in zip(p, labels) if label == 0]
    others = [point for point, label in zip(p, labels) if label != 0]
    plt.scatter([pt[0] for pt in zeros], [pt[1] for pt in zeros])
    plt.scatter([pt[0] for pt in others], [pt[1] for pt in others])
    plt.xlabel('p')
    plt.ylabel('q')
    plt.title('line regesion')
    plt.show()


def sigmoid(x):
    """Return the logistic function ``1 / (1 + e**-x)`` for a scalar ``x``.

    The negative branch is rewritten as ``e**x / (1 + e**x)`` so that
    ``exp`` is only ever called with a non-positive argument and therefore
    cannot overflow.
    """
    if x >= 0:
        return 1 / (1 + exp(-x))
    e = exp(x)
    return e / (1 + e)


def data_matrix(X):
    """Return ``X`` as a list of rows, each prefixed with a bias term 1.0."""
    return [[1.0, *row] for row in X]


def _prepare(X, y):
    """Build the bias-augmented design matrix and the 1-D label vector."""
    design = np.array(data_matrix(X), dtype=float)
    labels = _first_column(y)
    if design.ndim != 2 or design.shape[0] == 0:
        raise ValueError("X must be a non-empty 2-D collection of samples")
    if labels.shape[0] != design.shape[0]:
        raise ValueError(
            "X has {} samples but y has {} labels".format(
                design.shape[0], labels.shape[0]
            )
        )
    return design, labels


def _cross_entropy(labels, probs):
    """Return the summed base-2 cross-entropy of ``probs`` against labels."""
    total = 0
    for label, prob in zip(labels, probs):
        # Keep both log2 arguments strictly positive when prob saturates.
        prob = min(max(prob, _PROB_MIN), _PROB_MAX)
        total = total - label * log2(prob) - (1 - label) * log2(1 - prob)
    return total


def fit_1(X_train, y_train, n_iter=None, learning_rate=_LEARNING_RATE):
    """Fit logistic-regression weights with batch gradient descent.

    Every iteration prints the summed base-2 cross-entropy loss and the
    updated weights.

    Args:
        X_train: Samples, one row of features per sample (any feature count).
        y_train: Labels aligned with ``X_train``; either a 1-D sequence or
            rows whose first element is the label.
        n_iter: Number of iterations; ``None`` uses the module-level
            ``max_iter``.
        learning_rate: Step size applied to the mean gradient.

    Returns:
        numpy array ``[bias, w_1, ..., w_k]``; all weights start at 1.

    Raises:
        ValueError: If ``X_train`` is empty or the sample and label counts
            differ.
    """
    iterations = max_iter if n_iter is None else n_iter
    design, labels = _prepare(X_train, y_train)
    n_samples = design.shape[0]
    weights = np.ones(design.shape[1])
    for _ in range(iterations):
        probs = [sigmoid(z) for z in np.dot(design, weights)]
        loss = _cross_entropy(labels, probs)
        # y*(1 - s) - (1 - y)*s simplifies to y - s; the gradient of the
        # mean negative log-likelihood is -X^T (y - s) / n.
        residual = labels - np.array(probs)
        gradient = -np.dot(design.T, residual) / n_samples
        print("loss: ", loss)
        weights = weights - learning_rate * gradient
        print("weight:", weights)
    return weights


def score(X_test, y_test, last_weights):
    """Print and return the summed base-2 cross-entropy loss on a test set.

    Args:
        X_test: Samples, one row of features per sample.
        y_test: Labels aligned with ``X_test``; either a 1-D sequence or
            rows whose first element is the label.
        last_weights: Weight vector ``[bias, w_1, ..., w_k]`` such as the
            one returned by ``fit_1``.

    Returns:
        The loss that was printed.

    Raises:
        ValueError: If ``X_test`` is empty, the sample and label counts
            differ, or the weight vector length does not match the data.
    """
    design, labels = _prepare(X_test, y_test)
    logits = np.dot(design, np.asarray(last_weights, dtype=float))
    probs = [sigmoid(z) for z in logits]
    loss = _cross_entropy(labels, probs)
    print("y_test loss ", loss)
    return loss


def main():
    """Plot the training data, fit the weights and report the test loss."""
    X_train, y_train = read_data("train_data")
    drawScatterAndLine(X_train, y_train)
    X_test, y_test = read_data("test_data")
    weight = fit_1(X_train, y_train)
    score(X_test, y_test, weight)

    # Disabled reference code kept from the original script. It relies on a
    # fitted `model` object and a `log_loss` function that this script does
    # not define.
    # y_pred = model.predict_proba(X_test)
    # print y_pred
    # loss=log_loss(y_test,y_pred)
    # print "KL_loss:",loss
    # loss=log_loss(y_pred,y_test)
    # print "KL_loss:",loss
    # curve_results=curve(X_train,model.coef_.tolist()[0],model.intercept_.tolist()[0])
    # with open("train_with_splitline","w") as f :
    #     f.writelines("\n".join(curve_results))


if __name__ == '__main__':
    main()