# -*- encoding:utf-8 -*-
- from sklearn.linear_model import LogisticRegression
- from math import exp
- from math import log2
- import numpy as np
- import matplotlib.pyplot as plt
def read_data(path):
    """Load samples from a text file holding one Python expression per line.

    Every non-blank line is evaluated and must yield a two-item sequence
    ``(features, label)``, for example ``[1.0, 2.0], [0]``.

    Args:
        path: Location of the data file.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays built from the first and the
        second item of every line, in file order.

    Raises:
        ValueError: If the file contains no samples.
    """
    samples = []
    with open(path) as f:
        for line in f:
            text = line.strip()
            if not text:
                # A blank (typically trailing) line would make eval() raise
                # SyntaxError, so it is skipped instead.
                continue
            # SECURITY: eval() executes arbitrary code found in the file.
            # Only load trusted data; if the lines are plain literals,
            # ast.literal_eval is the safe replacement.
            samples.append(eval(text))
    if not samples:
        raise ValueError("no samples found in {!r}".format(path))
    X, y = zip(*samples)
    return np.array(X), np.array(y)
def curve(x_train, w, w0):
    """Serialize the training points followed by samples of a separating line.

    The line is ``w[0] * x1 + w[1] * x2 + w0 = 0``; it is sampled at
    ``x1 = 0.0, 0.1, ..., 9.9`` and solved for ``x2``.

    Args:
        x_train: Array whose ``tolist()`` yields ``[x1, x2]`` pairs.
        w: Two coefficients of the line.
        w0: Intercept of the line.

    Returns:
        A list of ``"x1,x2"`` strings: first the training points, then the
        100 sampled points of the line.
    """
    points = x_train.tolist()
    abscissas = (1.0 * step / 10 for step in range(0, 100))
    points.extend([x1, -1 * (w[0] * x1 + w0) / w[1]] for x1 in abscissas)
    return ["{},{}".format(first, second) for first, second in points]
def drawScatterAndLine(p, q):
    """Show a scatter plot of the points in ``p``, split by the labels in ``q``.

    Points whose label equals 0 form the first series; all remaining points
    form the second series. Only coordinates 0 and 1 of each point are used.

    Args:
        p: Indexable collection of points, aligned with ``q``.
        q: Iterable of labels, one per point.
    """
    zero_x, zero_y = [], []
    other_x, other_y = [], []
    for idx, label in enumerate(q):
        # Pick the pair of coordinate lists that matches this label.
        xs, ys = (zero_x, zero_y) if label == 0 else (other_x, other_y)
        xs.append(p[idx][0])
        ys.append(p[idx][1])

    for xs, ys in ((zero_x, zero_y), (other_x, other_y)):
        plt.scatter(xs, ys)
    plt.xlabel('p')
    plt.ylabel('q')
    plt.title('line regesion')
    plt.show()
def sigmoid(x):
    """Return the logistic function ``1 / (1 + e**-x)`` of a scalar.

    The computation is split by sign so that ``exp`` is only ever called
    with a non-positive argument; the naive formula raises OverflowError
    for strongly negative ``x`` (roughly ``x < -709``).

    Args:
        x: Real number.

    Returns:
        A float in the closed interval [0, 1].
    """
    if x >= 0:
        return 1 / (1 + exp(-x))
    # For negative x use the algebraically equal form e**x / (1 + e**x).
    z = exp(x)
    return z / (1 + z)
def data_matrix(X):
    """Return a copy of ``X`` as nested lists with a leading 1.0 in every row.

    Args:
        X: Iterable of rows, each row itself iterable.

    Returns:
        A list with one ``[1.0, *row]`` list per input row.
    """
    return [[1.0, *row] for row in X]
# Number of gradient-descent passes fit_1 performs by default.
max_iter = 100
# Module-level weight list; no function shown in this file reads it
# (score() takes its own parameter of the same name).
last_weights = []
def fit_1(X_train, y_train, learning_rate=0.8, n_iter=None):
    """Fit a logistic-regression model by batch gradient descent.

    A bias column of ones is prepended to the features, every weight starts
    at 1, and each pass takes one step against the mean log-loss gradient.
    Before each step the summed base-2 log-loss is printed, and after it
    the updated weights are printed.

    Args:
        X_train: Array-like of shape (n_samples, n_features); any number of
            features is accepted.
        y_train: 0/1 labels, either flat (n_samples,) or as rows
            (n_samples, k); only the first column is used.
        learning_rate: Step size of each update.
        n_iter: Number of passes; ``None`` uses the module-level
            ``max_iter``.

    Returns:
        NumPy float array ``[bias, w_1, ..., w_n_features]``.

    Raises:
        ValueError: If ``X_train`` is not a non-empty 2-D array-like.
    """
    features = np.asarray(X_train, dtype=float)
    if features.ndim != 2 or features.shape[0] == 0:
        raise ValueError("X_train must be a non-empty 2-D array-like")

    n_samples = features.shape[0]
    labels = np.asarray(y_train, dtype=float).reshape(n_samples, -1)[:, 0]
    design = np.column_stack((np.ones(n_samples), features))
    weights = np.ones(design.shape[1])
    passes = max_iter if n_iter is None else n_iter
    ln2 = np.log(2.0)

    for _ in range(passes):
        logits = np.dot(design, weights)
        # logaddexp(0, t) == log(1 + e**t) stays finite for any t, so the
        # sigmoid cannot overflow and the loss never evaluates log(0).
        log_p1 = -np.logaddexp(0.0, -logits)  # log(sigmoid(logit))
        log_p0 = -np.logaddexp(0.0, logits)   # log(1 - sigmoid(logit))
        loss = -np.sum(labels * log_p1 + (1.0 - labels) * log_p0) / ln2
        # Gradient of the mean log-loss: -X^T (y - sigmoid) / n.
        gradient = -np.dot(design.T, labels - np.exp(log_p1)) / n_samples
        print("loss: ", loss)
        weights = weights - learning_rate * gradient
        print("weight:", weights)
    return weights
def score(X_test, y_test, last_weights):
    """Print and return the summed base-2 log-loss of a model on test data.

    A bias column of ones is prepended to the features before they are
    combined with the weights. The loss is computed from the logits with
    ``logaddexp``, so a saturated prediction cannot trigger a log(0)
    domain error.

    Args:
        X_test: Array-like of shape (n_samples, n_features).
        y_test: 0/1 labels, either flat (n_samples,) or as rows
            (n_samples, k); only the first column is used.
        last_weights: Sequence ``[bias, w_1, ..., w_n_features]``.

    Returns:
        The total loss in bits as a float (the same value that is printed).
    """
    features = np.asarray(X_test, dtype=float)
    n_samples = features.shape[0]
    labels = np.asarray(y_test, dtype=float).reshape(n_samples, -1)[:, 0]
    design = np.column_stack((np.ones(n_samples), features))
    logits = np.dot(design, np.asarray(last_weights, dtype=float))
    # -log(sigmoid(t)) == logaddexp(0, -t); -log(1 - sigmoid(t)) == logaddexp(0, t).
    per_sample = (labels * np.logaddexp(0.0, -logits)
                  + (1.0 - labels) * np.logaddexp(0.0, logits))
    loss = float(np.sum(per_sample) / np.log(2.0))
    print("y_test loss ", loss)
    return loss
def main():
    """Plot the training set, fit the model on it and print the test loss.

    Reads the files "train_data" and "test_data" from the current working
    directory.
    """
    X_train, y_train = read_data("train_data")
    drawScatterAndLine(X_train, y_train)
    X_test, y_test = read_data("test_data")
    score(X_test, y_test, fit_1(X_train, y_train))
    # NOTE: two experiments were left disabled at this point. Both refer to
    # a `model` object that this function never creates:
    #   * comparing model.predict_proba(X_test) against y_test with
    #     log_loss (printed as "KL_loss:") in both argument orders;
    #   * writing curve(X_train, model.coef_..., model.intercept_...) to the
    #     file "train_with_splitline", one entry per line.
# Run the pipeline only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|