gradient_logistic.py 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127
# -*- encoding:utf-8 -*-
import ast
from math import exp
from math import log2

import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LogisticRegression
  7. def read_data(path):
  8. with open(path) as f:
  9. lines = f.readlines()
  10. lines = [eval(line.strip()) for line in lines]
  11. X, y = zip(*lines)
  12. X = np.array(X)
  13. y = np.array(y)
  14. return X, y
  15. def curve(x_train, w, w0):
  16. results = x_train.tolist()
  17. for i in range(0, 100):
  18. x1 = 1.0 * i / 10
  19. x2 = -1 * (w[0] * x1 + w0) / w[1]
  20. results.append([x1, x2])
  21. results = ["{},{}".format(x1, x2) for [x1, x2] in results]
  22. return results
  23. def drawScatterAndLine(p, q):
  24. x1 = []
  25. x2 = []
  26. y1 = []
  27. y2 = []
  28. for idx,i in enumerate(q):
  29. if i == 0:
  30. x1.append(p[idx][0])
  31. y1.append(p[idx][1])
  32. else:
  33. x2.append(p[idx][0])
  34. y2.append(p[idx][1])
  35. plt.scatter(x1, y1)
  36. plt.scatter(x2, y2)
  37. plt.xlabel('p')
  38. plt.ylabel('q')
  39. plt.title('line regesion')
  40. plt.show()
  41. def sigmoid(x):
  42. return 1 / (1 + exp(-x))
  43. def data_matrix(X):
  44. data_mat = []
  45. for d in X:
  46. data_mat.append([1.0, *d])
  47. return data_mat
# Number of gradient-descent iterations performed by fit_1.
max_iter = 100
# NOTE(review): appears unused in this file — possibly a leftover global
# meant to cache the fitted weights; confirm before removing.
last_weights = []
  50. def fit_1(X_train, y_train):
  51. X_train = data_matrix(X_train)
  52. weights = np.array([1,1,1])
  53. x = np.array(X_train)
  54. for iter_ in range(max_iter):
  55. y = np.dot(x, np.transpose(weights))
  56. sig_y = []
  57. for i in range(len(y)):
  58. sig_y.append(sigmoid(y[i]))
  59. result = [0,0,0]
  60. loss = 0
  61. for i in range(len(X_train)):
  62. delta_i = (y_train[i][0]*(1-sig_y[i]) - (1-y_train[i][0])*sig_y[i])
  63. result = [result[0] + delta_i*X_train[i][0], result[1] + delta_i*X_train[i][1], result[2] + delta_i*X_train[i][2]]
  64. loss = loss - y_train[i][0]*log2(sig_y[i]) - (1-y_train[i][0])*log2(1- sig_y[i])
  65. result = -1 * np.array(result)/len(X_train)
  66. print("loss: ", loss)
  67. weights = weights - 0.8*result
  68. print("weight:", weights)
  69. return weights
  70. def score(X_test, y_test, last_weights):
  71. X_test = data_matrix(X_test)
  72. loss = 0
  73. y = np.dot(X_test, np.transpose(last_weights))
  74. sig_y = []
  75. for i in range(len(y)):
  76. sig_y.append(sigmoid(y[i]))
  77. for i in range(len(X_test)):
  78. loss = loss - y_test[i][0] * log2(sig_y[i]) - (1 - y_test[i][0]) * log2(1 - sig_y[i])
  79. print("y_test loss ", loss)
  80. def main():
  81. X_train, y_train = read_data("train_data")
  82. drawScatterAndLine(X_train, y_train)
  83. X_test, y_test = read_data("test_data")
  84. weight = fit_1(X_train, y_train)
  85. score(X_test, y_test, weight)
  86. # y_pred = model.predict_proba(X_test)
  87. # print y_pred
  88. # loss=log_loss(y_test,y_pred)
  89. # print "KL_loss:",loss
  90. # loss=log_loss(y_pred,y_test)
  91. # print "KL_loss:",loss
  92. '''
  93. curve_results=curve(X_train,model.coef_.tolist()[0],model.intercept_.tolist()[0])
  94. with open("train_with_splitline","w") as f :
  95. f.writelines("\n".join(curve_results))
  96. '''
  97. if __name__ == '__main__':
  98. main()