Browse Source

手写逻辑回归梯度下降

yufeng0528 4 years ago
parent
commit
bdf64fba79
1 changed files with 127 additions and 0 deletions
  1. 127 0
      logistic/gradient_logistic.py

+ 127 - 0
logistic/gradient_logistic.py

@@ -0,0 +1,127 @@
1
+# -*- encoding:utf-8 -*-
2
+from sklearn.linear_model import LogisticRegression
3
+from math import exp
4
+from math import log2
5
+import numpy as np
6
+import matplotlib.pyplot as plt
7
+
8
def read_data(path):
    """Load samples from a text file holding one Python expression per line.

    Every non-blank line is evaluated and must yield a two-item sequence
    ``(features, label)``. The first items are gathered into ``X`` and the
    second items into ``y``, in file order.

    Args:
        path: Location of the data file.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays.

    Raises:
        ValueError: If the file contains no non-blank lines, because there
            is then nothing to unpack into ``X`` and ``y``.
    """
    with open(path) as f:
        stripped = (line.strip() for line in f)
        # Blank lines (for example an extra newline at the end of the file)
        # previously reached eval("") and crashed with a SyntaxError, so
        # they are skipped.
        # NOTE(security): eval() executes arbitrary expressions. Only feed
        # this function trusted files; if the lines are plain literals,
        # ast.literal_eval is the safer choice.
        samples = [eval(text) for text in stripped if text]
    X, y = zip(*samples)
    return np.array(X), np.array(y)
16
+
17
+
18
def curve(x_train, w, w0):
    """Return the training points plus 100 boundary samples as text rows.

    The line ``w[0] * x1 + w[1] * x2 + w0 = 0`` is solved for ``x2`` at
    ``x1 = 0.0, 0.1, ..., 9.9`` and the resulting points are appended after
    the training rows.

    Args:
        x_train: Object with a ``tolist()`` method yielding two-item rows.
        w: Sequence whose first two entries weight x1 and x2.
        w0: Intercept term of the line.

    Returns:
        A list of ``"x1,x2"`` strings, training rows first.
    """
    points = x_train.tolist()
    abscissas = (1.0 * step / 10 for step in range(100))
    points.extend(
        [x1, -1 * (w[0] * x1 + w0) / w[1]]
        for x1 in abscissas
    )
    return ["{},{}".format(first, second) for first, second in points]
26
+
27
+
28
def drawScatterAndLine(p, q):
    """Show a scatter plot of the points in ``p``, split by the labels in ``q``.

    Points whose label compares equal to 0 form the first scatter series;
    all remaining points form the second one.

    Args:
        p: Indexable collection of points; ``p[i][0]`` and ``p[i][1]`` are
            used as the horizontal and vertical coordinates.
        q: Iterable of labels, aligned with ``p`` by position.
    """
    zero_x, zero_y = [], []
    other_x, other_y = [], []

    for row, label in enumerate(q):
        # Pick the destination series once, then store both coordinates.
        xs, ys = (zero_x, zero_y) if label == 0 else (other_x, other_y)
        xs.append(p[row][0])
        ys.append(p[row][1])

    for xs, ys in ((zero_x, zero_y), (other_x, other_y)):
        plt.scatter(xs, ys)
    plt.xlabel('p')
    plt.ylabel('q')
    plt.title('line regesion')
    plt.show()
48
+
49
+
50
def sigmoid(x):
    """Return the logistic function ``1 / (1 + e**-x)`` of a scalar.

    The naive formula calls ``exp(-x)``, which raises ``OverflowError`` once
    ``x`` drops below roughly -709. For negative inputs the algebraically
    equal form ``e**x / (1 + e**x)`` is used instead, so the exponent passed
    to ``exp`` is never positive and cannot overflow.

    Args:
        x: A real number.

    Returns:
        A float in the closed interval [0, 1].
    """
    if x >= 0:
        return 1 / (1 + exp(-x))
    # exp(x) underflows harmlessly towards 0.0 for very negative x.
    scaled = exp(x)
    return scaled / (1 + scaled)
52
+
53
+
54
def data_matrix(X):
    """Return a copy of ``X`` with a leading 1.0 added to every row.

    Args:
        X: Iterable of iterable rows.

    Returns:
        A list of lists; each inner list is ``[1.0, *row]``.
    """
    return [[1.0, *row] for row in X]
59
+
60
+
61
# Number of gradient-descent iterations used by fit_1.
max_iter: int = 100
# Module-level list that starts empty; the functions visible in this file do
# not read or assign it (score() receives its own ``last_weights`` argument).
last_weights: list = []
63
+
64
+
65
def fit_1(X_train, y_train, learning_rate=0.8, n_iter=None):
    """Fit logistic-regression weights with batch gradient descent.

    A bias column of ones is prepended to the samples, every weight starts at
    1, and each iteration moves the weights against the mean gradient of the
    log-loss. The summed base-2 log-loss and the updated weights are printed
    on every iteration.

    Compared with the earlier hand-written loops this version:
      * accepts any number of feature columns (not just two),
      * never fails with a math domain/overflow error when the sigmoid
        saturates, because the loss is computed through ``np.logaddexp``.

    Args:
        X_train: 2-D array-like of shape (n_samples, n_features).
        y_train: Labels in {0, 1}; either 1-D or 2-D, in which case the first
            column is used.
        learning_rate: Step size of each update. Defaults to 0.8.
        n_iter: Number of iterations. Defaults to the module-level
            ``max_iter`` when omitted.

    Returns:
        1-D NumPy array of length ``n_features + 1``: bias weight first,
        followed by one weight per feature.

    Raises:
        ValueError: If ``X_train`` is not a non-empty 2-D array or the number
            of labels differs from the number of samples.
    """
    if n_iter is None:
        n_iter = max_iter

    features = np.asarray(X_train, dtype=float)
    if features.ndim != 2 or features.shape[0] == 0:
        raise ValueError("X_train must be a non-empty 2-D array of samples")
    n_samples = features.shape[0]
    x = np.hstack([np.ones((n_samples, 1)), features])

    targets = np.asarray(y_train, dtype=float)
    if targets.ndim > 1:
        # Labels stored as one-element rows: keep the first column only.
        targets = targets.reshape(targets.shape[0], -1)[:, 0]
    if targets.shape[0] != n_samples:
        raise ValueError("y_train must hold exactly one label per sample")

    weights = np.ones(x.shape[1])
    for _ in range(n_iter):
        z = x @ weights

        # -ln(sigmoid(z)) == logaddexp(0, -z) and
        # -ln(1 - sigmoid(z)) == logaddexp(0, z); both stay finite for any z.
        # Dividing by ln(2) reports the loss in bits, as log2 did before.
        loss = np.sum(
            targets * np.logaddexp(0, -z)
            + (1 - targets) * np.logaddexp(0, z)
        ) / np.log(2)
        print("loss: ", loss)

        # Overflow-free sigmoid: exp(-ln(1 + e**-z)).
        sig_y = np.exp(-np.logaddexp(0, -z))
        gradient = x.T @ (sig_y - targets) / n_samples
        weights = weights - learning_rate * gradient
        print("weight:", weights)

    return weights
89
+
90
+
91
def score(X_test, y_test, last_weights):
    """Print and return the summed base-2 log-loss of the weights on a data set.

    A bias column of ones is prepended to ``X_test`` before the linear scores
    are computed. The loss is evaluated through ``np.logaddexp`` so that a
    saturated sigmoid (probability of exactly 0 or 1) no longer triggers a
    math domain error, and very negative scores no longer overflow ``exp``.

    Args:
        X_test: 2-D array-like of shape (n_samples, n_features).
        y_test: Labels in {0, 1}; either 1-D or 2-D, in which case the first
            column is used.
        last_weights: Weight vector of length ``n_features + 1`` with the
            bias weight first.

    Returns:
        The total loss in bits as a float. Earlier versions only printed it
        and returned None.
    """
    features = np.asarray(X_test, dtype=float)
    x = np.hstack([np.ones((features.shape[0], 1)), features])

    targets = np.asarray(y_test, dtype=float)
    if targets.ndim > 1:
        # Labels stored as one-element rows: keep the first column only.
        targets = targets.reshape(targets.shape[0], -1)[:, 0]

    z = x @ np.asarray(last_weights, dtype=float)
    # -ln(sigmoid(z)) == logaddexp(0, -z); -ln(1 - sigmoid(z)) == logaddexp(0, z).
    loss = float(
        np.sum(
            targets * np.logaddexp(0, -z)
            + (1 - targets) * np.logaddexp(0, z)
        ) / np.log(2)
    )
    print("y_test loss ", loss)
    return loss
103
+
104
+
105
def main():
    """Train on the ``train_data`` file and report the loss on ``test_data``.

    The training set is shown as a scatter plot first; the weights are then
    fitted with fit_1 and evaluated with score.
    """
    train_features, train_labels = read_data("train_data")
    drawScatterAndLine(train_features, train_labels)
    test_features, test_labels = read_data("test_data")

    # NOTE: earlier experiments (a log_loss comparison on predict_proba
    # output and writing curve() results to "train_with_splitline") were
    # kept here as commented-out code; they referenced a `model` name that
    # this function never defines, so they are not reproduced.
    score(test_features, test_labels, fit_1(train_features, train_labels))
124
+
125
+
126
# Run the train/evaluate pipeline only when executed as a script.
if __name__ == "__main__":
    main()