Browse Source

gbdt分类算法

yufeng0528 4 years ago
parent
commit
b61af5fab1
2 changed files with 72 additions and 2 deletions
  1. 2 2
      integr/my_gbdt.py
  2. 70 0
      integr/my_gbdt_classic.py

+ 2 - 2
integr/my_gbdt.py

@@ -26,14 +26,14 @@ def fit(Xtrain, Ytrain):
26 26
 
27 27
     gx = Ytrain
28 28
 
29
-    for i in range(20):
29
+    for i in range(50):
30 30
         # 求残差
31 31
         gx = gx - fx0
32 32
         print("第", i, '轮 残差', gx[:10])
33 33
         clf = tree.DecisionTreeRegressor(criterion="mse", max_features=1, max_depth=4)
34 34
         clf.fit(Xtrain, gx)
35 35
 
36
-        fx0 = clf.predict(Xtrain)
36
+        fx0 = clf.predict(Xtrain)*0.7
37 37
         print("第", i, '轮 结果', fx0[:10])
38 38
         fx.append(fx0)
39 39
 

+ 70 - 0
integr/my_gbdt_classic.py

@@ -0,0 +1,70 @@
1
+#-*- coding:utf-8 -*-
2
+import numpy as np
3
+from sklearn.ensemble import GradientBoostingClassifier,GradientBoostingRegressor
4
+from sklearn.model_selection import train_test_split
5
+from sklearn.datasets import load_wine,load_boston,load_breast_cancer
6
+from sklearn import tree
7
+
8
+
9
def read_data():
    """Load the breast-cancer dataset and split it into train and test parts.

    The split is made with ``test_size=0.3``.  In both label arrays every
    entry equal to 0 is overwritten with -1; other entries are left alone.

    Returns:
        The tuple ``(Xtrain, Xtest, Ytrain, Ytest)``.
    """
    dataset = load_breast_cancer()
    Xtrain, Xtest, Ytrain, Ytest = train_test_split(
        dataset.data, dataset.target, test_size=0.3
    )
    # Re-code label 0 as -1, in place, for both splits.
    for labels in (Ytrain, Ytest):
        labels[labels == 0] = -1
    return Xtrain, Xtest, Ytrain, Ytest
19
+
20
def init(Ytrain):
    """Build the constant starting score used before any tree is fitted.

    Entries of ``Ytrain`` equal to 1 are counted as positive and all other
    entries as negative.  The result has one element per sample, each set to
    ``log2(positive / negative)``.

    Args:
        Ytrain: NumPy array of labels.

    Returns:
        Array of length ``Ytrain.shape[0]`` filled with the log2 ratio.
    """
    n_samples = Ytrain.shape[0]
    n_pos = sum(Ytrain == 1)
    n_neg = n_samples - n_pos
    log_ratio = np.log2(n_pos / n_neg)
    return np.full(n_samples, log_ratio)
25
+
26
def fit(Xtrain, Ytrain, n_rounds=50, learning_rate=0.7, max_depth=4):
    """Run a hand-rolled gradient-boosting loop for binary classification.

    Starting from the constant score returned by ``init``, each round fits a
    regression tree to the pseudo-residuals of the current score and adds the
    tree's shrunken training-set prediction to the score.  Progress and the
    final training accuracy are printed.

    The pseudo-residual ``y / (2 ** (y * score) + 1)`` is the negative
    gradient of ``log2(1 + 2 ** (-y * score))`` with respect to the score.
    NOTE(review): this presumes labels are coded as -1 / +1 -- confirm
    against the caller.

    Args:
        Xtrain: Training feature matrix passed to the regression trees.
        Ytrain: NumPy array of training labels, one per row of ``Xtrain``.
        n_rounds: Number of boosting rounds (trees) to fit.
        learning_rate: Shrinkage factor applied to every tree's prediction.
        max_depth: Maximum depth of each regression tree.

    Returns:
        A list of ``n_rounds + 1`` arrays: the initial score followed by each
        round's shrunken training-set prediction.  The fitted trees are not
        returned.
    """
    print("init", Ytrain[:10])

    fx0 = init(Ytrain)
    fx = [fx0]
    print("0", fx0[:10])

    # Running sum of the initial score and every round's contribution.
    gx = fx0
    for i in range(n_rounds):
        # Pseudo-residuals for all samples at once.
        residual = Ytrain / (np.exp2(Ytrain * gx) + 1)
        # Print the residuals themselves; the label says "residual".
        print("第", i, '轮 残差', residual[:10])

        # The criterion is left at the library default (squared error), so
        # this works regardless of whether the installed scikit-learn spells
        # it "mse" or "squared_error".
        clf = tree.DecisionTreeRegressor(max_features=1, max_depth=max_depth)
        clf.fit(Xtrain, residual)

        fx_i = clf.predict(Xtrain) * learning_rate
        print("第", i, '轮 结果', fx_i[:10])
        fx.append(fx_i)
        gx = gx + fx_i

    # gx already equals the sum of every entry in fx; no need to re-add them.
    print(gx[:10])

    # Predicted class is the sign of the final score; report train accuracy.
    predicted = np.sign(gx)
    p = np.mean(predicted == Ytrain)

    print(p)
    return fx
66
+
67
+
68
if __name__ == "__main__":
    # Script entry: load the data, then boost on the training split only.
    Xtrain, Xtest, Ytrain, Ytest = read_data()
    fx = fit(
        Xtrain,
        Ytrain,
    )