yufeng0528 4 years ago
parent
commit
7b58d78795
2 changed files with 47 additions and 9 deletions
  1. 24 4
      integr/my_gbdt.py
  2. 23 5
      integr/my_gbdt_classic.py

+ 24 - 4
integr/my_gbdt.py

@@ -18,6 +18,7 @@ def init(Ytrain):
18 18
 def fit(Xtrain, Ytrain):
19 19
     print("init", Ytrain[:10])
20 20
     fx = []
21
+    trees = []
21 22
 
22 23
     fx0 = np.ones(Ytrain.shape[0])*init(Ytrain)
23 24
     fx.append(fx0)
@@ -26,12 +27,13 @@ def fit(Xtrain, Ytrain):
26 27
 
27 28
     gx = Ytrain
28 29
 
29
-    for i in range(50):
30
+    for i in range(200):
30 31
         # 求残差
31 32
         gx = gx - fx0
32 33
         print("第", i, '轮 残差', gx[:10])
33
-        clf = tree.DecisionTreeRegressor(criterion="mse", max_features=1, max_depth=4)
34
+        clf = tree.DecisionTreeRegressor(criterion="mse", max_features=1, max_depth=2)
34 35
         clf.fit(Xtrain, gx)
36
+        trees.append(clf)
35 37
 
36 38
         fx0 = clf.predict(Xtrain)*0.7
37 39
         print("第", i, '轮 结果', fx0[:10])
@@ -48,8 +50,26 @@ def fit(Xtrain, Ytrain):
48 50
     for i in range(Ytrain.shape[0]):
49 51
         sum = sum + (gx[i] - Ytrain[i])*(gx[i] - Ytrain[i])
50 52
 
51
-    print(sum)
53
+    print("train mse", sum)
54
+    return trees, fx[0][0]
55
+
56
+
57
def score(Xtest, Ytest, trees, fx0, learning_rate=0.7):
    """Evaluate the boosted regression ensemble on held-out data.

    The prediction is ``fx0 + learning_rate * sum(tree.predict(Xtest))``.
    ``fit`` multiplies every tree's output by 0.7 before using it as that
    round's contribution, so the same shrinkage has to be applied here;
    summing the raw tree outputs evaluates a different model from the one
    that was trained.

    Args:
        Xtest: feature matrix passed unchanged to each tree's ``predict``.
        Ytest: target values, one per row of ``Xtest``.
        trees: fitted regressors in boosting order, as returned by ``fit``.
        fx0: scalar initial prediction, as returned by ``fit``.
        learning_rate: shrinkage applied to every tree; the default matches
            the 0.7 factor hard-coded in ``fit``.

    Returns:
        The sum of squared errors over the test set as a float. This is the
        same value that is printed under the "test mse" label (a sum, not a
        mean, like the "train mse" figure printed by ``fit``).
    """
    targets = np.asarray(Ytest, dtype=float)
    predictions = np.full(Ytest.shape[0], fx0, dtype=float)

    for fitted_tree in trees:
        predictions = predictions + learning_rate * fitted_tree.predict(Xtest)
    print(predictions[:10])
    print(Ytest[:10])

    # Vectorised squared error; also avoids shadowing the builtin ``sum``.
    residuals = predictions - targets
    squared_error = float(np.sum(residuals * residuals))

    print("test mse", squared_error)
    return squared_error
70
+
52 71
 
53 72
 if __name__ == '__main__':
54 73
     Xtrain, Xtest, Ytrain, Ytest = read_data()
55
-    fit(Xtrain, Ytrain)
74
+    trees, fx0 = fit(Xtrain, Ytrain)
75
+    score(Xtest, Ytest, trees, fx0)

+ 23 - 5
integr/my_gbdt_classic.py

@@ -23,9 +23,11 @@ def init(Ytrain):
23 23
     p = np.log2(positive/negative)
24 24
     return np.ones(Ytrain.shape[0])*p
25 25
 
26
+
26 27
 def fit(Xtrain, Ytrain):
27 28
     print("init", Ytrain[:10])
28 29
     fx = []
30
+    clf_tress = []
29 31
 
30 32
 
31 33
     fx0 = init(Ytrain)
@@ -35,7 +37,7 @@ def fit(Xtrain, Ytrain):
35 37
 
36 38
     gx = fx0
37 39
 
38
-    for i in range(50):
40
+    for i in range(10):
39 41
         # 求伪残差
40 42
         hx_0 = []
41 43
         for j in range(Ytrain.shape[0]):
@@ -43,8 +45,9 @@ def fit(Xtrain, Ytrain):
43 45
             hx_0.append(p)
44 46
 
45 47
         print("第", i, '轮 残差', gx[:10])
46
-        clf = tree.DecisionTreeRegressor(criterion="mse", max_features=1, max_depth=4)
48
+        clf = tree.DecisionTreeRegressor(criterion="mse", max_features=1, max_depth=1)
47 49
         clf.fit(Xtrain, np.array(hx_0))
50
+        clf_tress.append(clf)
48 51
 
49 52
         fx_i = clf.predict(Xtrain)*0.7
50 53
         print("第", i, '轮 结果', fx_i[:10])
@@ -61,10 +64,25 @@ def fit(Xtrain, Ytrain):
61 64
     gx = np.sign(gx)
62 65
     p = sum(gx==Ytrain)/Ytrain.shape[0]
63 66
 
64
-    print(p)
65
-    return fx
67
+    print("准确率", p)
68
+    return clf_tress, fx0[0]
69
+
70
def score(Xtest, Ytest, trees, fx0, learning_rate=0.7):
    """Evaluate the boosted classifier on held-out data and print accuracy.

    The ensemble score is ``fx0 + learning_rate * sum(tree.predict(Xtest))``
    and the predicted label is its sign. ``fit`` seeds its running score
    with the initial value and scales each tree's output by 0.7, so both are
    applied here; previously ``fx0`` was accepted but never used.
    NOTE(review): the line where ``fit`` accumulates the per-tree outputs is
    not visible from this function - confirm it adds them onto the seeded
    score.

    Args:
        Xtest: feature matrix passed unchanged to each tree's ``predict``.
        Ytest: true labels, compared against ``np.sign`` of the score
            (so presumably encoded as -1 / +1 - verify against read_data).
        trees: fitted regressors in boosting order, as returned by ``fit``.
        fx0: scalar initial score, as returned by ``fit``.
        learning_rate: shrinkage applied to every tree; the default matches
            the 0.7 factor hard-coded in ``fit``.

    Returns:
        Accuracy of the full ensemble as a float in [0, 1].
    """
    labels = np.asarray(Ytest)
    sample_count = Ytest.shape[0]

    raw_score = np.full(sample_count, fx0, dtype=float)
    for fitted_tree in trees:
        raw_score = raw_score + learning_rate * fitted_tree.predict(Xtest)

    predicted = np.sign(raw_score)
    accuracy = np.count_nonzero(predicted == labels) / sample_count
    print("准确率", accuracy)

    # Diagnostic: accuracy of the first tree on its own. Skipped when the
    # ensemble is empty instead of failing on trees[0].
    if trees:
        first_only = np.sign(trees[0].predict(Xtest))
        first_accuracy = np.count_nonzero(first_only == labels) / sample_count
        print("准确率0", first_accuracy)

    return accuracy
66 83
 
67 84
 
68 85
 if __name__ == '__main__':
69 86
     Xtrain, Xtest, Ytrain, Ytest = read_data()
70
-    fx = fit(Xtrain, Ytrain)
87
+    trees,fx0 = fit(Xtrain, Ytrain)
88
+    score(Xtest, Ytest, trees, fx0)