yufeng0528 4 years ago
parent
commit
6d5e009afc
2 changed files with 15 additions and 8 deletions
  1. 11 6
      integr/my_gbdt.py
  2. 4 2
      integr/my_gbdt_classic.py

+ 11 - 6
integr/my_gbdt.py

@@ -27,11 +27,11 @@ def fit(Xtrain, Ytrain):
27 27
 
28 28
     gx = Ytrain
29 29
 
30
-    for i in range(200):
30
+    for i in range(55):
31 31
         # 求残差
32 32
         gx = gx - fx0
33 33
         print("第", i, '轮 残差', gx[:10])
34
-        clf = tree.DecisionTreeRegressor(criterion="mse", max_features=1, max_depth=2)
34
+        clf = tree.DecisionTreeRegressor(criterion="mse", max_features=5, max_depth=10)
35 35
         clf.fit(Xtrain, gx)
36 36
         trees.append(clf)
37 37
 
@@ -48,9 +48,9 @@ def fit(Xtrain, Ytrain):
48 48
 
49 49
     sum = 0
50 50
     for i in range(Ytrain.shape[0]):
51
-        sum = sum + (gx[i] - Ytrain[i])*(gx[i] - Ytrain[i])
51
+        sum = sum + (gx[i] - Ytrain[i])**2
52 52
 
53
-    print("train mse", sum)
53
+    print("train mse", sum/Ytrain.shape[0])
54 54
     return trees, fx[0][0]
55 55
 
56 56
 
@@ -64,10 +64,15 @@ def score(Xtest, Ytest, trees, fx0):
64 64
 
65 65
     sum = 0
66 66
     for i in range(Ytest.shape[0]):
67
-        sum = sum + (gx[i] - Ytest[i]) * (gx[i] - Ytest[i])
67
+        sum = sum + (gx[i] - Ytest[i])**2
68 68
 
69
-    print("test mse", sum)
69
+    print("test mse", sum/Ytest.shape[0])
70 70
 
71
+    gx = trees[0].predict(Xtest)
72
+    sum = 0
73
+    for i in range(Ytest.shape[0]):
74
+        sum = sum + (gx[i] - Ytest[i]) ** 2
75
+    print("test mse0", sum / Ytest.shape[0])
71 76
 
72 77
 if __name__ == '__main__':
73 78
     Xtrain, Xtest, Ytrain, Ytest = read_data()

+ 4 - 2
integr/my_gbdt_classic.py

@@ -17,10 +17,11 @@ def read_data():
17 17
             Ytest[i] = -1
18 18
     return Xtrain, Xtest, Ytrain, Ytest
19 19
 
20
+
20 21
 def init(Ytrain):
21 22
     positive = sum(Ytrain == 1)
22 23
     negative = Ytrain.shape[0] - positive
23
-    p = np.log2(positive/negative)
24
+    p = np.log2(positive/negative) # 可能是为了训练稍微快点
24 25
     return np.ones(Ytrain.shape[0])*p
25 26
 
26 27
 
@@ -67,8 +68,9 @@ def fit(Xtrain, Ytrain):
67 68
     print("准确率", p)
68 69
     return clf_tress, fx0[0]
69 70
 
71
+
70 72
 def score(Xtest, Ytest, trees, fx0):
71
-    gx = np.zeros(Ytest.shape[0])
73
+    gx = np.ones(Ytest.shape[0])*fx0
72 74
 
73 75
     for i in range(len(trees)):
74 76
         gx = gx + trees[i].predict(Xtest)