|
@@ -23,9 +23,11 @@ def init(Ytrain):
|
23
|
23
|
p = np.log2(positive/negative)
|
24
|
24
|
return np.ones(Ytrain.shape[0])*p
|
25
|
25
|
|
|
26
|
+
|
26
|
27
|
def fit(Xtrain, Ytrain):
|
27
|
28
|
print("init", Ytrain[:10])
|
28
|
29
|
fx = []
|
|
30
|
+ clf_tress = []
|
29
|
31
|
|
30
|
32
|
|
31
|
33
|
fx0 = init(Ytrain)
|
|
@@ -35,7 +37,7 @@ def fit(Xtrain, Ytrain):
|
35
|
37
|
|
36
|
38
|
gx = fx0
|
37
|
39
|
|
38
|
|
- for i in range(50):
|
|
40
|
+ for i in range(10):
|
39
|
41
|
# 求伪残差
|
40
|
42
|
hx_0 = []
|
41
|
43
|
for j in range(Ytrain.shape[0]):
|
|
@@ -43,8 +45,9 @@ def fit(Xtrain, Ytrain):
|
43
|
45
|
hx_0.append(p)
|
44
|
46
|
|
45
|
47
|
print("第", i, '轮 残差', gx[:10])
|
46
|
|
- clf = tree.DecisionTreeRegressor(criterion="mse", max_features=1, max_depth=4)
|
|
48
|
+ clf = tree.DecisionTreeRegressor(criterion="mse", max_features=1, max_depth=1)
|
47
|
49
|
clf.fit(Xtrain, np.array(hx_0))
|
|
50
|
+ clf_tress.append(clf)
|
48
|
51
|
|
49
|
52
|
fx_i = clf.predict(Xtrain)*0.7
|
50
|
53
|
print("第", i, '轮 结果', fx_i[:10])
|
|
@@ -61,10 +64,25 @@ def fit(Xtrain, Ytrain):
|
61
|
64
|
gx = np.sign(gx)
|
62
|
65
|
p = sum(gx==Ytrain)/Ytrain.shape[0]
|
63
|
66
|
|
64
|
|
- print(p)
|
65
|
|
- return fx
|
|
67
|
+ print("准确率", p)
|
|
68
|
+ return clf_tress, fx0[0]
|
|
69
|
+
|
|
70
|
def score(Xtest, Ytest, trees, fx0, learning_rate=0.7):
    """Evaluate the boosted tree ensemble on held-out data.

    The ensemble margin is ``fx0 + learning_rate * sum(tree.predict(Xtest))``
    and the predicted label is the sign of that margin.  Two accuracies are
    printed: the full ensemble, and (as a baseline) the first tree on its own.

    NOTE(review): this assumes ``fit`` accumulates its training margin as
    ``fx0`` plus each tree's prediction scaled by 0.7 -- the initial value and
    the 0.7 factor are visible in ``fit``, the accumulation line is not;
    confirm before relying on the numbers.

    Args:
        Xtest: feature matrix accepted by each tree's ``predict``.
        Ytest: NumPy array of true labels; compared against ``np.sign`` of
            the margin, so presumably encoded as -1 / +1 (verify with caller).
        trees: sequence of fitted regressors exposing ``predict``.
        fx0: scalar initial prediction (the prior) returned by ``fit``.
        learning_rate: shrinkage applied to every tree's output; the default
            mirrors the factor hard-coded in ``fit``.

    Returns:
        Accuracy of the full ensemble as a float in [0, 1].
    """
    # Start from the prior instead of zero so the test-time margin is built
    # the same way as the training-time margin.
    margin = np.full(Ytest.shape[0], fx0, dtype=float)
    for tree_model in trees:
        margin += learning_rate * np.asarray(tree_model.predict(Xtest))

    p = np.mean(np.sign(margin) == Ytest)
    print("准确率", p)

    # Baseline: how well does the very first tree do by itself?  Skipped for
    # an empty ensemble, where there is no first tree to index.
    if len(trees) > 0:
        first_tree_labels = np.sign(trees[0].predict(Xtest))
        p0 = np.mean(first_tree_labels == Ytest)
        print("准确率0", p0)

    return p
|
66
|
83
|
|
67
|
84
|
|
68
|
85
|
if __name__ == '__main__':
    # Load the train/test split provided by read_data().
    Xtrain, Xtest, Ytrain, Ytest = read_data()

    # fit() hands back the list of fitted trees plus the scalar initial
    # prediction; both are needed to evaluate on the held-out split.
    trees, fx0 = fit(Xtrain, Ytrain)

    # Report accuracy on the test split.
    score(Xtest, Ytest, trees, fx0)
|