Browse Source

用自己写的弱分类器训练就是有问题

yufeng0528 4 years ago
parent
commit
98df5b5fbf
2 changed files with 22 additions and 10 deletions
  1. integr/my_ada_boost.py — 6 additions, 6 deletions
  2. tree/my_tree.py — 16 additions, 4 deletions

+ 6 - 6
integr/my_ada_boost.py

@@ -20,13 +20,13 @@ def read_data():
20 20
 
21 21
 def fit(Xtrain, Ytrain):
22 22
     # 设置数据初始权重
23
-    w = np.array([1/len(Ytrain) for i in range(len(Ytrain))])
23
+    w = np.ones(len(Ytrain))/Ytrain.shape[0]
24 24
     # 决策树权重
25 25
     alphas = []
26 26
     # 决策树数组
27 27
     trees = []
28 28
     pn = 1/len(Ytrain)
29
-    print(Ytest)
29
+    # print(Ytest)
30 30
 
31 31
     for i in range(20):
32 32
         # 训练决策树
@@ -34,10 +34,10 @@ def fit(Xtrain, Ytrain):
34 34
                                      )  # 实例化,criterion不写的话默认是基尼系数
35 35
 
36 36
         clf.fit(Xtrain, Ytrain, w)
37
-        Xpredit = clf.predict(Xtrain)
37
+        Ypredit = clf.predict(Xtrain)
38 38
 
39
-        error = np.dot([Xpredit != Ytrain], w)
40
-        p_error = sum(Xpredit != Ytrain)/Ytrain.shape[0]
39
+        error = np.dot(Ypredit != Ytrain, w)
40
+        p_error = sum(Ypredit != Ytrain)/Ytrain.shape[0]
41 41
 
42 42
         if error > 0.5:
43 43
             continue
@@ -49,7 +49,7 @@ def fit(Xtrain, Ytrain):
49 49
 
50 50
         # 更新权重
51 51
         for j in range(Ytrain.shape[0]):
52
-            w[j] = w[j]*np.exp(-alpha*Ytrain[j]*Xpredit[j])
52
+            w[j] = w[j]*np.exp(-alpha*Ytrain[j]*Ypredit[j])
53 53
         sum_w = sum(w)
54 54
         w = w/sum_w
55 55
 

+ 16 - 4
tree/my_tree.py

@@ -1,6 +1,6 @@
1 1
 #!/usr/bin/python
2 2
 # -*- coding: UTF-8 -*-
3
-from sklearn.datasets import load_wine
3
+from sklearn.datasets import load_wine,load_breast_cancer
4 4
 from sklearn.model_selection import train_test_split
5 5
 import numpy as np
6 6
 
@@ -367,8 +367,20 @@ class MyDT(object):
367 367
         print_width([nodes], 1, feature_names, class_names)
368 368
 
369 369
 
370
+def read_data_1():
371
+    wine = load_breast_cancer()
372
+    Xtrain, Xtest, Ytrain, Ytest = train_test_split(wine.data,wine.target,test_size=0.3)
373
+    for i in range(len(Ytrain)):
374
+        if Ytrain[i] == 0:
375
+            Ytrain[i] = -1
376
+    for i in range(len(Ytest)):
377
+        if Ytest[i] == 0:
378
+            Ytest[i] = -1
379
+    return Xtrain, Xtest, Ytrain, Ytest
380
+
381
+
370 382
 if __name__ == '__main__':
371
-    Xtrain, Xtest, Ytrain, Ytest = read_data()
383
+    Xtrain, Xtest, Ytrain, Ytest = read_data_1()
372 384
     print(calc_ent1(Ytrain))
373 385
 
374 386
     weights = np.ones(len(Ytrain))/Ytrain.shape[0]
@@ -386,6 +398,6 @@ if __name__ == '__main__':
386 398
 
387 399
     print(clf.predict(Xtest))
388 400
 
389
-    print(clf.score(Xtest, Ytest))
390
-    print(clf.score(Xtrain, Ytrain))
401
+    print("测试集", clf.score(Xtest, Ytest))
402
+    print("训练集", clf.score(Xtrain, Ytrain))
391 403
     MyDT.export(clf, feature_name, class_names)