# my_ada_boost.py — hand-rolled AdaBoost over custom decision stumps,
# compared against a single sklearn decision stump baseline.
  1. # -*- encoding:utf-8 -*-
  2. from sklearn.datasets import load_wine, load_iris, load_breast_cancer
  3. from sklearn.model_selection import train_test_split
  4. from sklearn.tree import DecisionTreeClassifier
  5. import numpy as np
  6. from tree import my_tree
  7. wine = load_breast_cancer()
  8. Xtrain, Xtest, Ytrain, Ytest = train_test_split(wine.data,wine.target,test_size=0.3)
  9. # 设置数据初始权重
  10. w = np.array([1/len(Ytrain) for i in range(len(Ytrain))])
  11. # 决策树权重
  12. alpha = []
  13. # 决策树数组
  14. trees = []
  15. pn = 1/len(Ytrain)
  16. for i in range(len(Ytrain)):
  17. if Ytrain[i] == 0:
  18. Ytrain[i] = -1
  19. for i in range(len(Ytest)):
  20. if Ytest[i] == 0:
  21. Ytest[i] = -1
  22. print(Ytest)
  23. for i in range(30):
  24. # 训练决策树
  25. # clf = DecisionTreeClassifier(criterion="entropy", max_features=1, max_depth=1,
  26. # class_weight={0:w0, 1:1-w0}) # 实例化,criterion不写的话默认是基尼系数
  27. nodes = my_tree.fit(Xtrain, Ytrain, None, 0, w)
  28. # my_tree.print_width([nodes], 1)
  29. # print("熵值", my_tree.calc_ent(Ytrain, w))
  30. Xpredit = my_tree.predict(Xtrain, Ytrain, nodes)
  31. error = 0
  32. p_error = 0
  33. for j in range(len(Ytrain)):
  34. if Xpredit[j] != Ytrain[j]:
  35. error += w[j]
  36. p_error += pn
  37. if error > 0.5:
  38. continue
  39. if error == 0:
  40. error = 0.001
  41. print("第", i, "轮错误率", p_error, error)
  42. ab = 0.5*np.log2(1/error - 1)
  43. # 更新权重
  44. for j in range(len(Ytrain)):
  45. w[j] = w[j]*np.exp(-ab*Ytrain[j]*Xpredit[j])
  46. sum_w = sum(w)
  47. w = w/sum_w
  48. alpha.append(ab)
  49. trees.append(nodes)
  50. predicts = []
  51. for tree in trees:
  52. predicts.append(my_tree.predict(Xtest, None, tree))
  53. print(alpha)
  54. # 结果加权
  55. result = np.zeros(len(Xtest), float)
  56. for p in predicts:
  57. r = 0
  58. for w_alpha in alpha:
  59. r += w_alpha * p
  60. result = result + r
  61. print("sign前:" , result)
  62. result = np.sign(result)
  63. print("sign后:", result)
  64. # print(1- sum(np.bitwise_xor(Ytest, result))/len(result))
  65. # print(result == Ytest)
  66. print(len([i for i in result == Ytest if i])/len(result))
  67. # cmp = np.concatenate(([result], [Ytest]), axis=0)
  68. # print(cmp)
  69. clf = DecisionTreeClassifier(criterion="entropy", max_features=1, max_depth=1)
  70. clf = clf.fit(Xtrain, Ytrain)
  71. print(clf.score(Xtest, Ytest))