# -*- encoding:utf-8 -*-
"""AdaBoost demo built on the project-local ``my_tree`` weak learner.

The script:

1. loads the scikit-learn breast-cancer data set and makes a 70/30 split,
2. remaps the 0/1 labels to -1/+1,
3. runs up to ``N_ROUNDS`` boosting rounds, fitting one ``my_tree`` tree per
   round on the current sample weights,
4. combines the trees' test predictions, each weighted by its own alpha,
   and prints the resulting accuracy,
5. prints the accuracy of a single depth-1 scikit-learn tree as a baseline.
"""
from sklearn.datasets import load_wine, load_iris, load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
import numpy as np
from tree import my_tree

# Number of boosting rounds attempted.
N_ROUNDS = 30

# NOTE: despite the variable name, this is the breast-cancer data set.
wine = load_breast_cancer()
Xtrain, Xtest, Ytrain, Ytest = train_test_split(
    wine.data, wine.target, test_size=0.3
)

# Initial sample weights: uniform over the training set.
w = np.full(len(Ytrain), 1 / len(Ytrain))
# Weight (alpha) of each accepted tree.
alpha = []
# Accepted trees, index-aligned with ``alpha``.
trees = []
# Unweighted contribution of one sample to the plain error rate.
pn = 1 / len(Ytrain)

# The exponential weight update below needs labels in {-1, +1}.
Ytrain[Ytrain == 0] = -1
Ytest[Ytest == 0] = -1
print(Ytest)

for i in range(N_ROUNDS):
    # Train a tree on the current sample weights.
    # NOTE(review): the meaning of the ``None, 0`` arguments is defined by
    # ``my_tree.fit`` and is not visible from this file.
    nodes = my_tree.fit(Xtrain, Ytrain, None, 0, w)
    Xpredit = np.asarray(my_tree.predict(Xtrain, Ytrain, nodes))

    misclassified = Xpredit != Ytrain
    # Weighted error drives alpha; the unweighted rate is only reported.
    error = w[misclassified].sum()
    p_error = np.count_nonzero(misclassified) * pn

    # A learner worse than chance is discarded for this round.
    if error > 0.5:
        continue
    # Avoid division by zero / infinite alpha for a perfect learner.
    if error == 0:
        error = 0.001
    print("第", i, "轮错误率", p_error, error)

    # Tree weight: 0.5 * ln((1 - error) / error).  The natural log is used
    # so that it is consistent with the np.exp re-weighting below (the
    # previous np.log2 inflated every alpha by a factor of 1 / ln 2).
    ab = 0.5 * np.log(1 / error - 1)

    # Update sample weights: grow the misclassified, shrink the correct,
    # then renormalise so the weights sum to one.
    w = w * np.exp(-ab * Ytrain * Xpredit)
    w = w / w.sum()

    alpha.append(ab)
    trees.append(nodes)

predicts = [my_tree.predict(Xtest, None, fitted) for fitted in trees]
print(alpha)

# Weighted vote: each tree's prediction is scaled by *its own* alpha.
# (Previously every prediction was multiplied by the sum of all alphas,
# which reduced the ensemble to an unweighted vote.)
result = np.zeros(len(Xtest), float)
for tree_alpha, tree_pred in zip(alpha, predicts):
    result = result + tree_alpha * np.asarray(tree_pred, dtype=float)
print("sign前:", result)
result = np.sign(result)
print("sign后:", result)

# Ensemble accuracy on the held-out split.
print(np.count_nonzero(result == Ytest) / len(result))

# Baseline: a single depth-1 scikit-learn tree on the same split.
clf = DecisionTreeClassifier(criterion="entropy", max_features=1, max_depth=1)
clf = clf.fit(Xtrain, Ytrain)
print(clf.score(Xtest, Ytest))