|
@@ -1,6 +1,6 @@
|
1
|
1
|
#!/usr/bin/python
|
2
|
2
|
# -*- coding: UTF-8 -*-
|
3
|
|
-from sklearn.datasets import load_wine
|
|
3
|
+from sklearn.datasets import load_wine,load_breast_cancer
|
4
|
4
|
from sklearn.model_selection import train_test_split
|
5
|
5
|
import numpy as np
|
6
|
6
|
|
|
@@ -367,8 +367,20 @@ class MyDT(object):
|
367
|
367
|
print_width([nodes], 1, feature_names, class_names)
|
368
|
368
|
|
369
|
369
|
|
|
370
|
def read_data_1():
    """Load the breast-cancer dataset and split it into train/test sets.

    Every label equal to 0 is rewritten to -1 in both target arrays; all
    other labels are left untouched.
    NOTE(review): presumably the downstream classifier expects -1/+1
    labels -- confirm against the caller.

    Returns:
        The tuple ``(Xtrain, Xtest, Ytrain, Ytest)`` produced by
        ``train_test_split`` with ``test_size=0.3``. No random seed is
        fixed here, so the split is not pinned by this function.
    """
    dataset = load_breast_cancer()
    Xtrain, Xtest, Ytrain, Ytest = train_test_split(
        dataset.data, dataset.target, test_size=0.3
    )
    # Boolean-mask assignment relabels all zeros in place in one step,
    # replacing the element-by-element index loops.
    Ytrain[Ytrain == 0] = -1
    Ytest[Ytest == 0] = -1
    return Xtrain, Xtest, Ytrain, Ytest
|
|
380
|
+
|
|
381
|
+
|
370
|
382
|
if __name__ == '__main__':
|
371
|
|
- Xtrain, Xtest, Ytrain, Ytest = read_data()
|
|
383
|
+ Xtrain, Xtest, Ytrain, Ytest = read_data_1()
|
372
|
384
|
print(calc_ent1(Ytrain))
|
373
|
385
|
|
374
|
386
|
weights = np.ones(len(Ytrain))/Ytrain.shape[0]
|
|
@@ -386,6 +398,6 @@ if __name__ == '__main__':
|
386
|
398
|
|
387
|
399
|
print(clf.predict(Xtest))
|
388
|
400
|
|
389
|
|
- print(clf.score(Xtest, Ytest))
|
390
|
|
- print(clf.score(Xtrain, Ytrain))
|
|
401
|
+ print("测试集", clf.score(Xtest, Ytest))
|
|
402
|
+ print("训练集", clf.score(Xtrain, Ytrain))
|
391
|
403
|
MyDT.export(clf, feature_name, class_names)
|