123456789101112131415161718192021222324252627282930313233343536373839404142434445 |
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""Train a small decision tree on the scikit-learn wine dataset and render it.

The script prints basic facts about the dataset, writes the training features
to ``foo.csv``, fits a one-level entropy-based decision tree, prints its test
accuracy, and renders the fitted tree with Graphviz to ``Tree1`` (DOT source)
plus the rendered output file next to it.
"""
import re

import graphviz
import numpy
import pandas as pd
from sklearn import tree
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split

wine = load_wine()
print(wine.data.shape)  # (178, 13): 178 samples, 13 features
print(wine.target)

# If the wine dataset were a single table it would look like this: one column
# per feature plus the class label. Columns are labelled explicitly so the
# frame does not end up with two columns both named 0.
pdata = pd.concat(
    [
        pd.DataFrame(wine.data, columns=wine.feature_names),
        pd.Series(wine.target, name="target"),
    ],
    axis=1,
)
print(wine.feature_names)
print(wine.target_names)

# No random_state is given, so the split (and therefore the score printed
# below) differs from run to run.
Xtrain, Xtest, Ytrain, Ytest = train_test_split(
    wine.data, wine.target, test_size=0.3
)
numpy.savetxt("foo.csv", Xtrain, delimiter=",")

# criterion defaults to Gini impurity when omitted; entropy is requested here.
# max_depth=1 with max_features=1 yields a single split chosen from one
# candidate feature at a time.
clf = tree.DecisionTreeClassifier(
    criterion="entropy", max_features=1, max_depth=1
)
clf = clf.fit(Xtrain, Ytrain)
score = clf.score(Xtest, Ytest)  # mean accuracy on the held-out split
print("score:", score)

feature_name = ['酒精', '苹果酸', '灰', '灰的碱性', '镁', '总酚', '类黄酮',
                '非黄烷类酚类', '花青素', '颜色强度', '色调', 'od280/od315稀释葡萄酒', '脯氨酸']
dot_data = tree.export_graphviz(
    clf,
    # Ask for the DOT source as a string. Passing this explicitly keeps the
    # script working on old scikit-learn releases whose default wrote
    # "tree.dot" and returned None.
    out_file=None,
    feature_names=feature_name,
    class_names=["琴酒", "雪莉", "贝尔摩德"],
    filled=True,   # colour each node; a lighter colour means higher impurity
    rounded=True,  # draw nodes as rounded boxes
)

# Swap the default font for one that can draw the Chinese labels. Depending on
# the scikit-learn version the font name is emitted either bare
# (fontname=helvetica) or quoted (fontname="helvetica"); a plain str.replace on
# the quoted form would produce doubled quotes and invalid DOT, so both forms
# are matched and rewritten to a single, correctly quoted attribute.
dot_data = re.sub(
    r'fontname="?helvetica"?', 'fontname="Microsoft YaHei"', dot_data
)

graph = graphviz.Source(dot_data)
graph.render("Tree1")
# To open the rendered tree in a viewer, call graph.view(); in a notebook,
# evaluating `graph` as the last expression of a cell displays it inline.
|