#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""Fit a shallow decision tree on scikit-learn's wine dataset and draw it.

Steps performed when the script runs:

1. Load the wine dataset and print its shape, targets, feature names and
   target names.
2. Split it 70/30 into train/test sets and dump the training features to
   ``foo.csv``.
3. Fit an entropy-based ``DecisionTreeClassifier`` (``max_depth=1``,
   ``max_features=1``) and print its accuracy on the test split.
4. Export the tree as DOT text with Chinese feature/class labels, switch the
   font to one that can display them, and render it with Graphviz under the
   base name ``Tree1``.
"""
import re

import graphviz
import numpy
import pandas as pd
from sklearn import tree
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split

wine = load_wine()
print(wine.data.shape)  # 178 x 13
print(wine.target)

# If wine were a single table it would look like this: the feature columns
# followed by the target column.
pdata = pd.concat([pd.DataFrame(wine.data), pd.DataFrame(wine.target)], axis=1)
print(wine.feature_names)
print(wine.target_names)

# No random_state is given, so the split (and therefore the score) differs
# from run to run.
Xtrain, Xtest, Ytrain, Ytest = train_test_split(
    wine.data, wine.target, test_size=0.3
)
numpy.savetxt("foo.csv", Xtrain, delimiter=",")

# Instantiate the classifier; when criterion is omitted the default is the
# Gini impurity.
clf = tree.DecisionTreeClassifier(
    criterion="entropy", max_features=1, max_depth=1
)
clf = clf.fit(Xtrain, Ytrain)
score = clf.score(Xtest, Ytest)  # accuracy of the predictions on the test set
print("score:", score)

feature_name = ['酒精', '苹果酸', '灰', '灰的碱性', '镁', '总酚', '类黄酮',
                '非黄烷类酚类', '花青素', '颜色强度', '色调',
                'od280/od315稀释葡萄酒', '脯氨酸']

dot_data = tree.export_graphviz(
    clf,
    # Ask for the DOT source as a returned string. Older scikit-learn
    # releases default to writing "tree.dot" and returning None, which would
    # break the font substitution below.
    out_file=None,
    feature_names=feature_name,
    class_names=["琴酒", "雪莉", "贝尔摩德"],
    filled=True,   # colour every node; the lighter the colour, the higher the impurity
    rounded=True,  # draw the node boxes with rounded corners
)

# Swap the default font for one with CJK glyphs so the Chinese labels render.
# Depending on the scikit-learn version the attribute is emitted either as
# fontname=helvetica or fontname="helvetica"; match both so the result is
# always a single, correctly quoted value.
dot_data = re.sub(
    r'fontname="?helvetica"?', 'fontname="Microsoft YaHei"', dot_data
)

graph = graphviz.Source(dot_data)
graph.render("Tree1")