Decision Tree (3)

from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier, export_graphviz
from sklearn.model_selection import train_test_split
import graphviz

# Load the Iris dataset
iris = load_iris()
# Feature matrix and class labels
iris_data = iris.data
iris_target = iris.target
# Class label names
iris_target_names = iris.target_names
# Names of the features used for training
iris_feat_names = iris.feature_names
# Print the feature and class names
print(iris_feat_names)
print(iris_target_names)

# Split the dataset: 30% for the test set, the rest for training
X_train, X_test, y_train, y_test = train_test_split(iris_data, iris_target, test_size=0.3, random_state=2019)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

# # Build a decision tree using the information-gain (entropy) criterion, best-point splits,
# # and no limit on the maximum depth
# clf = DecisionTreeClassifier(criterion='entropy', splitter='best', max_depth=None, random_state=2019)
# clf.fit(X_train, y_train)
# # Compute training and test accuracy
# train_acc = sum(clf.predict(X_train) == y_train) / len(y_train)
# test_acc = sum(clf.predict(X_test) == y_test) / len(y_test)
# print('Train Accuracy is : {:.2f}'.format(train_acc))
# print('Test Accuracy is : {:.2f}'.format(test_acc))

# To avoid overfitting, limit the maximum depth to 3
clf = DecisionTreeClassifier(criterion='entropy', splitter='best', max_depth=3, random_state=2019)
clf.fit(X_train, y_train)
train_acc = sum(clf.predict(X_train) == y_train) / len(y_train)
test_acc = sum(clf.predict(X_test) == y_test) / len(y_test)
print('Train Accuracy is : {:.2f}'.format(train_acc))
print('Test Accuracy is : {:.2f}'.format(test_acc))

# Visualize the decision tree with Graphviz
dot_data = export_graphviz(clf, feature_names=iris_feat_names, out_file=None)
dot = graphviz.Source(dot_data)
dot.view()
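As a minimal sketch of an alternative to the Graphviz rendering above (it avoids installing the system Graphviz binary), the fitted tree can also be inspected with scikit-learn's own export_text and plot_tree, and the accuracies computed with accuracy_score. This assumes the clf, X_train/X_test, y_train/y_test, iris_feat_names, and iris_target_names objects from the listing above, plus scikit-learn 0.21+ and matplotlib.

from sklearn.metrics import accuracy_score
from sklearn.tree import export_text, plot_tree
import matplotlib.pyplot as plt

# Same accuracies as the manual comparison above, via accuracy_score
print('Train Accuracy is : {:.2f}'.format(accuracy_score(y_train, clf.predict(X_train))))
print('Test Accuracy is : {:.2f}'.format(accuracy_score(y_test, clf.predict(X_test))))

# Print the learned split rules as plain text
print(export_text(clf, feature_names=iris_feat_names))

# Render the tree with matplotlib instead of Graphviz
plt.figure(figsize=(10, 6))
plot_tree(clf, feature_names=iris_feat_names, class_names=list(iris_target_names), filled=True)
plt.show()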
