赞
踩
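以下三段代码的区别只在 show 函数:第一段用 pydotplus 生成 PNG 图片(DT_show.png),第二段用 pydotplus 生成 PDF(iris.pdf),第三段先导出 tree.dot,再用 graphviz 渲染并打开。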
# Example 1: train a decision tree on a small loan-approval table and render
# the fitted tree to a PNG image with pydotplus.
import matplotlib as mpl
import numpy as np
import pandas as pd
import pydotplus
from sklearn import tree
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier


def loaddata():
    """Build the encoded training set together with its encoders.

    Returns:
        A tuple ``(x_train, y_train, features, le_x, le_y)``:
        ``x_train`` is the integer-encoded feature DataFrame, ``y_train``
        the integer-encoded 1-D label array, ``features`` the list of
        feature names, and ``le_x`` / ``le_y`` the fitted ``LabelEncoder``
        objects for the feature values and for the labels.
    """
    features = ["age", "work", "house", "credit"]
    x_train = pd.DataFrame([
        ["青年", "否", "否", "一般"],
        ["青年", "否", "否", "好"],
        ["青年", "是", "否", "好"],
        ["青年", "是", "是", "一般"],
        ["青年", "否", "否", "一般"],
        ["中年", "否", "否", "一般"],
        ["中年", "否", "否", "好"],
        ["中年", "是", "是", "好"],
        ["中年", "否", "是", "非常好"],
        ["中年", "否", "是", "非常好"],
        ["老年", "否", "是", "非常好"],
        ["老年", "否", "是", "好"],
        ["老年", "是", "否", "好"],
        ["老年", "是", "否", "非常好"],
        ["老年", "否", "否", "一般"],
    ])
    y_train = pd.DataFrame(["否", "否", "是", "是", "否", "否", "否", "是",
                            "是", "是", "是", "是", "是", "是", "否"])

    # Label encoding (not one-hot): a single encoder is fitted on every
    # distinct value in the table and then applied column by column.
    le_x = LabelEncoder()
    le_x.fit(np.unique(x_train.values))
    x_train = x_train.apply(le_x.transform)

    # LabelEncoder works on 1-D input; flatten the single-column frame so it
    # does not have to be coerced implicitly.
    labels = y_train.values.ravel()
    le_y = LabelEncoder()
    le_y.fit(labels)
    y_train = le_y.transform(labels)
    return x_train, y_train, features, le_x, le_y


def show(clf, feature, y_type):
    """Render the fitted tree ``clf`` to the image file ``DT_show.png``.

    Args:
        clf: Fitted decision tree passed to ``tree.export_graphviz``.
        feature: Feature names used to label the split nodes.
        y_type: Class names used to label the nodes.
    """
    # out_file=None makes export_graphviz return the DOT source as a string.
    dot_data = tree.export_graphviz(
        clf,
        out_file=None,
        feature_names=feature,
        class_names=y_type,
        filled=True,
        rounded=True,
        special_characters=True,
    )
    graph = pydotplus.graph_from_dot_data(dot_data)
    graph.write_png('DT_show.png')


def main():
    """Train the tree, draw it, and classify one sample."""
    # NOTE(review): no matplotlib plotting happens in this script, so these
    # font settings look unused by the Graphviz rendering - confirm.
    mpl.rcParams["font.sans-serif"] = [u'simHei']
    mpl.rcParams['axes.unicode_minus'] = False

    x_train, y_train, features, le_x, le_y = loaddata()

    clf = DecisionTreeClassifier()
    clf.fit(x_train, y_train)

    # Class names are the string forms of the encoded labels, not the
    # original label text.
    y_type = [str(k) for k in np.unique(y_train)]
    show(clf, features, y_type)

    # Encode the sample with the same encoder that was fitted on the
    # training features before predicting.
    X_show = pd.DataFrame([["青年", "否", "否", "一般"]])
    X_test = X_show.apply(le_x.transform)
    y_predict = clf.predict(X_test)

    sample = [{name: value} for name, value in zip(features, X_show.values[0])]
    print("{0}被分类为{1}".format(sample, le_y.inverse_transform(y_predict)))


if __name__ == '__main__':
    main()

# Output recorded with the original listing:
# [{'age': '青年'}, {'work': '否'}, {'house': '否'}, {'credit': '一般'}]被分类为['否']
# Example 2: train a decision tree on a small loan-approval table and write
# the fitted tree to a PDF file with pydotplus.
import graphviz
import matplotlib as mpl
import numpy as np
import pandas as pd
import pydotplus
from sklearn import tree
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier


def loaddata():
    """Build the encoded training set together with its encoders.

    Returns:
        A tuple ``(x_train, y_train, features, le_x, le_y)``:
        ``x_train`` is the integer-encoded feature DataFrame, ``y_train``
        the integer-encoded 1-D label array, ``features`` the list of
        feature names, and ``le_x`` / ``le_y`` the fitted ``LabelEncoder``
        objects for the feature values and for the labels.
    """
    features = ["age", "work", "house", "credit"]
    x_train = pd.DataFrame([
        ["青年", "否", "否", "一般"],
        ["青年", "否", "否", "好"],
        ["青年", "是", "否", "好"],
        ["青年", "是", "是", "一般"],
        ["青年", "否", "否", "一般"],
        ["中年", "否", "否", "一般"],
        ["中年", "否", "否", "好"],
        ["中年", "是", "是", "好"],
        ["中年", "否", "是", "非常好"],
        ["中年", "否", "是", "非常好"],
        ["老年", "否", "是", "非常好"],
        ["老年", "否", "是", "好"],
        ["老年", "是", "否", "好"],
        ["老年", "是", "否", "非常好"],
        ["老年", "否", "否", "一般"],
    ])
    y_train = pd.DataFrame(["否", "否", "是", "是", "否", "否", "否", "是",
                            "是", "是", "是", "是", "是", "是", "否"])

    # Label encoding (not one-hot): a single encoder is fitted on every
    # distinct value in the table and then applied column by column.
    le_x = LabelEncoder()
    le_x.fit(np.unique(x_train.values))
    x_train = x_train.apply(le_x.transform)

    # LabelEncoder works on 1-D input; flatten the single-column frame so it
    # does not have to be coerced implicitly.
    labels = y_train.values.ravel()
    le_y = LabelEncoder()
    le_y.fit(labels)
    y_train = le_y.transform(labels)
    return x_train, y_train, features, le_x, le_y


def show(clf, feature, y_type):
    """Render the fitted tree ``clf`` to the PDF file ``iris.pdf``.

    Args:
        clf: Fitted decision tree passed to ``tree.export_graphviz``.
        feature: Feature names used to label the split nodes.
        y_type: Class names used to label the nodes.
    """
    # out_file=None makes export_graphviz return the DOT source as a string.
    dot_data = tree.export_graphviz(
        clf,
        out_file=None,
        feature_names=feature,
        class_names=y_type,
        filled=True,
        rounded=True,
        special_characters=True,
    )
    graph = pydotplus.graph_from_dot_data(dot_data)
    # NOTE(review): the output name says "iris" although the data here is the
    # loan table; kept unchanged so the output path stays the same.
    graph.write_pdf('iris.pdf')


def main():
    """Train the tree, draw it, and classify one sample."""
    # NOTE(review): no matplotlib plotting happens in this script, so these
    # font settings look unused by the Graphviz rendering - confirm.
    mpl.rcParams["font.sans-serif"] = [u'simHei']
    mpl.rcParams['axes.unicode_minus'] = False

    x_train, y_train, features, le_x, le_y = loaddata()

    clf = DecisionTreeClassifier()
    clf.fit(x_train, y_train)

    # Class names are the string forms of the encoded labels, not the
    # original label text.
    y_type = [str(k) for k in np.unique(y_train)]
    show(clf, features, y_type)

    # Encode the sample with the same encoder that was fitted on the
    # training features before predicting.
    X_show = pd.DataFrame([["青年", "否", "否", "一般"]])
    X_test = X_show.apply(le_x.transform)
    y_predict = clf.predict(X_test)

    sample = [{name: value} for name, value in zip(features, X_show.values[0])]
    print("{0}被分类为{1}".format(sample, le_y.inverse_transform(y_predict)))


if __name__ == '__main__':
    main()
# Example 3: train a decision tree on a small loan-approval table, export it
# to a DOT file, and render/open that file with the graphviz package.
import graphviz
import matplotlib as mpl
import numpy as np
import pandas as pd
import pydotplus
from sklearn import tree
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier


def loaddata():
    """Build the encoded training set together with its encoders.

    Returns:
        A tuple ``(x_train, y_train, features, le_x, le_y)``:
        ``x_train`` is the integer-encoded feature DataFrame, ``y_train``
        the integer-encoded 1-D label array, ``features`` the list of
        feature names, and ``le_x`` / ``le_y`` the fitted ``LabelEncoder``
        objects for the feature values and for the labels.
    """
    features = ["age", "work", "house", "credit"]
    x_train = pd.DataFrame([
        ["青年", "否", "否", "一般"],
        ["青年", "否", "否", "好"],
        ["青年", "是", "否", "好"],
        ["青年", "是", "是", "一般"],
        ["青年", "否", "否", "一般"],
        ["中年", "否", "否", "一般"],
        ["中年", "否", "否", "好"],
        ["中年", "是", "是", "好"],
        ["中年", "否", "是", "非常好"],
        ["中年", "否", "是", "非常好"],
        ["老年", "否", "是", "非常好"],
        ["老年", "否", "是", "好"],
        ["老年", "是", "否", "好"],
        ["老年", "是", "否", "非常好"],
        ["老年", "否", "否", "一般"],
    ])
    y_train = pd.DataFrame(["否", "否", "是", "是", "否", "否", "否", "是",
                            "是", "是", "是", "是", "是", "是", "否"])

    # Label encoding (not one-hot): a single encoder is fitted on every
    # distinct value in the table and then applied column by column.
    le_x = LabelEncoder()
    le_x.fit(np.unique(x_train.values))
    x_train = x_train.apply(le_x.transform)

    # LabelEncoder works on 1-D input; flatten the single-column frame so it
    # does not have to be coerced implicitly.
    labels = y_train.values.ravel()
    le_y = LabelEncoder()
    le_y.fit(labels)
    y_train = le_y.transform(labels)
    return x_train, y_train, features, le_x, le_y


def show(clf, feature, y_type):
    """Export the fitted tree ``clf`` to ``tree.dot`` and open the rendering.

    Args:
        clf: Fitted decision tree passed to ``tree.export_graphviz``.
        feature: Feature names used to label the split nodes.
        y_type: Class names used to label the nodes.
    """
    # With a file name as out_file, the DOT source is written to disk.
    tree.export_graphviz(
        clf,
        out_file='tree.dot',
        feature_names=feature,
        class_names=y_type,
        filled=True,
        rounded=True,
        special_characters=True,
    )
    # Read the file back as UTF-8 explicitly instead of relying on the
    # platform default encoding.
    with open('tree.dot', encoding='utf-8') as f:
        dot_source = f.read()
    # view() renders the graph (the original comment says a PDF is produced)
    # and opens the result in a viewer.
    dot = graphviz.Source(dot_source)
    dot.view()


def main():
    """Train the tree, draw it, and classify one sample."""
    # NOTE(review): no matplotlib plotting happens in this script, so these
    # font settings look unused by the Graphviz rendering - confirm.
    mpl.rcParams["font.sans-serif"] = [u'simHei']
    mpl.rcParams['axes.unicode_minus'] = False

    x_train, y_train, features, le_x, le_y = loaddata()

    clf = DecisionTreeClassifier()
    clf.fit(x_train, y_train)

    # Class names are the string forms of the encoded labels, not the
    # original label text.
    y_type = [str(k) for k in np.unique(y_train)]
    show(clf, features, y_type)

    # Encode the sample with the same encoder that was fitted on the
    # training features before predicting.
    X_show = pd.DataFrame([["青年", "否", "否", "一般"]])
    X_test = X_show.apply(le_x.transform)
    y_predict = clf.predict(X_test)

    sample = [{name: value} for name, value in zip(features, X_show.values[0])]
    print("{0}被分类为{1}".format(sample, le_y.inverse_transform(y_predict)))


if __name__ == '__main__':
    main()
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。