
Decision Tree vs. Random Forest on the Wine Dataset

Comparing a decision tree and a random forest for wine classification
%matplotlib inline
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_wine
wine = load_wine()
wine
[{"metadata":{"trusted":false,"scrolled":false},"id":"e7e71df6","cell_type":"code","source":"wine","execution_count":88,"outputs":[{"data":{"text/plain":"{'data': array([[1.423e+01, 1.710e+00, 2.430e+00, ..., 1.040e+00, 3.920e+00,\n         1.065e+03],\n        [1.320e+01, 1.780e+00, 2.140e+00, ..., 1.050e+00, 3.400e+00,\n         1.050e+03],\n        [1.316e+01, 2.360e+00, 2.670e+00, ..., 1.030e+00, 3.170e+00,\n         1.185e+03],\n        ...,\n        [1.327e+01, 4.280e+00, 2.260e+00, ..., 5.900e-01, 1.560e+00,\n         8.350e+02],\n        [1.317e+01, 2.590e+00, 2.370e+00, ..., 6.000e-01, 1.620e+00,\n         8.400e+02],\n        [1.413e+01, 4.100e+00, 2.740e+00, ..., 6.100e-01, 1.600e+00,\n         5.600e+02]]),\n 'target': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,\n        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,\n        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n        2, 2]),\n 'frame': None,\n 'target_names': array(['class_0', 'class_1', 'class_2'], dtype='
wine.data.shape
(178, 13)
wine.target
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2])
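
For a quick look at the 13 features behind that shape, the Bunch can be wrapped in a pandas DataFrame. This is just a convenience sketch and not part of the original notebook (pandas is not used elsewhere in this post):

import pandas as pd

# Put features and labels side by side for inspection
df = pd.concat([pd.DataFrame(wine.data, columns=wine.feature_names),
                pd.Series(wine.target, name="label")], axis=1)
df.head()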
# Instantiate the model
# Fit the instantiated model on the training set, using the fit interface
# Then feed the test set to the trained model through other interfaces to get the results we want, e.g. score(Xtest, Ytest)
from sklearn.model_selection import train_test_split
Xtrain, Xtest, Ytrain, Ytest = train_test_split(wine.data, wine.target, test_size=0.3)  # split all features and labels
clf = DecisionTreeClassifier(random_state=0)  # instantiate; random_state controls the randomness
rlf = RandomForestClassifier(random_state=0)
clf = clf.fit(Xtrain, Ytrain)  # train on the training set
rlf = rlf.fit(Xtrain, Ytrain)
score_c = clf.score(Xtest, Ytest)  # the model's accuracy on the test set
score_r = rlf.score(Xtest, Ytest)
  1. print("Single Tree:{}".format(score_c)
  2. ,"Random Forest:{}".format(score_r))
Single Tree:0.9814814814814815 Random Forest:1.0

# Cross-validation: a single split can distort the assessment, so we want to check the model's stability across different train/test splits
# cross_val_score does the splitting itself: pass in the full feature matrix and the full label vector; cv is the number of folds
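
To see why a single split is not enough, here is a small sketch (not part of the original notebook) that reruns train_test_split with different seeds; the spread of scores is the instability that cross-validation averages out:

# Accuracy of a single tree under ten different random splits
scores = []
for seed in range(10):
    Xtr, Xte, Ytr, Yte = train_test_split(wine.data, wine.target,
                                          test_size=0.3, random_state=seed)
    tree = DecisionTreeClassifier(random_state=0).fit(Xtr, Ytr)
    scores.append(tree.score(Xte, Yte))
print(min(scores), max(scores))  # the gap between these motivates cross-validation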

from sklearn.model_selection import cross_val_score
import matplotlib.pyplot as plt

rfc = RandomForestClassifier(n_estimators=25)
rfc_s = cross_val_score(rfc, wine.data, wine.target, cv=10)
clf = DecisionTreeClassifier()
clf_s = cross_val_score(clf, wine.data, wine.target, cv=10)
plt.plot(range(1, 11), rfc_s, label="RandomForest")
plt.plot(range(1, 11), clf_s, label="DecisionTree")
plt.legend()  # show the legend
plt.show()
# Result: in every fold, the random forest's accuracy is at least that of the decision tree
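
To back that claim with numbers rather than just the plot, a quick check reusing the rfc_s and clf_s score arrays from the cell above:

# How many of the 10 folds have the forest at least as accurate as the tree?
print((rfc_s >= clf_s).sum(), "of 10 folds")
print("forest mean: {:.4f}  tree mean: {:.4f}".format(rfc_s.mean(), clf_s.mean()))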

superpa = []  # collect the mean CV score for each n_estimators
for i in range(200):  # 200 runs, n_estimators from 1 to 200
    rfc = RandomForestClassifier(n_estimators=i+1, n_jobs=-1)
    rfc_s = cross_val_score(rfc, wine.data, wine.target, cv=10).mean()
    superpa.append(rfc_s)
print(max(superpa), superpa.index(max(superpa))+1)  # best accuracy and the n_estimators that produced it
plt.figure(figsize=[20, 5])
plt.plot(range(1, 201), superpa)
plt.show()
# list.index(object) returns the index of object in the list; the +1 converts the 0-based index back to the n_estimators value
0.9944444444444445 62
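
The loop above is a hand-rolled learning curve over n_estimators. The same search can also be phrased with GridSearchCV; the sketch below is an alternative, not part of the original notebook, and the narrow range around the peak found above is an assumption to keep the run short (scores will vary with the forest's randomness):

from sklearn.model_selection import GridSearchCV

# 10-fold CV over n_estimators values near the peak found above
param_grid = {"n_estimators": list(range(55, 71))}
gs = GridSearchCV(RandomForestClassifier(n_jobs=-1), param_grid, cv=10)
gs.fit(wine.data, wine.target)
print(gs.best_params_, gs.best_score_)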

# Plot of 100 cross-validations across 10 rounds of modeling
rfc_l = []
clf_l = []
# two lists to hold the results
for i in range(10):  # ten rounds of 10-fold CV per model, i.e. 100 fits in total
    rfc = RandomForestClassifier(n_estimators=25)
    rfc_s = cross_val_score(rfc, wine.data, wine.target, cv=10).mean()
    rfc_l.append(rfc_s)  # store this round's mean in rfc_l
    clf = DecisionTreeClassifier()
    clf_s = cross_val_score(clf, wine.data, wine.target, cv=10).mean()
    clf_l.append(clf_s)  # store this round's mean in clf_l
plt.plot(range(1, 11), rfc_l, label="RandomForest")
plt.plot(range(1, 11), clf_l, label="DecisionTree")
plt.legend()
plt.show()
# Result: across repeated rounds of modeling, the random forest consistently beats the decision tree
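
Why does averaging many trees help at all? A minimal back-of-the-envelope sketch, assuming 25 independent trees that each err with probability 0.2 (real trees are correlated, so this best case overstates the benefit): the majority vote is wrong only when 13 or more trees err at once.

import numpy as np
from scipy.special import comb

epsilon = 0.2  # assumed per-tree error rate
# P(majority of 25 trees is wrong) = P(13 or more trees err simultaneously)
forest_error = np.sum([comb(25, i) * epsilon**i * (1 - epsilon)**(25 - i)
                       for i in range(13, 26)])
print(forest_error)  # about 0.00037, far below the single-tree error of 0.2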
