赞
踩
# %matplotlib inline  # Jupyter magic: render figures inline (invalid in a plain .py file, kept as a comment)

from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_wine

# Load the UCI wine dataset: 178 samples x 13 numeric features, 3 classes.
wine = load_wine()

wine.data.shape  # -> (178, 13)
wine.target      # -> array of labels: 59 samples of class 0, 71 of class 1, 48 of class 2
# Workflow:
#   1. Instantiate the estimator.
#   2. Fit it on the training split with .fit().
#   3. Evaluate on the held-out split with .score() (mean accuracy).
from sklearn.model_selection import train_test_split

# Split features and labels: 70% train, 30% test.
Xtrain, Xtest, Ytrain, Ytest = train_test_split(wine.data, wine.target, test_size=0.3)

# random_state pins the estimators' internal randomness so runs are reproducible.
clf = DecisionTreeClassifier(random_state=0)
rfc = RandomForestClassifier(random_state=0)  # renamed from 'rlf' for consistency with later sections

clf = clf.fit(Xtrain, Ytrain)  # train on the training split
rfc = rfc.fit(Xtrain, Ytrain)

score_c = clf.score(Xtest, Ytest)  # mean accuracy on the test split
score_r = rfc.score(Xtest, Ytest)

print("Single Tree:{}".format(score_c), "Random Forest:{}".format(score_r))
# Recorded output: Single Tree:0.9814814814814815 Random Forest:1.0
# Cross-validation: a single train/test split can be lucky or unlucky, so
# check model stability across several different splits.
# cross_val_score does the splitting itself: pass the full feature matrix and
# full label vector; cv is the number of folds.
from sklearn.model_selection import cross_val_score
import matplotlib.pyplot as plt

rfc = RandomForestClassifier(n_estimators=25)
rfc_s = cross_val_score(rfc, wine.data, wine.target, cv=10)

clf = DecisionTreeClassifier()
clf_s = cross_val_score(clf, wine.data, wine.target, cv=10)

plt.plot(range(1, 11), rfc_s, label="RandomForest")
plt.plot(range(1, 11), clf_s, label="DecisionTree")
plt.legend()  # show the legend
plt.show()
# Observation: in every fold the random forest's accuracy is >= the single tree's.
# Learning curve over n_estimators: refit a random forest for every forest
# size from 1 to 200 and record the mean 10-fold CV accuracy of each.
superpa = []
for i in range(200):
    rfc = RandomForestClassifier(n_estimators=i + 1, n_jobs=-1)  # n_jobs=-1: use all CPU cores
    superpa.append(cross_val_score(rfc, wine.data, wine.target, cv=10).mean())

# Best mean accuracy and the n_estimators value that produced it.
# list.index returns the 0-based position, so +1 converts back to n_estimators.
# NOTE: no random_state is set, so the winning size varies run to run
# (one recorded run: 0.9944444444444445 at n_estimators=62).
print(max(superpa), superpa.index(max(superpa)) + 1)

plt.figure(figsize=[20, 5])
plt.plot(range(1, 201), superpa)
plt.show()
# Repeat the tree-vs-forest comparison 10 times, each with its own 10-fold
# cross-validation (100 fits per model overall), and plot the mean scores.

rfc_l = []  # mean CV accuracy of the random forest, one entry per repetition
clf_l = []  # mean CV accuracy of the decision tree, one entry per repetition

for i in range(10):
    rfc = RandomForestClassifier(n_estimators=25)
    rfc_l.append(cross_val_score(rfc, wine.data, wine.target, cv=10).mean())

    clf = DecisionTreeClassifier()
    clf_l.append(cross_val_score(clf, wine.data, wine.target, cv=10).mean())

# Bug fix: original read "r、fc_l" — a stray full-width comma inside the
# identifier made this line a SyntaxError.
plt.plot(range(1, 11), rfc_l, label="RandomForest")
plt.plot(range(1, 11), clf_l, label="DecisionTree")
plt.legend()
plt.show()

# Observation: across repetitions the random forest consistently beats the single tree.
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。