赞
踩
目录
废话不多说,先看看代码和实验结果。
提示:以下是本篇文章正文内容,下面案例可供参考
如果导入时报错,说明缺少相应的包:可以先在 PyCharm 中安装;如果仍然不行,就在命令行窗口输入 `pip install 包名` 进行安装。
- import matplotlib.pyplot as plt
- from sklearn import neighbors
- import numpy as np
- import pandas as pd
- import seaborn as sns
- from sklearn import model_selection
- from sklearn import metrics
导入数据并查看前5行数据
# Load the iris dataset from a fixed local CSV path.
df1 = pd.read_csv(r'D:\python\iris.csv')

# Preview the first five rows of the table.
print(df1.head(n=5))

# Every column except the last is treated as a predictor
# (presumably the last column is the class label -- confirm against the CSV).
predictors = df1.columns[:-1]
代码如下:
# Hold out half of the rows for testing; the fixed seed makes the split
# reproducible from run to run.
x_train, x_test, y_train, y_test = model_selection.train_test_split(
    df1[predictors],
    df1['Species'],
    test_size=0.5,
    random_state=1234,
)

# Show ceil(log2(number of rows)), the exclusive upper bound used for the
# candidate K values.
print(np.ceil(np.log2(len(df1))))
# Candidate K values: 1 up to, but not including, ceil(log2(number of rows)).
# The stop value is a float, so the resulting array is float-valued.
K = np.arange(1, np.ceil(np.log2(len(df1))))
print(K)

# Collects the mean cross-validated accuracy of each candidate K.
accuracy = []
使用十折交叉验证评估不同的K值,并绘制各K值对应平均准确率的折线图,从中找出最合适的K值
# Score every candidate K with 10-fold cross-validation on the training split.
for k in K:
    # K holds floats, so cast to int before handing it to the classifier.
    candidate = neighbors.KNeighborsClassifier(
        n_neighbors=int(k),
        weights='distance',
    )
    cv_result = model_selection.cross_val_score(
        candidate,
        x_train,
        y_train,
        cv=10,
        scoring='accuracy',
    )
    accuracy.append(cv_result.mean())

# Position (within K / accuracy) of the highest mean accuracy.
arg_max = np.argmax(accuracy)
# Use a CJK-capable font and keep the minus sign rendering correctly with it.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

# Line plot of mean accuracy against K, with a marker on every data point.
plt.plot(K, accuracy)
plt.scatter(K, accuracy)

# Annotate the best-scoring point with its K value.
plt.text(
    K[arg_max],
    accuracy[arg_max],
    '最佳K值为%s' % int(K[arg_max]),
)
plt.show()
代入K值,进行模型拟合
# Rebuild the model with the K that scored best in cross-validation instead
# of a hard-coded neighbour count, so the fit follows the data actually used.
# K holds floats, hence the int() cast.
best_k = int(K[arg_max])
knn_class = neighbors.KNeighborsClassifier(n_neighbors=best_k, weights='distance')

# Fit on the training split.
knn_class.fit(x_train, y_train)

# Predict the labels of the held-out test split.
predict = knn_class.predict(x_test)
# Confusion matrix: rows are predicted labels, columns are true labels.
cm = pd.crosstab(predict, y_test)
print(f'鸢尾花种类混淆矩阵\n{cm}')

# Heatmap of the confusion matrix.
# Take the class labels from the data rather than hard-coding them, and fill
# combinations that never occur with 0. Hard-coded labels produce an all-NaN
# heatmap when the CSV spells the classes differently, and a NaN row when a
# class is never predicted.
labels = sorted(set(y_test) | set(predict))
cm = cm.reindex(index=labels, columns=labels, fill_value=0)
sns.heatmap(cm, annot=True, cmap='GnBu')
plt.xlabel('Real Lable')
plt.ylabel('Predict Lable')
plt.title('鸢尾花种类热力图')
plt.show()

# Per-class precision / recall / F1 report on the test split.
b = metrics.classification_report(y_test, predict)
print(f'显示各类预测准确率\n{b}')
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。