赞
踩
# Import the iris dataset loader
from sklearn.datasets import load_iris
# Load the dataset and display it; note that data and target are stored separately
iris = load_iris()
iris
{'data': array([[5.1, 3.5, 1.4, 0.2],
[4.9, 3. , 1.4, 0.2],
[4.7, 3.2, 1.3, 0.2],
[4.6, 3.1, 1.5, 0.2],
[5. , 3.6, 1.4, 0.2],
[5.4, 3.9, 1.7, 0.4],
[4.6, 3.4, 1.4, 0.3],
[5. , 3.4, 1.5, 0.2],
[4.4, 2.9, 1.4, 0.2],
[4.9, 3.1, 1.5, 0.1],
…
'target': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]),
'frame': None,
'target_names': array(['setosa', 'versicolor', 'virginica'], dtype='<U10'),
‘DESCR’: ‘… _iris_dataset:\n\nIris plants dataset\n--------------------\n\nData Set Characteristics:\n\n :Number of Instances: 150 (50 in each of three classes)\n :Number of Attributes: 4 numeric, predictive attributes and the class\n :Attribute Information:\n - sepal length in cm\n - sepal width in cm\n - petal length in cm\n - petal width in cm\n - class:\n - Iris-Setosa\n - Iris-Versicolour\n - Iris-Virginica\n \n :Summary Statistics:\n\n ============== ==== ==== ======= ===== ====================\n Min Max Mean SD Class Correlation\n ============== ==== ==== ======= ===== ====================\n sepal length: 4.3 7.9 5.84 0.83 0.7826\n sepal width: 2.0 4.4 3.05 0.43 -0.4194\n petal length: 1.0 6.9 3.76 1.76 0.9490 (high!)\n petal width: 0.1 2.5 1.20 0.76 0.9565 (high!)\n ============== ==== ==== ======= ===== ====================\n\n :Missing Attribute Values: None\n :Class Distribution: 33.3% for each of 3 classes.\n :Creator: R.A. Fisher\n :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)\n :Date: July, 1988\n\nThe famous Iris database, first used by Sir R.A. Fisher. The dataset is taken\nfrom Fisher’s paper. Note that it’s the same as in R, but not as in the UCI\nMachine Learning Repository, which has two wrong data points.\n\nThis is perhaps the best known database to be found in the\npattern recognition literature. Fisher’s paper is a classic in the field and\nis referenced frequently to this day. (See Duda & Hart, for example.) The\ndata set contains 3 classes of 50 instances each, where each class refers to a\ntype of iris plant. One class is linearly separable from the other 2; the\nlatter are NOT linearly separable from each other.\n\n… topic:: References\n\n - Fisher, R.A. “The use of multiple measurements in taxonomic problems”\n Annual Eugenics, 7, Part II, 179-188 (1936); also in “Contributions to\n Mathematical Statistics” (John Wiley, NY, 1950).\n - Duda, R.O., & Hart, P.E. 
(1973) Pattern Classification and Scene Analysis.\n (Q327.D83) John Wiley & Sons. ISBN 0-471-22361-1. See page 218.\n - Dasarathy, B.V. (1980) “Nosing Around the Neighborhood: A New System\n Structure and Classification Rule for Recognition in Partially Exposed\n Environments”. IEEE Transactions on Pattern Analysis and Machine\n Intelligence, Vol. PAMI-2, No. 1, 67-71.\n - Gates, G.W. (1972) “The Reduced Nearest Neighbor Rule”. IEEE Transactions\n on Information Theory, May 1972, 431-433.\n - See also: 1988 MLC Proceedings, 54-64. Cheeseman et al"s AUTOCLASS II\n conceptual clustering system finds 3 classes in the data.\n - Many, many more …’,
'feature_names': ['sepal length (cm)',
'sepal width (cm)',
'petal length (cm)',
'petal width (cm)'],
'filename': 'iris.csv',
'data_module': 'sklearn.datasets.data'}
# Import the 20 newsgroups dataset fetcher
from sklearn.datasets import fetch_20newsgroups
# Load the data
news = fetch_20newsgroups(data_home="../data/")
news
{'data': ["From: lerxst@wam.umd.edu (where's my thing)\nSubject: WHAT car is this!?\nNntp-Posting-Host: rac3.wam.umd.edu\nOrganization: University of Maryland, College Park\nLines: 15\n\n I was wondering if anyone out there could enlighten me on this car I saw\nthe other day. It was a 2-door sports car, looked to be from the late 60s/\nearly 70s. It was called a Bricklin. The doors were really small. In addition,\nthe front bumper was separate from the rest of the body. This is \nall I know. If anyone can tellme a model name, engine specs, years\nof production, where this car is made, history, or whatever info you\nhave on this funky looking car, please e-mail.\n\nThanks,\n- IL\n ---- brought to you by your neighborhood Lerxst ----\n\n\n\n\n",,.........此处省略若干
# Import the iris dataset loader
from sklearn.datasets import load_iris
# Load the data
iris = load_iris()
# Print the whole returned object
print("鸢尾花数据集的返回值\n", iris)
# The return value is a Bunch, which inherits from dict; the lines below read
# its fields both by key (iris["data"]) and by attribute (iris.target)
print("鸢尾花的特征值\n", iris["data"])
print("鸢尾花的目标值\n", iris.target)
print("鸢尾花特征的名字\n", iris.feature_names)
print("鸢尾花目标值的名字\n", iris.target_names)
print("鸢尾花的描述\n", iris.DESCR)
鸢尾花数据集的返回值 {'data': array([[5.1, 3.5, 1.4, 0.2], [4.9, 3. , 1.4, 0.2], [4.7, 3.2, 1.3, 0.2], [4.6, 3.1, 1.5, 0.2], [5. , 3.6, 1.4, 0.2], 此处省略若干, [5.9, 3. , 5.1, 1.8]]), 'target': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]), 'frame': None, 'target_names': array(['setosa', 'versicolor', 'virginica'], dtype='<U10'), 'DESCR': '.. _iris_dataset:\n\nIris plants dataset\n--------------------\n\n**Data Set Characteristics:**\n\n :Number of Instances: 150 (50 in each of three classes)\n :Number of Attributes: 4 numeric, predictive attributes and the class\n :Attribute Information:\n - sepal length in cm\n - sepal width in cm\n - petal length in cm\n - petal width in cm\n - class:\n - Iris-Setosa\n - Iris-Versicolour\n - Iris-Virginica\n \n :Summary Statistics:\n\n ============== ==== ==== ======= ===== ====================\n Min Max Mean SD Class Correlation\n ============== ==== ==== ======= ===== ====================\n sepal length: 4.3 7.9 5.84 0.83 0.7826\n sepal width: 2.0 4.4 3.05 0.43 -0.4194\n petal length: 1.0 6.9 3.76 1.76 0.9490 (high!)\n petal width: 0.1 2.5 1.20 0.76 0.9565 (high!)\n ============== ==== ==== ======= ===== ====================\n\n :Missing Attribute Values: None\n :Class Distribution: 33.3% for each of 3 classes.\n :Creator: R.A. Fisher\n :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)\n :Date: July, 1988\n\nThe famous Iris database, first used by Sir R.A. Fisher. The dataset is taken\nfrom Fisher\'s paper. 
Note that it\'s the same as in R, but not as in the UCI\nMachine Learning Repository, which has two wrong data points.\n\nThis is perhaps the best known database to be found in the\npattern recognition literature. Fisher\'s paper is a classic in the field and\nis referenced frequently to this day. (See Duda & Hart, for example.) The\ndata set contains 3 classes of 50 instances each, where each class refers to a\ntype of iris plant. One class is linearly separable from the other 2; the\nlatter are NOT linearly separable from each other.\n\n.. topic:: References\n\n - Fisher, R.A. "The use of multiple measurements in taxonomic problems"\n Annual Eugenics, 7, Part II, 179-188 (1936); also in "Contributions to\n Mathematical Statistics" (John Wiley, NY, 1950).\n - Duda, R.O., & Hart, P.E. (1973) Pattern Classification and Scene Analysis.\n (Q327.D83) John Wiley & Sons. ISBN 0-471-22361-1. See page 218.\n - Dasarathy, B.V. (1980) "Nosing Around the Neighborhood: A New System\n Structure and Classification Rule for Recognition in Partially Exposed\n Environments". IEEE Transactions on Pattern Analysis and Machine\n Intelligence, Vol. PAMI-2, No. 1, 67-71.\n - Gates, G.W. (1972) "The Reduced Nearest Neighbor Rule". IEEE Transactions\n on Information Theory, May 1972, 431-433.\n - See also: 1988 MLC Proceedings, 54-64. Cheeseman et al"s AUTOCLASS II\n conceptual clustering system finds 3 classes in the data.\n - Many, many more ...', 'feature_names': ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)'], 'filename': 'iris.csv', 'data_module': 'sklearn.datasets.data'} 鸢尾花的特征值 [[5.1 3.5 1.4 0.2] [4.9 3. 1.4 0.2] [4.7 3.2 1.3 0.2] [4.6 3.1 1.5 0.2] [5. 
3.6 1.4 0.2] [5.4 3.9 1.7 0.4] [4.6 3.4 1.4 0.3]此处省略若干] 鸢尾花的目标值 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2] 鸢尾花特征的名字 ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)'] 鸢尾花目标值的名字 ['setosa' 'versicolor' 'virginica'] 鸢尾花的描述 .. _iris_dataset: Iris plants dataset -------------------- **Data Set Characteristics:** :Number of Instances: 150 (50 in each of three classes) :Number of Attributes: 4 numeric, predictive attributes and the class :Attribute Information: - sepal length in cm - sepal width in cm - petal length in cm - petal width in cm - class: - Iris-Setosa - Iris-Versicolour - Iris-Virginica :Summary Statistics: ============== ==== ==== ======= ===== ==================== Min Max Mean SD Class Correlation ============== ==== ==== ======= ===== ==================== sepal length: 4.3 7.9 5.84 0.83 0.7826 sepal width: 2.0 4.4 3.05 0.43 -0.4194 petal length: 1.0 6.9 3.76 1.76 0.9490 (high!) petal width: 0.1 2.5 1.20 0.76 0.9565 (high!) ============== ==== ==== ======= ===== ==================== :Missing Attribute Values: None :Class Distribution: 33.3% for each of 3 classes. :Creator: R.A. Fisher :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov) :Date: July, 1988 The famous Iris database, first used by Sir R.A. Fisher. The dataset is taken from Fisher's paper. Note that it's the same as in R, but not as in the UCI Machine Learning Repository, which has two wrong data points. This is perhaps the best known database to be found in the pattern recognition literature. Fisher's paper is a classic in the field and is referenced frequently to this day. (See Duda & Hart, for example.) The data set contains 3 classes of 50 instances each, where each class refers to a type of iris plant. 
One class is linearly separable from the other 2; the latter are NOT linearly separable from each other. .. topic:: References - Fisher, R.A. "The use of multiple measurements in taxonomic problems" Annual Eugenics, 7, Part II, 179-188 (1936); also in "Contributions to Mathematical Statistics" (John Wiley, NY, 1950). - Duda, R.O., & Hart, P.E. (1973) Pattern Classification and Scene Analysis. (Q327.D83) John Wiley & Sons. ISBN 0-471-22361-1. See page 218. - Dasarathy, B.V. (1980) "Nosing Around the Neighborhood: A New System Structure and Classification Rule for Recognition in Partially Exposed Environments". IEEE Transactions on Pattern Analysis and Machine Intelligence, Vol. PAMI-2, No. 1, 67-71. - Gates, G.W. (1972) "The Reduced Nearest Neighbor Rule". IEEE Transactions on Information Theory, May 1972, 431-433. - See also: 1988 MLC Proceedings, 54-64. Cheeseman et al"s AUTOCLASS II conceptual clustering system finds 3 classes in the data. - Many, many more ...
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
# Convert the feature data into a DataFrame with readable column names
iris_d = pd.DataFrame(iris["data"], columns=["Sepal_Length", "Sepal_Width", "Petal_Length", "Petal_Width"])
# Append the target values as an extra "Species" column
iris_d["Species"] = iris.target
# Basic plot
def iris_simple_plot(data, col1, col2):
    """Plot ``col2`` against ``col1`` with seaborn's ``lmplot``.

    Args:
        data: Table holding the columns to plot (a DataFrame in this file).
        col1: Name of the column drawn on the x axis.
        col2: Name of the column drawn on the y axis.
    """
    sns.lmplot(data=data, x=col1, y=col2)


iris_simple_plot(iris_d, "Sepal_Length", "Petal_Width")
# Add the target: points are drawn per target value and a separate line is
# fitted for each group
def iris_plot_withhue(data, col1, col2, target):
    """Plot ``col2`` against ``col1`` with ``lmplot``, grouped by ``target``.

    Args:
        data: Table holding the columns to plot (a DataFrame in this file).
        col1: Name of the column drawn on the x axis.
        col2: Name of the column drawn on the y axis.
        target: Name of the column passed to ``hue`` to split the points
            into groups.
    """
    sns.lmplot(data=data, x=col1, y=col2, hue=target)


iris_plot_withhue(iris_d, "Sepal_Length", "Petal_Width", "Species")
# Remove the fitted lines
def iris_plot_withhue_withoutfit(data, col1, col2, target):
    """Plot ``col2`` against ``col1`` grouped by ``target``, without fitting.

    Args:
        data: Table holding the columns to plot (a DataFrame in this file).
        col1: Name of the column drawn on the x axis.
        col2: Name of the column drawn on the y axis.
        target: Name of the column passed to ``hue`` to split the points
            into groups.
    """
    # fit_reg=False disables the regression fit, leaving only the points.
    sns.lmplot(data=data, x=col1, y=col2, hue=target, fit_reg=False)


iris_plot_withhue_withoutfit(iris_d, "Sepal_Length", "Petal_Width", "Species")
# Add auxiliary information (title and axis labels)
def iris_plot(data, col1, col2, target):
    """Plot ``col2`` against ``col1`` grouped by ``target``, then label and show it.

    Args:
        data: Table holding the columns to plot (a DataFrame in this file).
        col1: Name of the x-axis column; also used as the x-axis label.
        col2: Name of the y-axis column; also used as the y-axis label.
        target: Name of the column passed to ``hue`` to split the points
            into groups.
    """
    sns.lmplot(x=col1, y=col2, data=data, hue=target, fit_reg=False)
    # NOTE(review): the title is Chinese text; rendering it presumably needs a
    # CJK-capable matplotlib font to be configured -- confirm.
    plt.title("鸢尾花数据展示")
    plt.xlabel(col1)
    plt.ylabel(col2)
    plt.show()


# Call the function defined in this cell so the title and labels are applied.
iris_plot(iris_d, "Sepal_Length", "Petal_Width", "Species")
# Import
from sklearn.model_selection import train_test_split
# Split; the four return values are, in order: training features, test features,
# training targets and test targets
# test_size is the fraction of samples used for the test set; random_state is the
# random seed, and passing the same integer gives the same split
# (no fixed seed is passed here: random_state=None)
x_train, x_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.3, random_state=None)
x_train, x_test, y_train, y_test
(array([[5. , 3.3, 1.4, 0.2], [6.3, 3.3, 4.7, 1.6], [6. , 3. , 4.8, 1.8], [6.2, 3.4, 5.4, 2.3], [6.2, 2.8, 4.8, 1.8], [5.1, 3.5, 1.4, 0.2], [6.3, 3.4, 5.6, 2.4],此处省略若干 array([0, 1, 2, 2, 2, 0, 2, 0, 0, 1, 1, 2, 2, 0, 1, 2, 0, 0, 0, 2, 1, 0, 1, 1, 1, 0, 0, 0, 2, 1, 0, 0, 1, 2, 2, 1, 2, 1, 0, 0, 0, 1, 1, 2, 2, 1, 0, 1, 2, 2, 2, 0, 2, 1, 2, 0, 1, 1, 0, 1, 2, 1, 0, 0, 2, 2, 2, 0, 1, 0, 0, 1, 0, 1, 1, 1, 2, 2, 1, 2, 0, 2, 1, 0, 2, 1, 2, 2, 2, 1, 2, 1, 0, 1, 2, 0, 0, 0, 0, 2, 0, 2, 2, 1, 2]), array([0, 1, 0, 0, 1, 0, 2, 1, 2, 1, 0, 1, 1, 1, 2, 1, 2, 2, 1, 0, 2, 0, 1, 1, 0, 1, 1, 2, 2, 1, 2, 0, 2, 1, 0, 2, 0, 0, 0, 0, 2, 2, 1, 0, 1]))
sklearn.preprocessing
通过对原始数据进行变换,把数据映射到指定区间(默认为[0, 1])之内
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
# Prepare the data
# A .txt file is also read with read_csv; the file is tab-separated and has no
# header row, so the column names are supplied through `names`
data = pd.read_csv("../data/datingTestSet.txt", names=["milage", "Liters" , "Consumtime", "target"],
header=None, sep="\t")
data
| | milage | Liters | Consumtime | target |
---|---|---|---|---|
0 | 40920 | 8.326976 | 0.953952 | largeDoses |
1 | 14488 | 7.153469 | 1.673904 | smallDoses |
2 | 26052 | 1.441871 | 0.805124 | didntLike |
3 | 75136 | 13.147394 | 0.428964 | didntLike |
4 | 38344 | 1.669788 | 0.134296 | didntLike |
... | ... | ... | ... | ... |
995 | 11145 | 3.410627 | 0.631838 | smallDoses |
996 | 68846 | 9.974715 | 0.669787 | didntLike |
997 | 26575 | 10.650102 | 0.866627 | largeDoses |
998 | 48111 | 9.134528 | 0.728045 | largeDoses |
999 | 43757 | 7.882601 | 1.332446 | largeDoses |
1000 rows × 4 columns
# Normalization (min-max scaling)
# Instantiate a transformer
transfer = MinMaxScaler(feature_range=(0, 1))
# Call fit_transform on the three feature columns (the target column is not passed);
# note that `data` is rebound to the transformed result
data = transfer.fit_transform(data[["milage", "Liters" , "Consumtime"]])
print("归一化的结果:\n")
data
归一化的结果:
array([[0.44832535, 0.39805139, 0.56233353],
[0.15873259, 0.34195467, 0.98724416],
[0.28542943, 0.06892523, 0.47449629],
...,
[0.29115949, 0.50910294, 0.51079493],
[0.52711097, 0.43665451, 0.4290048 ],
[0.47940793, 0.3768091 , 0.78571804]])
归一化实现起来较为简单,但存在一个致命的缺点:最大值和最小值极易受到异常点的影响,鲁棒性很差,因此只适合传统的精确小数据场景。
通过对原始数据进行变换,把数据变换到均值为0、标准差为1的范围内
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Prepare the data: the file is tab-separated and has no header row, so the
# column names are supplied through `names`
data = pd.read_csv(
    "../data/datingTestSet.txt",
    names=["milage", "Liters", "Consumtime", "target"],
    header=None,
    sep="\t",
)

# Standardization
# Instantiate a transformer
transfer = StandardScaler()
# Call fit_transform on the three feature columns (the target column is not passed)
data = transfer.fit_transform(data[["milage", "Liters", "Consumtime"]])
print("标准化的结果:\n", data)
print("每一列的均值:\n", transfer.mean_)
print("每一列的方差:\n", transfer.var_)
标准化的结果:
[[ 0.33193158 0.41660188 0.24523407]
[-0.87247784 0.13992897 1.69385734]
[-0.34554872 -1.20667094 -0.05422437]
...
[-0.32171752 0.96431572 0.06952649]
[ 0.65959911 0.60699509 -0.20931587]
[ 0.46120328 0.31183342 1.00680598]]
每一列的均值:
[3.36354210e+04 6.55996083e+00 8.32072997e-01]
每一列的方差:
[4.81628039e+08 1.79902874e+01 2.46999554e-01]
导入模块
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
从sklearn当中获取数据集,然后进行数据集的分隔
# 1. Load the dataset
iris = load_iris()
# 2. Basic data processing
# The data is already fairly clean, so splitting is all that is needed
# Split the dataset (20% of the samples go to the test set)
x_train, x_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.2)
进行数据集标准化
应将训练集和测试集分开后再规范化处理,测试集使用的是训练集保留下的参数(归一化的max、min,标准化的mean、std),也就是训练集规范化时使用自己的最大值最小值、均值方差,测试集规范化计算时仍使用训练集的最大值最小值、均值方差。
# 3. Feature engineering: standardization
transfer = StandardScaler()
# Fit on the training set only, then transform it
x_train = transfer.fit_transform(x_train)
# The test set is only transformed, using the transformer already fitted on the training set
x_test = transfer.transform(x_test)
模型进行训练预测
# 4. Machine learning (model training)
estimator = KNeighborsClassifier(n_neighbors=5)
estimator.fit(x_train, y_train)
# 5. Model evaluation
# Method 1: compare the predicted values with the true values
y_predict = estimator.predict(x_test)
print("预测结果为:\n", y_predict)
print("比对真实值和预测值", y_predict==y_test)
# Method 2: compute the accuracy directly
score = estimator.score(x_test, y_test)
print("准确率为:\n", score)
预测结果为:
[2 2 2 2 2 1 0 2 2 2 0 2 2 1 2 1 2 2 2 1 0 2 1 0 0 2 0 0 2 2]
比对真实值和预测值 [ True True True True False True True True True True True True
True True True True True True True True True False True True
True True True True True True]
准确率为:
0.9333333333333333
交叉验证并不能提高训练出来的模型的准确性,只能更好地评估模型的准确性
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier

# 1. Load the dataset
iris = load_iris()

# 2. Basic data processing
# The data is already fairly clean, so splitting is all that is needed
# Split the dataset (20% of the samples go to the test set)
x_train, x_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.2)

# 3. Feature engineering: standardization
transfer = StandardScaler()
# Fit on the training set only; the test set reuses the fitted transformer
x_train = transfer.fit_transform(x_train)
x_test = transfer.transform(x_test)

# 4. Machine learning (model training)
# 4.1 Instantiate an estimator
estimator = KNeighborsClassifier(n_neighbors=5)
# 4.2 Wrap it in a cross-validated grid search; cv is the number of folds and
# n_jobs=-1 uses all available CPUs
# The dict lists the values to try for each parameter (the hyper-parameters)
param_grid = {"n_neighbors": [1, 3, 5, 7, 9]}
estimator = GridSearchCV(estimator=estimator, param_grid=param_grid, cv=10, n_jobs=-1)
# 4.3 Train the model
estimator.fit(x_train, y_train)

# 5. Model evaluation
# Method 1: compare the predicted values with the true values
y_predict = estimator.predict(x_test)
print("预测结果为:\n", y_predict)
print("比对真实值和预测值", y_predict == y_test)
# Method 2: compute the accuracy directly
score = estimator.score(x_test, y_test)
print("准确率为:\n", score)
# 5.3 Other results exposed by the grid search
print("最好的模型:\n", estimator.best_estimator_)
print("最好的结果:\n", estimator.best_score_)
print("最好的参数:\n", estimator.best_params_)
print("模型的整体结果\n", estimator.cv_results_)
预测结果为: [2 0 0 2 2 1 1 2 2 0 2 1 0 0 2 0 1 2 1 1 2 0 0 1 2 2 0 1 0 1] 比对真实值和预测值 [ True True True True True True True True True True True True True True True True True True True True True True True True True True True True True True] 准确率为: 1.0 最好的模型: KNeighborsClassifier() 最好的结果: 0.95 最好的参数: {'n_neighbors': 5} 模型的整体结果 {'mean_fit_time': array([0.00090003, 0.00100024, 0.00070007, 0.00089962, 0.00090055]), 'std_fit_time': array([3.00011052e-04, 1.71611699e-06, 4.58304749e-04, 2.99886165e-04, 3.00188321e-04]), 'mean_score_time': array([0.0014998 , 0.00110073, 0.00170014, 0.00170119, 0.00129974]), 'std_score_time': array([0.00049987, 0.00030075, 0.0004584 , 0.00045899, 0.00064113]), 'param_n_neighbors': masked_array(data=[1, 3, 5, 7, 9], mask=[False, False, False, False, False], fill_value='?', dtype=object), 'params': [{'n_neighbors': 1}, {'n_neighbors': 3}, {'n_neighbors': 5}, {'n_neighbors': 7}, {'n_neighbors': 9}], 'split0_test_score': array([1., 1., 1., 1., 1.]), 'split1_test_score': array([1., 1., 1., 1., 1.]), 'split2_test_score': array([0.91666667, 0.83333333, 0.83333333, 0.91666667, 0.83333333]), 'split3_test_score': array([1., 1., 1., 1., 1.]), 'split4_test_score': array([0.91666667, 0.91666667, 0.91666667, 0.91666667, 0.91666667]), 'split5_test_score': array([1. , 1. , 1. , 0.91666667, 1. ]), 'split6_test_score': array([0.91666667, 0.91666667, 0.91666667, 0.91666667, 0.91666667]), 'split7_test_score': array([0.83333333, 0.91666667, 0.91666667, 0.83333333, 0.83333333]), 'split8_test_score': array([0.91666667, 0.91666667, 1. , 1. , 1. ]), 'split9_test_score': array([0.83333333, 0.83333333, 0.91666667, 0.91666667, 0.91666667]), 'mean_test_score': array([0.93333333, 0.93333333, 0.95 , 0.94166667, 0.94166667]), 'std_test_score': array([0.06236096, 0.06236096, 0.05527708, 0.05335937, 0.06508541]), 'rank_test_score': array([4, 4, 1, 2, 2])}
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。