赞
踩
本文使用 scikit-opt 包,尝试用启发式算法(粒子群算法)对机器学习模型的超参数进行优化。安装方式:pip install scikit-opt
#加载包 import numpy as np import pandas as pd from plotnine import* import seaborn as sns from scipy import stats import matplotlib as mpl import matplotlib.pyplot as plt #中文显示问题 plt.rcParams['font.sans-serif']=['SimHei'] plt.rcParams['axes.unicode_minus'] = False # notebook嵌入图片 %matplotlib inline # 提高分辨率 %config InlineBackend.figure_format='retina' # 切分数据 from sklearn.model_selection import train_test_split # 评价指标 from sklearn.metrics import mean_squared_error # 忽略警告 import warnings warnings.filterwarnings('ignore') # 导入粒子群算法 from sko.PSO import PSO # 交叉检验 from sklearn.model_selection import cross_val_score from sklearn.metrics import accuracy_score from sklearn.metrics import f1_score
def demo_func(x):
x1, x2 = x
return -20 * np.exp(-0.2 * np.sqrt(0.5 * (x1 ** 2 + x2 ** 2))) - np.exp(
0.5 * (np.cos(2 * np.pi * x1) + np.cos(2 * np.pi * x2))) + 20 + np.e
# Inequality constraint (expression <= 0 is feasible): keep solutions inside
# the circle of radius 0.5 centered at (1, 0).
# NOTE: the trailing comma is required — sko.PSO expects constraint_ueq to be
# a tuple of callables; omitting it raises an error.
constraint_ueq = (lambda x: (x[0] - 1) ** 2 + (x[1] - 0) ** 2 - 0.5 ** 2,)
# Number of PSO iterations
max_iter = 50

# Constrained PSO: 40 particles minimizing demo_func over [-2, 2] x [-2, 2],
# restricted by the circular inequality constraint defined above.
pso = PSO(
    func=demo_func,
    n_dim=2,
    pop=40,
    max_iter=max_iter,
    lb=[-2, -2],
    ub=[2, 2],
    constraint_ueq=constraint_ueq,
)
pso.record_mode = True  # keep per-iteration history for the convergence plot
pso.run()
print('best_x is ', pso.gbest_x, 'best_y is', pso.gbest_y)
输出:
best_x is [9.52188498e-01 4.39396655e-05] best_y is [2.57992762]
# Convergence curve: best objective value found up to each iteration
plt.figure(dpi=300)
plt.plot(pso.gbest_y_hist)
plt.show()
# --- Synthetic 3-class dataset -------------------------------------------
from sklearn.datasets import make_classification

x, y = make_classification(
    n_samples=1000,          # number of samples
    n_features=3,            # total number of features
    n_informative=2,         # informative features
    n_redundant=1,           # redundant features (random combos of informative ones)
    n_repeated=0,            # duplicated features
    n_classes=3,             # number of classes
    n_clusters_per_class=1,  # clusters per class
    random_state=42,
)

# Visualize the dataset using only the first two features
plt.figure(dpi=600, figsize=(6, 4))
plt.scatter(x[:, 0], x[:, 1], c=y)

# Hold out 20% of the data for testing
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(
    x, y, random_state=45, test_size=0.2
)
# --- Baseline: KNN classifier with default hyperparameters ---------------
from sklearn import neighbors

knn_clf_old = neighbors.KNeighborsClassifier()
knn_clf_old.fit(x_train, y_train)

# Report accuracy on both splits
baseline_train_acc = accuracy_score(y_train, knn_clf_old.predict(x_train))
baseline_test_acc = accuracy_score(y_test, knn_clf_old.predict(x_test))
print('训练集准确率:{:.2f}'.format(baseline_train_acc))
print('测试集准确率:{:.2f}'.format(baseline_test_acc))
输出:
训练集准确率:0.95
测试集准确率:0.92
目标函数:以 KNN 的 10 折交叉验证 f1_macro 平均值作为评价指标;由于 PSO 求最小值,故对该
平均值取负。
def knn_score(x):
    """PSO objective: negated mean 10-fold CV f1_macro of a KNN classifier.

    x is the length-1 position vector supplied by the optimizer; its single
    entry is truncated to an int and used as n_neighbors (k). PSO minimizes,
    so the score is negated to maximize f1_macro.
    """
    # Renamed from the original's inner `knn_score`, which shadowed the
    # function itself.
    score = cross_val_score(
        neighbors.KNeighborsClassifier(n_neighbors=int(x)),
        x_train, y_train, scoring='f1_macro', cv=10,
    ).mean()
    return -score
# Search for the best k in [1, 100] with PSO (40 particles, 50 iterations)
max_iter = 50
knn_pso = PSO(
    func=knn_score,
    n_dim=1,
    pop=40,
    max_iter=max_iter,
    lb=[1],
    ub=[100],
)
knn_pso.record_mode = True  # record history for the convergence plot
knn_pso.run()
print('best_k is ', int(knn_pso.gbest_x))
输出:
best_k is [12.5256584]
# Convergence curve of the hyperparameter search
plt.figure(dpi=300)
plt.plot(knn_pso.gbest_y_hist)
plt.show()
可以看到当k为12时,算法收敛。
# Refit KNN with the PSO-selected k = 12 and evaluate on both splits
knn_clf_new = neighbors.KNeighborsClassifier(n_neighbors=12)
knn_clf_new.fit(x_train, y_train)

print('训练集准确率:{:.2f}'.format(accuracy_score(y_train, knn_clf_new.predict(x_train))))
print('测试集准确率:{:.2f}'.format(accuracy_score(y_test, knn_clf_new.predict(x_test))))
输出:
训练集准确率:0.93
测试集准确率:0.90
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。