赞
踩
环境
依赖库
import joblib
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import make_scorer
from sklearn.metrics import r2_score
from sklearn.model_selection import GridSearchCV  # grid search
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputRegressor
调参核心代码
def tune_parameter(train_data_path, test_data_path, n_input, n_output, version):
    """Grid-search XGBoost hyper-parameters and report hold-out metrics.

    The data returned by ``load_data`` is split 80/20 (fixed random state).
    A ``GridSearchCV`` over ``xgb.XGBRegressor`` is wrapped in a
    ``MultiOutputRegressor`` and fitted on the 80% part, so every output
    column gets its own search and its own best parameters.  The best
    parameters per output and the metrics on the 20% part are printed.

    Args:
        train_data_path: Forwarded to ``load_data`` as the data location.
        test_data_path: Currently unused; kept so existing callers that pass
            it keep working.
        n_input: Forwarded to ``load_data``.
        n_output: Forwarded to ``load_data``.
        version: Forwarded to ``load_data``.

    Returns:
        The fitted ``MultiOutputRegressor``; each entry of its
        ``estimators_`` is the fitted ``GridSearchCV`` of one output.
    """
    # Model tuning.
    x, y = load_data(version, 'train', train_data_path, n_input, n_output)
    train_x, test_x, train_y, test_y = train_test_split(
        x, y, test_size=0.2, random_state=2022)

    gsc = GridSearchCV(
        estimator=xgb.XGBRegressor(seed=42),
        param_grid={
            "learning_rate": [0.05, 0.10, 0.15],
            "n_estimators": [400, 500, 600, 700],
            "max_depth": [3, 5, 7],
            "min_child_weight": [1, 3, 5, 7],
            "gamma": [0.0, 0.1, 0.2],
            "colsample_bytree": [0.7, 0.8, 0.9],
            "subsample": [0.7, 0.8, 0.9],
        },
        cv=3,
        scoring='neg_mean_squared_error',
        verbose=0,
        n_jobs=4,
    )

    # MultiOutputRegressor clones the grid search once per target column.
    model = MultiOutputRegressor(gsc).fit(train_x, train_y)

    print('-' * 20)
    print('best_params:')
    for i, estimator in enumerate(model.estimators_):
        print(i, estimator.best_params_)

    pre_y = model.predict(test_x)
    print('-' * 20)

    # Coefficient of determination (R^2) on the held-out split.
    r2 = performance_metric(test_y, pre_y)
    print('test_r2 = ', r2)

    return model


def performance_metric(y_true, y_predict):
    """Print RMSE and MAE for the predictions and return their R^2 score.

    Args:
        y_true: Ground-truth targets (array-like).
        y_predict: Predicted targets, same shape as ``y_true``.

    Returns:
        The value of ``r2_score(y_true, y_predict)``.
    """
    score = r2_score(y_true, y_predict)

    # Work on plain ndarrays: if y_true is a pandas object, np.mean may
    # reduce per column (depending on the pandas version) rather than to a
    # single scalar.
    residual = np.asarray(y_predict) - np.asarray(y_true)

    MSE = np.mean(residual ** 2)
    print('RMSE: ', MSE ** 0.5)
    MAE = np.mean(np.abs(residual))
    print('MAE: ', MAE)
    return score
如需保存调参后的模型,在 tune_parameter 函数内(得到 model 之后)增加下面的代码即可
szZack的博客
# Persist the tuned model; the file name encodes n_input, n_output and version.
model_path = './ml_data/xgb_%d_%d_%s.model' % (n_input, n_output, version)
joblib.dump(model, model_path)
调参:修改 param_grid 为自己的参数即可
# Hyper-parameter search space: every combination of the listed values is a
# candidate (3 * 4 * 3 * 4 * 3 * 3 * 3 = 3888 combinations), so trim the
# lists to keep the search affordable.
param_grid = {
    # Step-size shrinkage applied to each boosting round.
    "learning_rate": [0.05, 0.10, 0.15],
    # Number of boosting rounds (trees).
    "n_estimators": [400, 500, 600, 700],
    # Maximum depth of each tree.
    "max_depth": [3, 5, 7],
    # Minimum sum of instance weight required in a child node.
    "min_child_weight": [1, 3, 5, 7],
    # Minimum loss reduction required to make a further split.
    "gamma": [0.0, 0.1, 0.2],
    # Fraction of columns sampled for each tree.
    "colsample_bytree": [0.7, 0.8, 0.9],
    # Fraction of training rows sampled for each tree.
    "subsample": [0.7, 0.8, 0.9],
}
szZack的博客
并行任务数 n_jobs=4,可根据自己机器的 CPU 核心数设定;也可以设置 n_jobs=-1,表示使用全部 CPU 核心
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。