赞
踩
个人认为,K 折交叉验证是通过对 K 次结果取平均,来评价模型或该组参数的效果好坏;通过 K 折交叉验证找出最优的模型和参数之后,最终预测时仍需用全部训练数据重新训练一次模型再进行预测。
"""5-fold cross-validation of a LightGBM regressor on the zhengqi (steam) dataset.

Reads the train/test TSV files, runs KFold CV, and records per-fold
train/validation MSE plus the means across folds.
"""
from sklearn.model_selection import KFold
# BUG FIX: mean_squared_error was called below but never imported,
# which raised a NameError at runtime.
from sklearn.metrics import mean_squared_error
import lightgbm as lgb
import numpy as np
import pandas as pd

# Load data; feature columns are those shared with the test file,
# the label column is 'target'.
train_data2 = pd.read_csv('./data/zhengqi_train.txt', sep='\t')
test_data2 = pd.read_csv('./data/zhengqi_test.txt', sep='\t')
train_data2_f = train_data2[test_data2.columns].values
train_data2_target = train_data2['target'].values

# 5-fold cross-validation.
Folds = 5
# BUG FIX: use the Folds constant instead of a duplicated literal 5,
# so changing the fold count only requires one edit.
kf = KFold(n_splits=Folds)

# Per-fold train / validation MSE.
MSE_DICT = {'train_mse': [], 'test_mse': []}

# Offline training and prediction, one model per fold.
for i, (train_index, test_index) in enumerate(kf.split(train_data2_f)):
    # LGB tree model (fresh estimator per fold so folds are independent).
    lgb_reg = lgb.LGBMRegressor(
        learning_rate=0.01,
        max_depth=-1,
        n_estimators=5000,
        boosting_type='gbdt',
        random_state=2019,
        objective='regression'
    )

    # Split this fold's train / validation partitions.
    X_train_KFold, X_test_KFold = train_data2_f[train_index], train_data2_f[test_index]
    y_train_KFold, y_test_KFold = train_data2_target[train_index], train_data2_target[test_index]

    # Train with early stopping on the held-out fold.
    lgb_reg.fit(
        X=X_train_KFold,
        y=y_train_KFold,
        eval_set=[(X_train_KFold, y_train_KFold), (X_test_KFold, y_test_KFold)],
        eval_names=['Train', 'Test'],
        early_stopping_rounds=200,
        eval_metric='MSE',
        verbose=50
    )

    # Predict at the best (early-stopped) iteration.
    y_train_KFold_predict = lgb_reg.predict(X_train_KFold, num_iteration=lgb_reg.best_iteration_)
    y_test_KFold_predict = lgb_reg.predict(X_test_KFold, num_iteration=lgb_reg.best_iteration_)

    print(f"第{i+1}折 训练和预测 训练MSE 预测MSE")
    train_mse = mean_squared_error(y_train_KFold_predict, y_train_KFold)
    print('------\n', '训练MSE\n', train_mse, '\n------')
    test_mse = mean_squared_error(y_test_KFold_predict, y_test_KFold)
    print('------\n', '预测MSE\n', test_mse, '\n------')

    MSE_DICT['train_mse'].append(train_mse)
    MSE_DICT['test_mse'].append(test_mse)

# Summary: per-fold MSE lists and their means.
print('------\n', '训练MSE\n', MSE_DICT['train_mse'], '\n', np.mean(MSE_DICT['train_mse']), '\n------')
print('------\n', '预测MSE\n', MSE_DICT['test_mse'], '\n', np.mean(MSE_DICT['test_mse']), '\n------')
# Train a small LightGBM regressor, persist it with joblib, reload it,
# and predict on the test set.
# NOTE(review): train_data / train_target / test_data / test_target are
# defined outside this snippet — presumably loaded earlier in the file.
import lightgbm as lgb
import joblib

# Model training.
lgb_reg = lgb.LGBMRegressor(objective='regression', num_leaves=31, learning_rate=0.05, n_estimators=20)
# BUG FIX: eval_set must be a list of (X, y) tuples. The original passed
# a flat list [test_data, test_target], which LightGBM rejects.
lgb_reg.fit(
    train_data,
    train_target,
    eval_set=[(test_data, test_target)],
    eval_metric='l1',
    early_stopping_rounds=5,
)

# Persist the fitted model.
joblib.dump(lgb_reg, 'model.pkl')

# Reload the model.
lgb_reg = joblib.load('model.pkl')

# Predict at the best (early-stopped) iteration.
test_predict = lgb_reg.predict(test_data, num_iteration=lgb_reg.best_iteration_)
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。