In ensemble learning, we train multiple models (usually called weak learners) to solve the same problem and combine them to obtain better results. The key assumption is that when the weak models are combined correctly, we get a more accurate and/or more robust model.
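As a minimal illustration of that idea (not from the original post; the toy data and model choices are assumptions for demonstration), averaging the predictions of several shallow decision trees trained on bootstrap samples usually beats any single one of them:

# Minimal sketch: combining weak learners by averaging (bagging-style).
# Assumes scikit-learn is installed; make_regression is toy data.
import numpy as np
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error

X, y = make_regression(n_samples=500, n_features=10, noise=10.0, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Train several shallow trees (weak learners) on bootstrap samples
rng = np.random.RandomState(0)
trees = []
for _ in range(20):
    idx = rng.randint(0, len(X_train), len(X_train))  # bootstrap sample
    trees.append(DecisionTreeRegressor(max_depth=3).fit(X_train[idx], y_train[idx]))

# Combine the weak learners by averaging their predictions
ensemble_pred = np.mean([t.predict(X_test) for t in trees], axis=0)
single_pred = trees[0].predict(X_test)

print('single tree RMSE:', mean_squared_error(y_test, single_pred) ** 0.5)
print('ensemble RMSE   :', mean_squared_error(y_test, ensemble_pred) ** 0.5)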
Plain GBDT is slow to train (trees must be built sequentially over the full dataset) and, without the engineering optimizations of newer libraries, often underperforms them in practice.
XGBoost — a gradient-boosted decision tree model
It does not accept categorical variables directly; they must first be preprocessed, e.g. with one-hot encoding.
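A minimal sketch of that preprocessing step, assuming xgboost and pandas are installed; the DataFrame and column names are invented for illustration. (Recent XGBoost versions can also handle categoricals via enable_categorical, but one-hot encoding is the classic workflow described here.)

# Sketch: one-hot encode categorical columns before training XGBoost.
# The DataFrame and column names below are hypothetical.
import pandas as pd
import xgboost as xgb

df = pd.DataFrame({
    'city': ['beijing', 'shanghai', 'beijing', 'shenzhen'],  # categorical
    'area': [80.0, 120.0, 95.0, 60.0],                       # numeric
    'price': [500, 900, 650, 400],                           # target
})

# One-hot encode the categorical column; XGBoost then sees only numeric input
X = pd.get_dummies(df[['city', 'area']], columns=['city'], dtype=float)
y = df['price']

model = xgb.XGBRegressor(n_estimators=10, max_depth=3)
model.fit(X, y)
print(model.predict(X[:2]))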
XGBoost uses a pre-sorted algorithm that locates split points exactly: each feature is sorted once, and every candidate threshold is evaluated. This pre-sorting approach finds the precise split, but the sorting and exhaustive scanning carry a large cost in both memory and time.
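To make the trade-off concrete, here is a hedged sketch (not XGBoost's actual implementation) of exact split finding on one pre-sorted feature under squared-error loss: every distinct boundary is evaluated, which is precise but requires sorting and a full scan at every node.

# Sketch: exact (pre-sorted) split finding on a single feature.
# Not XGBoost's code; it just shows the idea: sort once, scan every boundary.
import numpy as np

def best_split(x, y):
    order = np.argsort(x)                  # the pre-sorting step: O(n log n)
    x_s, y_s = x[order], y[order]
    total_sum, n = y_s.sum(), len(y_s)
    left_sum = 0.0
    best_gain, best_thr = -np.inf, None
    for i in range(1, n):                  # scan every candidate boundary: O(n)
        left_sum += y_s[i - 1]
        if x_s[i] == x_s[i - 1]:
            continue                       # cannot split between equal values
        right_sum = total_sum - left_sum
        # For squared-error loss, maximizing sum_L^2/n_L + sum_R^2/n_R
        # is equivalent to minimizing the post-split squared error
        gain = left_sum**2 / i + right_sum**2 / (n - i)
        if gain > best_gain:
            best_gain, best_thr = gain, (x_s[i] + x_s[i - 1]) / 2
    return best_thr, best_gain

rng = np.random.RandomState(0)
x = rng.rand(1000)
y = (x > 0.6).astype(float) + rng.normal(0, 0.1, 1000)
print(best_split(x, y))  # the found threshold should land near 0.6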
LightGBM
LightGBM sidesteps this overhead with a histogram-based algorithm: continuous features are bucketed into a fixed number of bins, and split points are searched over the bins rather than over every sorted value.
Example 1: LightGBM's scikit-learn interface.

import lightgbm as lgb
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Load the data (iris labels are treated as regression targets for demonstration)
iris = load_iris()
data = iris.data
target = iris.target
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2)

# Create and train the model
gbm = lgb.LGBMRegressor(objective='regression', num_leaves=31,
                        learning_rate=0.05, n_estimators=20)
gbm.fit(X_train, y_train,
        eval_set=[(X_test, y_test)],
        eval_metric='l1',
        callbacks=[lgb.early_stopping(5)])  # early_stopping_rounds was removed from fit() in LightGBM 4.x

# Predict on the test set
y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration_)

# Evaluate the model
print('The rmse of prediction is:', mean_squared_error(y_test, y_pred) ** 0.5)

# Feature importances
print('Feature importances:', list(gbm.feature_importances_))

# Grid search for hyperparameter tuning
estimator = lgb.LGBMRegressor(num_leaves=31)
param_grid = {'learning_rate': [0.01, 0.1, 1], 'n_estimators': [20, 40]}
gbm = GridSearchCV(estimator, param_grid)
gbm.fit(X_train, y_train)
print('Best parameters:', gbm.best_params_)
Example 2: LightGBM's native interface.

import lightgbm as lgb
from sklearn.metrics import mean_squared_error
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

iris = load_iris()
data = iris.data
target = iris.target
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2)

# Wrap the data in LightGBM's Dataset format
lgb_train = lgb.Dataset(X_train, y_train)
lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)

# Parameters as a dict
params = {
    'task': 'train',
    'boosting_type': 'gbdt',   # boosting type
    'objective': 'regression', # objective function
    'metric': {'l2', 'l1'},    # evaluation metrics (the original listed 'auc', which requires binary labels)
    'num_leaves': 31,          # number of leaves per tree
    'learning_rate': 0.05,     # learning rate
    'feature_fraction': 0.9,   # fraction of features sampled per tree
    'bagging_fraction': 0.8,   # fraction of rows sampled per tree
    'bagging_freq': 5,         # k means perform bagging every k iterations
    'verbose': 1               # <0: fatal only, =0: errors/warnings, >0: info
}

# Train with early stopping
gbm = lgb.train(params, lgb_train,
                num_boost_round=20,
                valid_sets=[lgb_eval],
                callbacks=[lgb.early_stopping(5)])  # early_stopping_rounds was removed from train() in LightGBM 4.x

# Save the model to a file
gbm.save_model('model.txt')

# Predict on the test set
y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration)

# Evaluate the model
print('The rmse of prediction is:', mean_squared_error(y_test, y_pred) ** 0.5)