赞
踩
"""LightGBM 5-fold cross-validation training for a binary-classification task.

Expects two names defined earlier in the file / notebook session:
  * ``data``     -- DataFrame whose ``label`` column is NaN for test rows
  * ``features`` -- list of feature column names used for training
    (TODO confirm: both must be provided by the surrounding context)

Writes one trained booster per fold to ``model/model_<fold>.txt`` and prints
out-of-fold AUC / F1 / precision / recall.
"""
import os
import warnings

import lightgbm as lgb
import numpy as np
from sklearn.metrics import (accuracy_score, f1_score, precision_score,
                             recall_score, roc_auc_score)
from sklearn.model_selection import StratifiedKFold

warnings.filterwarnings('ignore')

# Rows with a label form the training set; rows with NaN label are the test set.
X_train, X_test = data[~data['label'].isna()], data[data['label'].isna()]
y = X_train['label']

KF = StratifiedKFold(n_splits=5, shuffle=True, random_state=2021)

parameters = {
    'learning_rate': 0.05,
    'boosting_type': 'gbdt',
    'objective': 'binary',
    'metric': 'auc',
    'num_leaves': 32,
    'feature_fraction': 0.8,
    'bagging_fraction': 0.8,
    'bagging_freq': 5,
    'seed': 2020,
    'bagging_seed': 1,
    'feature_fraction_seed': 7,
    'min_data_in_leaf': 20,
    'n_jobs': -1,
    'verbose': -1,
}

# Ensure the output directory exists before the first save_model call.
os.makedirs('model', exist_ok=True)

oof_lgb = np.zeros(len(X_train))  # out-of-fold predicted probabilities

for fold_, (trn_idx, val_idx) in enumerate(KF.split(X_train.values, y.values)):
    print("fold n°{}".format(fold_))
    trn_data = lgb.Dataset(X_train.iloc[trn_idx][features], label=y.iloc[trn_idx])
    val_data = lgb.Dataset(X_train.iloc[val_idx][features], label=y.iloc[val_idx])
    num_round = 10000
    # BUG FIX: the original passed the undefined name ``params`` — its dict
    # was commented out. The live configuration dict is ``parameters``.
    clf = lgb.train(
        parameters,
        trn_data,
        num_round,
        valid_sets=[trn_data, val_data],
        verbose_eval=500,
        early_stopping_rounds=100,
    )
    # Predict the held-out fold with the best iteration found by early stopping.
    oof_lgb[val_idx] = clf.predict(X_train.iloc[val_idx][features],
                                   num_iteration=clf.best_iteration)
    clf.save_model(f'model/model_{fold_}.txt')

# Hard 0.5 threshold turns OOF probabilities into class predictions
# (computed once instead of once per metric call).
oof_pred = [1 if p >= 0.5 else 0 for p in oof_lgb]
print("AUC score: {}".format(roc_auc_score(y, oof_lgb)))
print("F1 score: {}".format(f1_score(y, oof_pred)))
print("Precision score: {}".format(precision_score(y, oof_pred)))
print("Recall score: {}".format(recall_score(y, oof_pred)))
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。