当前位置:   article > 正文

贝叶斯优化xgboost的超参数最简实现_xgboost回归贝叶斯调优

xgboost回归贝叶斯调优
from hyperopt import hp
from hyperopt import hp, fmin, tpe, Trials, partial
from hyperopt.early_stop import no_progress_loss
import warnings
warnings.filterwarnings("ignore")
import numpy as np
# from OptMetrics import MyMetric
# from sklearn.ensemble import GradientBoostingClassifier
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn import datasets
from MyLogColor import log,LogLevel
import time
from sklearn.metrics import precision_recall_curve,auc,f1_score,roc_curve,auc

# Demo dataset: sklearn's breast-cancer binary-classification set
# (x: feature matrix, y: 0/1 target labels).
cancer=datasets.load_breast_cancer()
x=cancer.data
y=cancer.target

def Rollover(x):
    """Flip binary labels: every non-zero entry becomes 0 and every
    zero entry becomes 1 (used below to make the minority class the
    positive class)."""
    flipped = np.logical_not(x.astype(bool))
    return flipped.astype(int)
####TODO: flip the labels so the minority class becomes the positive class
y = Rollover(y)

# Hold out 20% of the data as a fixed validation set for the search.
x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.2, random_state = 42)


def ROC_AUC(test_y, proba):
    """Area under the ROC curve for binary labels vs. predicted scores."""
    fpr, tpr, _ = roc_curve(test_y, proba)
    return auc(fpr, tpr)


# Per-iteration history filled in by hyperopt_objective:
# historical_metrics[i] is the validation ROC-AUC of iteration i,
# historical_params[i] the xgboost params used on that iteration.
historical_metrics = []
historical_params = {}

# Candidate values for the categorical hyper-parameters.
boosters = ['gbtree','gblinear','dart']
sampling_methods = ['uniform','gradient_based']
tree_methods = ["auto","exact","approx","hist"]
refresh_leafs = [0,1]
# process_types = ["default","update"]#,"refresh","prune"]
grow_policys = ["depthwise","lossguide"]
# sample_types = ["uniform","weighted"]
# NOTE(review): normalize_types and rate_drops are only referenced by
# commented-out dart-specific code below; they are currently unused.
normalize_types = ["tree","forest"]
rate_drops = []



# Hyperopt search space: each entry is an hp distribution whose label is
# echoed back as the key of the sampled dict passed to the objective.
# quniform values come back as floats and must be cast to int before use.
# NOTE(review): the label 'boosters' does not match xgboost's actual
# parameter name, which is 'booster' — make sure the objective maps the
# sampled value onto the correct xgboost key.
param_grid_hp = {
    'boosters':hp.choice('boosters',boosters)
    ,"n_estimators":hp.quniform("n_estimators",50,1000,1)
    ,"learning_rate":hp.uniform("learning_rate",1e-5,1)
    ,"gamma":hp.quniform("gamma",0,100,1)
    ,"max_depth":hp.quniform("max_depth",6,200,1)
    ,"min_child_weight":hp.quniform("min_child_weight",0,100,1)
    ,"max_delta_step":hp.quniform("max_delta_step",0,100,1)
    ,"subsample":hp.uniform("subsample",0,1)
    # ,"sampling_method":hp.choice("sampling_method",sampling_methods)
    ,"colsample_bytree":hp.uniform("colsample_bytree",0,1)
    ,"colsample_bylevel":hp.uniform("colsample_bylevel",0,1)
    ,"colsample_bynode":hp.uniform("colsample_bynode",0,1)
    ,"lambda":hp.quniform("lambda",0,200,1)
    ,"alpha":hp.quniform("alpha",0,200,1)
    ,"tree_method":hp.choice("tree_method",tree_methods)
    # ,"scale_pos_weight":hp.uniform("scale_pos_weight",0,1000)
    ,"refresh_leaf":hp.choice("refresh_leaf",refresh_leafs)
    # ,"process_type":hp.choice("process_type",process_types)
    ,"grow_policy":hp.choice("grow_policy",grow_policys)
    ,"max_leaves":hp.quniform("max_leaves",0,10000,1)
    ,"max_bin":hp.quniform("max_bin",256,1000,1)
    ,"num_parallel_tree":hp.quniform("num_parallel_tree",1,100,1)   
}
# booster_dart_params = {
#     "sample_type":hp.choice("sample_type",sample_types)
#     ,"normalize_type":hp.choice("normalize_type",normalize_types)
#     ,"rate_drop":hp.uniform("rate_drop",0,1)
#     ,"one_drop":hp.quniform("one_drop",0,1000,1)
#     ,"skip_drop":hp.uniform("skip_drop",0,1)
# }

# Placeholder for gblinear-specific parameters; currently empty and unused.
booster_gblinear_params = {
    
}

def PR_AUC(test_y, proba, pred):
    """Area under the precision-recall curve.

    Parameters
    ----------
    test_y : array-like of 0/1 ground-truth labels.
    proba : array-like of predicted positive-class probabilities.
    pred : array-like of hard 0/1 predictions. Kept for interface
        compatibility; the original computed an F1 score from it and
        discarded the result, so that dead computation was removed.

    Returns
    -------
    float : the PR-AUC (recall on the x-axis, precision on the y-axis).
    """
    precision, recall, _ = precision_recall_curve(test_y, proba)
    return auc(recall, precision)

def hyperopt_objective(hyperopt_params): 
    """Hyperopt objective function.

    Trains an XGBoost model with the sampled hyper-parameters and
    returns the *negated* validation ROC-AUC (hyperopt minimizes, so
    smaller is better).

    Side effects: increments the global NOW_FUC_RUN_ITER counter and
    records the metric / parameter dict in the module-level
    historical_metrics / historical_params containers.
    """
    params = {
        "objective":"binary:logistic"
        # BUG FIX: xgboost's parameter name is "booster"; the original
        # stored the sampled value under "boosters", which xgb.train
        # silently ignores, so the booster type was never applied.
        ,"booster":hyperopt_params['boosters']
        # NOTE(review): xgb.train() does not consume "n_estimators"
        # (that is a sklearn-API parameter) — the number of rounds is
        # fixed by num_boost_round=100 below. Kept here only so the
        # sampled value is recorded in historical_params.
        ,"n_estimators":int(hyperopt_params["n_estimators"])
        ,"learning_rate":hyperopt_params["learning_rate"]
        ,"gamma":hyperopt_params["gamma"]
        # quniform samples come back as floats; cast the integer-valued
        # parameters before handing them to xgboost.
        ,"max_depth":int(hyperopt_params["max_depth"])
        ,"min_child_weight":int(hyperopt_params["min_child_weight"])
        ,"max_delta_step":int(hyperopt_params["max_delta_step"])
        ,"subsample":hyperopt_params["subsample"]
        ,"verbosity":0
        # ,"sampling_method":hyperopt_params["sampling_method"]
        ,"colsample_bytree":hyperopt_params["colsample_bytree"]
        ,"colsample_bylevel":hyperopt_params["colsample_bylevel"]
        ,"colsample_bynode":hyperopt_params["colsample_bynode"]
        ,"lambda":int(hyperopt_params["lambda"])
        ,"alpha":int(hyperopt_params["alpha"])
        ,"tree_method":hyperopt_params["tree_method"]
        # Weight the positive class by the negative/positive ratio of
        # the training split to counter class imbalance.
        ,"scale_pos_weight":(y_train==0).sum()/(y_train==1).sum()
        ,"refresh_leaf":hyperopt_params["refresh_leaf"]
        # ,"process_type":hyperopt_params["process_type"]
        ,"grow_policy":hyperopt_params["grow_policy"]
        ,"max_leaves":int(hyperopt_params["max_leaves"])
        ,"max_bin":int(hyperopt_params["max_bin"])
        ,"num_parallel_tree":int(hyperopt_params["num_parallel_tree"])   
    }
    dtrain = xgb.DMatrix(x_train,label=y_train)
    clf = xgb.train(params=params
                   ,dtrain=dtrain
                   ,num_boost_round=100
                   ,evals=[(dtrain,"train")]
                   ,verbose_eval=False # set True to show per-round training info
                   )
    dtest = xgb.DMatrix(x_val,label=y_val)
    xgboost_proba = clf.predict(dtest)

    # Record this iteration's score and parameters so the best model can
    # be retrained after the search finishes.
    global NOW_FUC_RUN_ITER
    NOW_FUC_RUN_ITER += 1
    metric = ROC_AUC(y_val,xgboost_proba)
    historical_metrics.append(metric)
    historical_params.update({NOW_FUC_RUN_ITER-1:params})
    return - metric

def param_hyperopt(max_evals=100):
    """Run a TPE search over param_grid_hp.

    Returns the best parameter assignment found by fmin together with
    the Trials record of every evaluation.
    """
    # Keeps a record of each evaluation made during the search.
    trial_log = Trials()

    # Abort the search after 100 consecutive evaluations without
    # improvement of the best loss.
    stopper = no_progress_loss(100)

    best = fmin(
        hyperopt_objective,      # objective to minimize
        space=param_grid_hp,     # search space
        algo=tpe.suggest,        # TPE surrogate model
        max_evals=max_evals,     # evaluation budget
        verbose=True,
        trials=trial_log,
        early_stop_fn=stopper,
    )

    # fmin already prints the best score; echo the best params as well.
    print("\n", "\n", "best params: ", best,
          "\n")
    return best, trial_log

# Global iteration counter read/updated by hyperopt_objective.
NOW_FUC_RUN_ITER = 0
# BUG FIX: the original unpacked into a variable named "Trials",
# shadowing the hyperopt Trials class imported at the top of the file;
# renamed to avoid clobbering the class name.
PARAMS_BEST, TRIALS = param_hyperopt(600)

# Pick the iteration with the best validation ROC-AUC and retrain an
# XGBoost model with its recorded parameters.
historical_metrics = np.array(historical_metrics)
idx = np.argmax(historical_metrics)
params = historical_params[idx]
dtrain = xgb.DMatrix(x_train,label=y_train)
clf = xgb.train(params=params
               ,dtrain=dtrain
               ,num_boost_round=100  # same fixed round count as the search
               ,evals=[(dtrain,"train")]
               ,verbose_eval=False # set True to show per-round training info
               )
dtest = xgb.DMatrix(x_val,label=y_val)
xgboost_proba = clf.predict(dtest)
print("测试优化结果",ROC_AUC(y_val,xgboost_proba))
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
  • 50
  • 51
  • 52
  • 53
  • 54
  • 55
  • 56
  • 57
  • 58
  • 59
  • 60
  • 61
  • 62
  • 63
  • 64
  • 65
  • 66
  • 67
  • 68
  • 69
  • 70
  • 71
  • 72
  • 73
  • 74
  • 75
  • 76
  • 77
  • 78
  • 79
  • 80
  • 81
  • 82
  • 83
  • 84
  • 85
  • 86
  • 87
  • 88
  • 89
  • 90
  • 91
  • 92
  • 93
  • 94
  • 95
  • 96
  • 97
  • 98
  • 99
  • 100
  • 101
  • 102
  • 103
  • 104
  • 105
  • 106
  • 107
  • 108
  • 109
  • 110
  • 111
  • 112
  • 113
  • 114
  • 115
  • 116
  • 117
  • 118
  • 119
  • 120
  • 121
  • 122
  • 123
  • 124
  • 125
  • 126
  • 127
  • 128
  • 129
  • 130
  • 131
  • 132
  • 133
  • 134
  • 135
  • 136
  • 137
  • 138
  • 139
  • 140
  • 141
  • 142
  • 143
  • 144
  • 145
  • 146
  • 147
  • 148
  • 149
  • 150
  • 151
  • 152
  • 153
  • 154
  • 155
  • 156
  • 157
  • 158
  • 159
  • 160
  • 161
  • 162
  • 163
  • 164
  • 165
  • 166
  • 167
  • 168
  • 169
  • 170
  • 171
  • 172
  • 173
  • 174
  • 175
  • 176
  • 177
  • 178
  • 179
  • 180
  • 181
  • 182
  • 183
  • 184
  • 185
  • 186
  • 187
声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/神奇cpp/article/detail/742255
推荐阅读
相关标签
  

闽ICP备14008679号