赞
踩
import pandas as pd import os import gc import lightgbm as lgb import xgboost as xgb from catboost import CatBoostRegressor from sklearn.linear_model import SGDRegressor, LinearRegression, Ridge from sklearn.preprocessing import MinMaxScaler import math import numpy as np from tqdm import tqdm from sklearn.model_selection import StratifiedKFold, KFold from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, log_loss import matplotlib.pyplot as plt import time import warnings warnings.filterwarnings('ignore')
# Load the labelled training set and the unlabelled "A" test set from the
# working directory, then preview the first training rows.
train_path, test_path = 'train.csv', 'testA.csv'
train = pd.read_csv(train_path)
testA = pd.read_csv(test_path)
train.head()
id | loanAmnt | term | interestRate | installment | grade | subGrade | employmentTitle | employmentLength | homeOwnership | ... | n5 | n6 | n7 | n8 | n9 | n10 | n11 | n12 | n13 | n14 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 35000.0 | 5 | 19.52 | 917.97 | E | E2 | 320.0 | 2 years | 2 | ... | 9.0 | 8.0 | 4.0 | 12.0 | 2.0 | 7.0 | 0.0 | 0.0 | 0.0 | 2.0 |
1 | 1 | 18000.0 | 5 | 18.49 | 461.90 | D | D2 | 219843.0 | 5 years | 0 | ... | NaN | NaN | NaN | NaN | NaN | 13.0 | NaN | NaN | NaN | NaN |
2 | 2 | 12000.0 | 5 | 16.99 | 298.17 | D | D3 | 31698.0 | 8 years | 0 | ... | 0.0 | 21.0 | 4.0 | 5.0 | 3.0 | 11.0 | 0.0 | 0.0 | 0.0 | 4.0 |
3 | 3 | 11000.0 | 3 | 7.26 | 340.96 | A | A4 | 46854.0 | 10+ years | 1 | ... | 16.0 | 4.0 | 7.0 | 21.0 | 6.0 | 9.0 | 0.0 | 0.0 | 0.0 | 1.0 |
4 | 4 | 3000.0 | 3 | 12.99 | 101.07 | C | C2 | 54.0 | NaN | 1 | ... | 4.0 | 9.0 | 10.0 | 15.0 | 7.0 | 12.0 | 0.0 | 0.0 | 0.0 | 4.0 |
5 rows × 47 columns
# Stack train and test rows into one frame so that every preprocessing step
# below is applied to both consistently.
data = pd.concat([train, testA], axis=0, ignore_index=True)
# Print the distinct values of both grade columns in sorted order.
for grade_column in ('grade', 'subGrade'):
    print(sorted(data[grade_column].unique()))
['A', 'B', 'C', 'D', 'E', 'F', 'G']
['A1', 'A2', 'A3', 'A4', 'A5', 'B1', 'B2', 'B3', 'B4', 'B5', 'C1', 'C2', 'C3', 'C4', 'C5', 'D1', 'D2', 'D3', 'D4', 'D5', 'E1', 'E2', 'E3', 'E4', 'E5', 'F1', 'F2', 'F3', 'F4', 'F5', 'G1', 'G2', 'G3', 'G4', 'G5']
# Frequency of each raw employmentLength value (missing values included),
# ordered by the value itself rather than by count.
data['employmentLength'].value_counts(dropna=False).sort_index()
1 year 65671
10+ years 328525
2 years 90565
3 years 80163
4 years 59818
5 years 62645
6 years 46582
7 years 44230
8 years 45168
9 years 37866
< 1 year 80226
NaN 58541
Name: employmentLength, dtype: int64
# Rewrite the two irregular labels so that every non-null value has the form
# "<n> years" and can be parsed by taking the leading integer.
# The result is assigned back explicitly: calling `.replace(..., inplace=True)`
# on `data[col]` operates on an intermediate Series and does not update `data`
# when pandas Copy-on-Write is active.
data['employmentLength'] = data['employmentLength'].replace(
    {'10+ years': '10 years', '< 1 year': '0 years'}
)
def employmentLength_to_int(s):
    """Return the leading number of a "<n> years" label as ``np.int8``.

    Null inputs (as judged by ``pd.isnull``) are returned unchanged so that
    missing values stay missing.
    """
    if not pd.isnull(s):
        leading_token = s.split()[0]
        return np.int8(leading_token)
    return s
# Convert every employmentLength label to its numeric year count; nulls are
# passed through unchanged by employmentLength_to_int.
data['employmentLength'] = data['employmentLength'].apply(employmentLength_to_int)
# Re-tabulate the column (missing values included) to check the conversion.
data['employmentLength'].value_counts(dropna=False).sort_index()
0.0 80226
1.0 65671
2.0 90565
3.0 80163
4.0 59818
5.0 62645
6.0 46582
7.0 44230
8.0 45168
9.0 37866
10.0 328525
NaN 58541
Name: employmentLength, dtype: int64
# Look at five randomly chosen raw earliesCreditLine values to see their format.
data['earliesCreditLine'].sample(5)
375743 Jun-2003
361340 Jul-1999
716602 Aug-1995
893559 Oct-1982
221525 Nov-2004
Name: earliesCreditLine, dtype: object
def _credit_line_year(label):
    """Return the last four characters of ``label`` parsed as an int."""
    return int(label[-4:])


# Reduce each earliesCreditLine value to its trailing four-digit year, then
# summarise the resulting numeric column.
data['earliesCreditLine'] = data['earliesCreditLine'].apply(_credit_line_year)
data['earliesCreditLine'].describe()
count 1000000.000000
mean 1998.688632
std 7.606231
min 1944.000000
25% 1995.000000
50% 2000.000000
75% 2004.000000
max 2015.000000
Name: earliesCreditLine, dtype: float64
# Preview the combined frame after the employmentLength / earliesCreditLine
# conversions.
data.head()
id | loanAmnt | term | interestRate | installment | grade | subGrade | employmentTitle | employmentLength | homeOwnership | ... | n7 | n8 | n9 | n10 | n11 | n12 | n13 | n14 | n2.2 | n2.3 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 35000.0 | 5 | 19.52 | 917.97 | E | E2 | 320.0 | 2.0 | 2 | ... | 4.0 | 12.0 | 2.0 | 7.0 | 0.0 | 0.0 | 0.0 | 2.0 | NaN | NaN |
1 | 1 | 18000.0 | 5 | 18.49 | 461.90 | D | D2 | 219843.0 | 5.0 | 0 | ... | NaN | NaN | NaN | 13.0 | NaN | NaN | NaN | NaN | NaN | NaN |
2 | 2 | 12000.0 | 5 | 16.99 | 298.17 | D | D3 | 31698.0 | 8.0 | 0 | ... | 4.0 | 5.0 | 3.0 | 11.0 | 0.0 | 0.0 | 0.0 | 4.0 | NaN | NaN |
3 | 3 | 11000.0 | 3 | 7.26 | 340.96 | A | A4 | 46854.0 | 10.0 | 1 | ... | 7.0 | 21.0 | 6.0 | 9.0 | 0.0 | 0.0 | 0.0 | 1.0 | NaN | NaN |
4 | 4 | 3000.0 | 3 | 12.99 | 101.07 | C | C2 | 54.0 | NaN | 1 | ... | 10.0 | 15.0 | 7.0 | 12.0 | 0.0 | 0.0 | 0.0 | 4.0 | NaN | NaN |
5 rows × 49 columns
# A subset of the categorical features.
cate_features = [
    'grade', 'subGrade', 'employmentTitle', 'homeOwnership',
    'verificationStatus', 'purpose', 'postCode', 'regionCode',
    'applicationType', 'initialListStatus', 'title', 'policyCode',
]
# Report the number of distinct values in each of those columns.
for col in cate_features:
    n_unique = data[col].nunique()
    print(col, '类型数:', n_unique)
grade 类型数: 7
subGrade 类型数: 35
employmentTitle 类型数: 298101
homeOwnership 类型数: 6
verificationStatus 类型数: 3
purpose 类型数: 14
postCode 类型数: 935
regionCode 类型数: 51
applicationType 类型数: 2
initialListStatus 类型数: 2
title 类型数: 47903
policyCode 类型数: 1
# Columns with more than two categories that are not high-dimensional/sparse
# are one-hot encoded; the first level of each is dropped.
one_hot_columns = [
    'grade',
    'subGrade',
    'homeOwnership',
    'verificationStatus',
    'purpose',
    'regionCode',
]
data = pd.get_dummies(data, columns=one_hot_columns, drop_first=True)
# High-cardinality categorical features are replaced by two derived columns:
#   <name>_cnts - number of rows sharing the same category value
#   <name>_rank - descending rank of the row's `id` within its category
# The original column is then removed.
for col in ['employmentTitle', 'postCode', 'title']:
    ids_by_category = data.groupby([col])['id']
    data[col + '_cnts'] = ids_by_category.transform('count')
    data[col + '_rank'] = ids_by_category.rank(ascending=False).astype(int)
    del data[col]
# Every column except the identifier, the issue date and the target is used
# as a model feature.
excluded_columns = ('id', 'issueDate', 'isDefault')
features = [col for col in data.columns if col not in excluded_columns]

# Rows that carry a label form the training set; rows without one form the
# test set.
has_label = data['isDefault'].notnull()
train = data[has_label].reset_index(drop=True)
test = data[~has_label].reset_index(drop=True)

x_train, x_test = train[features], test[features]
y_train = train['isDefault']
def cv_model(clf, train_x, train_y, test_x, clf_name):
    """Run 5-fold cross-validation for one model family.

    For every fold a model is fitted on the training part, evaluated with
    ROC-AUC on the validation part, and used to score ``test_x``.

    Args:
        clf: Backend matching ``clf_name``: an object exposing ``Dataset`` and
            ``train`` for "lgb", one exposing ``DMatrix`` and ``train`` for
            "xgb", or a callable estimator class invoked as
            ``clf(iterations=20000, **params)`` for "cat".
        train_x: Training features; must support ``.iloc`` and ``.shape``.
        train_y: Training labels aligned row-for-row with ``train_x``.
        test_x: Test features; must support ``.shape``.
        clf_name: One of "lgb", "xgb" or "cat".

    Returns:
        A ``(train, test)`` tuple of 1-D arrays: the out-of-fold prediction
        for every training row, and the test prediction averaged over all
        folds.

    Raises:
        ValueError: If ``clf_name`` is not one of the supported names.
    """
    if clf_name not in ("lgb", "xgb", "cat"):
        raise ValueError("Unsupported clf_name: %r" % (clf_name,))

    folds = 5
    seed = 2020
    kf = KFold(n_splits=folds, shuffle=True, random_state=seed)

    oof_pred = np.zeros(train_x.shape[0])
    test_avg = np.zeros(test_x.shape[0])
    cv_scores = []

    # KFold yields positional indices, so labels must be selected by position
    # too; plain `train_y[idx]` on a Series would look the values up by index
    # label and only works when the index is 0..n-1.
    y_rows = train_y.iloc if hasattr(train_y, 'iloc') else train_y

    for i, (train_index, valid_index) in enumerate(kf.split(train_x, train_y)):
        print('************************************ {} ************************************'.format(str(i+1)))
        trn_x, val_x = train_x.iloc[train_index], train_x.iloc[valid_index]
        trn_y, val_y = y_rows[train_index], y_rows[valid_index]

        if clf_name == "lgb":
            train_matrix = clf.Dataset(trn_x, label=trn_y)
            valid_matrix = clf.Dataset(val_x, label=val_y)

            # NOTE(review): 'nthread' and 'n_jobs' appear to be aliases of the
            # same LightGBM setting with different values here - confirm which
            # one is intended.
            params = {
                'boosting_type': 'gbdt',
                'objective': 'binary',
                'metric': 'auc',
                'min_child_weight': 5,
                'num_leaves': 2 ** 5,
                'lambda_l2': 10,
                'feature_fraction': 0.8,
                'bagging_fraction': 0.8,
                'bagging_freq': 4,
                'learning_rate': 0.1,
                'seed': 2020,
                'nthread': 28,
                'n_jobs': 24,
                'silent': True,
                'verbose': -1,
            }

            # NOTE(review): `verbose_eval` / `early_stopping_rounds` are
            # keyword arguments of older LightGBM releases; newer releases
            # expect callbacks instead - confirm against the installed version.
            model = clf.train(params, train_matrix, 50000,
                              valid_sets=[train_matrix, valid_matrix],
                              verbose_eval=200, early_stopping_rounds=200)
            val_pred = model.predict(val_x, num_iteration=model.best_iteration)
            test_pred = model.predict(test_x, num_iteration=model.best_iteration)

        elif clf_name == "xgb":
            train_matrix = clf.DMatrix(trn_x, label=trn_y)
            valid_matrix = clf.DMatrix(val_x, label=val_y)
            test_matrix = clf.DMatrix(test_x)

            params = {
                'booster': 'gbtree',
                'objective': 'binary:logistic',
                'eval_metric': 'auc',
                'gamma': 1,
                'min_child_weight': 1.5,
                'max_depth': 5,
                'lambda': 10,
                'subsample': 0.7,
                'colsample_bytree': 0.7,
                'colsample_bylevel': 0.7,
                'eta': 0.04,
                'tree_method': 'exact',
                'seed': 2020,
                'nthread': 36,
                "silent": True,
            }

            # Early stopping monitors the last entry of the watch list, i.e.
            # the validation fold.
            watchlist = [(train_matrix, 'train'), (valid_matrix, 'eval')]

            model = clf.train(params, train_matrix, num_boost_round=50000,
                              evals=watchlist, verbose_eval=200,
                              early_stopping_rounds=200)
            # NOTE(review): `ntree_limit` / `best_ntree_limit` belong to older
            # XGBoost releases - confirm against the installed version.
            val_pred = model.predict(valid_matrix, ntree_limit=model.best_ntree_limit)
            test_pred = model.predict(test_matrix, ntree_limit=model.best_ntree_limit)

        else:  # clf_name == "cat"
            params = {
                'learning_rate': 0.05,
                'depth': 5,
                'l2_leaf_reg': 10,
                'bootstrap_type': 'Bernoulli',
                'od_type': 'Iter',
                'od_wait': 50,
                'random_seed': 11,
                'allow_writing_files': False,
            }

            model = clf(iterations=20000, **params)
            model.fit(trn_x, trn_y, eval_set=(val_x, val_y),
                      cat_features=[], use_best_model=True, verbose=500)

            val_pred = model.predict(val_x)
            test_pred = model.predict(test_x)

        oof_pred[valid_index] = val_pred
        # Accumulate each fold's share so the result is the mean over folds.
        # (Plain assignment here would keep only the last fold divided by the
        # number of folds.)
        test_avg += test_pred / kf.n_splits
        cv_scores.append(roc_auc_score(val_y, val_pred))

        print(cv_scores)

    # The first label's spelling is kept as-is so logs stay comparable with
    # earlier runs.
    print("%s_scotrainre_list:" % clf_name, cv_scores)
    print("%s_score_mean:" % clf_name, np.mean(cv_scores))
    print("%s_score_std:" % clf_name, np.std(cv_scores))
    return oof_pred, test_avg
def lgb_model(x_train, y_train, x_test):
    """Cross-validate via ``cv_model`` with the ``lgb`` backend.

    Returns the pair produced by ``cv_model``.
    """
    oof_pred, test_pred = cv_model(lgb, x_train, y_train, x_test, "lgb")
    return oof_pred, test_pred
def xgb_model(x_train, y_train, x_test):
    """Cross-validate via ``cv_model`` with the ``xgb`` backend.

    Returns the pair produced by ``cv_model``.
    """
    oof_pred, test_pred = cv_model(xgb, x_train, y_train, x_test, "xgb")
    return oof_pred, test_pred
def cat_model(x_train, y_train, x_test):
    """Cross-validate via ``cv_model`` with ``CatBoostRegressor`` as backend.

    Returns the pair produced by ``cv_model``.
    """
    oof_pred, test_pred = cv_model(CatBoostRegressor, x_train, y_train, x_test, "cat")
    return oof_pred, test_pred
# Run the cross-validated LightGBM pipeline; keeps both values returned by
# lgb_model (named here as train-side and test-side predictions).
lgb_train, lgb_test = lgb_model(x_train, y_train, x_test)
************************************ 1 ************************************ Training until validation scores don't improve for 200 rounds. [200] training's auc: 0.742884 valid_1's auc: 0.73055 [400] training's auc: 0.755686 valid_1's auc: 0.731888 [600] training's auc: 0.766421 valid_1's auc: 0.731988 [800] training's auc: 0.776244 valid_1's auc: 0.731868 Early stopping, best iteration is: [656] training's auc: 0.769146 valid_1's auc: 0.732081 [0.7320814878889421] ************************************ 2 ************************************ Training until validation scores don't improve for 200 rounds. [200] training's auc: 0.74372 valid_1's auc: 0.726466 [400] training's auc: 0.756459 valid_1's auc: 0.727727 [600] training's auc: 0.767156 valid_1's auc: 0.727776 Early stopping, best iteration is: [520] training's auc: 0.762985 valid_1's auc: 0.727902 [0.7320814878889421, 0.7279015876934286] ************************************ 3 ************************************ Training until validation scores don't improve for 200 rounds. [200] training's auc: 0.742884 valid_1's auc: 0.731466 [400] training's auc: 0.755466 valid_1's auc: 0.732748 [600] training's auc: 0.766313 valid_1's auc: 0.733069 [800] training's auc: 0.776349 valid_1's auc: 0.732892 Early stopping, best iteration is: [694] training's auc: 0.771133 valid_1's auc: 0.73312 [0.7320814878889421, 0.7279015876934286, 0.7331203287449972] ************************************ 4 ************************************ Training until validation scores don't improve for 200 rounds. 
[200] training's auc: 0.742632 valid_1's auc: 0.730114 [400] training's auc: 0.755357 valid_1's auc: 0.731443 [600] training's auc: 0.765983 valid_1's auc: 0.731566 [800] training's auc: 0.776112 valid_1's auc: 0.731805 Early stopping, best iteration is: [706] training's auc: 0.771324 valid_1's auc: 0.731887 [0.7320814878889421, 0.7279015876934286, 0.7331203287449972, 0.731886588682118] ************************************ 5 ************************************ Training until validation scores don't improve for 200 rounds. [200] training's auc: 0.743113 valid_1's auc: 0.729226 [400] training's auc: 0.7559 valid_1's auc: 0.730816 [600] training's auc: 0.766388 valid_1's auc: 0.73092 [800] training's auc: 0.77627 valid_1's auc: 0.731029 [1000] training's auc: 0.785791 valid_1's auc: 0.730933 Early stopping, best iteration is: [883] training's auc: 0.780369 valid_1's auc: 0.731096 [0.7320814878889421, 0.7279015876934286, 0.7331203287449972, 0.731886588682118, 0.7310960057774112] lgb_scotrainre_list: [0.7320814878889421, 0.7279015876934286, 0.7331203287449972, 0.731886588682118, 0.7310960057774112] lgb_score_mean: 0.7312171997573793 lgb_score_std: 0.001779041696522632
# Run the cross-validated XGBoost pipeline; keeps both values returned by
# xgb_model (named here as train-side and test-side predictions).
xgb_train, xgb_test = xgb_model(x_train, y_train, x_test)
************************************ 1 ************************************ [0] train-auc:0.677293 eval-auc:0.678869 Multiple eval metrics have been passed: 'eval-auc' will be used for early stopping. Will train until eval-auc hasn't improved in 200 rounds. [200] train-auc:0.727527 eval-auc:0.723771 [400] train-auc:0.73516 eval-auc:0.727725 [600] train-auc:0.740458 eval-auc:0.729631 [800] train-auc:0.744963 eval-auc:0.730829 [1000] train-auc:0.748802 eval-auc:0.731495 [1200] train-auc:0.752295 eval-auc:0.732074 [1400] train-auc:0.755574 eval-auc:0.732421 [1600] train-auc:0.758671 eval-auc:0.732674 [1800] train-auc:0.761605 eval-auc:0.732964 [2000] train-auc:0.764627 eval-auc:0.733111 [2200] train-auc:0.767443 eval-auc:0.733201 [2400] train-auc:0.770204 eval-auc:0.733224 Stopping. Best iteration: [2328] train-auc:0.7692 eval-auc:0.733246 [0.7332460852050292] ************************************ 2 ************************************ [0] train-auc:0.677718 eval-auc:0.672523 Multiple eval metrics have been passed: 'eval-auc' will be used for early stopping. Will train until eval-auc hasn't improved in 200 rounds. [200] train-auc:0.728628 eval-auc:0.720255 [400] train-auc:0.736149 eval-auc:0.724308 [600] train-auc:0.741354 eval-auc:0.726443 [800] train-auc:0.745611 eval-auc:0.72746 [1000] train-auc:0.749627 eval-auc:0.728194 [1200] train-auc:0.753176 eval-auc:0.728711 [1400] train-auc:0.756476 eval-auc:0.72899 [1600] train-auc:0.759574 eval-auc:0.729224 [1800] train-auc:0.762608 eval-auc:0.729501 [2000] train-auc:0.765549 eval-auc:0.729627 [2200] train-auc:0.768304 eval-auc:0.729782 [2400] train-auc:0.771131 eval-auc:0.729922 [2600] train-auc:0.773769 eval-auc:0.729961 [2800] train-auc:0.776371 eval-auc:0.72999 Stopping. 
Best iteration: [2697] train-auc:0.775119 eval-auc:0.730036 [0.7332460852050292, 0.7300358478747684] ************************************ 3 ************************************ [0] train-auc:0.676641 eval-auc:0.67765 Multiple eval metrics have been passed: 'eval-auc' will be used for early stopping. Will train until eval-auc hasn't improved in 200 rounds. [200] train-auc:0.72757 eval-auc:0.724632 [400] train-auc:0.735185 eval-auc:0.728571 [600] train-auc:0.740671 eval-auc:0.73067 [800] train-auc:0.745049 eval-auc:0.731899 [1000] train-auc:0.748976 eval-auc:0.732787 [1200] train-auc:0.752383 eval-auc:0.73321 [1400] train-auc:0.75564 eval-auc:0.733548 [1600] train-auc:0.758796 eval-auc:0.733825 [1800] train-auc:0.761717 eval-auc:0.734007 [2000] train-auc:0.76459 eval-auc:0.734193 [2200] train-auc:0.767399 eval-auc:0.734261 [2400] train-auc:0.770174 eval-auc:0.734362 [2600] train-auc:0.772818 eval-auc:0.734369 [2800] train-auc:0.775568 eval-auc:0.734391 [3000] train-auc:0.777985 eval-auc:0.73444 [3200] train-auc:0.780514 eval-auc:0.734477 [3400] train-auc:0.782893 eval-auc:0.734427 Stopping. Best iteration: [3207] train-auc:0.780621 eval-auc:0.734494 [0.7332460852050292, 0.7300358478747684, 0.7344942212088965] ************************************ 4 ************************************ [0] train-auc:0.677768 eval-auc:0.677179 Multiple eval metrics have been passed: 'eval-auc' will be used for early stopping. Will train until eval-auc hasn't improved in 200 rounds. 
[200] train-auc:0.727614 eval-auc:0.72295 [400] train-auc:0.735165 eval-auc:0.726994 [600] train-auc:0.740498 eval-auc:0.729116 [800] train-auc:0.744884 eval-auc:0.730417 [1000] train-auc:0.748782 eval-auc:0.731318 [1200] train-auc:0.75225 eval-auc:0.731899 [1400] train-auc:0.755505 eval-auc:0.732295 [1600] train-auc:0.758618 eval-auc:0.732629 [1800] train-auc:0.76176 eval-auc:0.733046 [2000] train-auc:0.764736 eval-auc:0.733189 [2200] train-auc:0.767476 eval-auc:0.733276 [2400] train-auc:0.770154 eval-auc:0.733409 [2600] train-auc:0.772874 eval-auc:0.733469 [2800] train-auc:0.77541 eval-auc:0.733405 Stopping. Best iteration: [2644] train-auc:0.773429 eval-auc:0.733488 [0.7332460852050292, 0.7300358478747684, 0.7344942212088965, 0.7334876284761012] ************************************ 5 ************************************ [0] train-auc:0.677768 eval-auc:0.676353 Multiple eval metrics have been passed: 'eval-auc' will be used for early stopping. Will train until eval-auc hasn't improved in 200 rounds. [200] train-auc:0.728072 eval-auc:0.722913 [400] train-auc:0.735517 eval-auc:0.726582 [600] train-auc:0.740782 eval-auc:0.728449 [800] train-auc:0.745258 eval-auc:0.729653 [1000] train-auc:0.749185 eval-auc:0.730489 [1200] train-auc:0.752723 eval-auc:0.731038 [1400] train-auc:0.755985 eval-auc:0.731466 [1600] train-auc:0.759166 eval-auc:0.731758 [1800] train-auc:0.762205 eval-auc:0.73199 [2000] train-auc:0.765197 eval-auc:0.732145 [2200] train-auc:0.767976 eval-auc:0.732194 Stopping. Best iteration: [2191] train-auc:0.767852 eval-auc:0.732213 [0.7332460852050292, 0.7300358478747684, 0.7344942212088965, 0.7334876284761012, 0.7322134048106561] xgb_scotrainre_list: [0.7332460852050292, 0.7300358478747684, 0.7344942212088965, 0.7334876284761012, 0.7322134048106561] xgb_score_mean: 0.7326954375150903 xgb_score_std: 0.0015147392354657807
# Run the cross-validated CatBoost pipeline.
# NOTE(review): the recorded run of this cell ended with
# "TypeError: 'NoneType' object is not iterable" while unpacking the result -
# presumably the executed version of cat_model returned nothing; confirm.
cat_train, cat_test = cat_model(x_train, y_train, x_test)
************************************ 1 ************************************ 0: learn: 0.4415198 test: 0.4387088 best: 0.4387088 (0) total: 111ms remaining: 37m 6s 500: learn: 0.3772118 test: 0.3759665 best: 0.3759665 (500) total: 37.7s remaining: 24m 25s 1000: learn: 0.3756709 test: 0.3752058 best: 0.3752058 (1000) total: 1m 14s remaining: 23m 41s 1500: learn: 0.3745785 test: 0.3748423 best: 0.3748423 (1500) total: 1m 52s remaining: 23m 7s 2000: learn: 0.3736834 test: 0.3746564 best: 0.3746564 (2000) total: 2m 29s remaining: 22m 28s 2500: learn: 0.3728568 test: 0.3745180 best: 0.3745165 (2492) total: 3m 7s remaining: 21m 52s 3000: learn: 0.3720793 test: 0.3744201 best: 0.3744198 (2998) total: 3m 44s remaining: 21m 14s Stopped by overfitting detector (50 iterations wait) bestTest = 0.3744006318 bestIteration = 3086 Shrink model to first 3087 iterations. [0.7326058985428212] ************************************ 2 ************************************ 0: learn: 0.4406928 test: 0.4420714 best: 0.4420714 (0) total: 53.3ms remaining: 17m 46s 500: learn: 0.3765250 test: 0.3787287 best: 0.3787287 (500) total: 38.7s remaining: 25m 8s 1000: learn: 0.3749822 test: 0.3779503 best: 0.3779503 (998) total: 1m 16s remaining: 24m 18s 1500: learn: 0.3738772 test: 0.3775654 best: 0.3775654 (1500) total: 1m 54s remaining: 23m 34s 2000: learn: 0.3729354 test: 0.3773407 best: 0.3773401 (1999) total: 2m 33s remaining: 22m 56s 2500: learn: 0.3721077 test: 0.3771987 best: 0.3771971 (2496) total: 3m 10s remaining: 22m 15s 3000: learn: 0.3713621 test: 0.3771114 best: 0.3771114 (3000) total: 3m 49s remaining: 21m 37s Stopped by overfitting detector (50 iterations wait) bestTest = 0.3770400469 bestIteration = 3382 Shrink model to first 3383 iterations. 
[0.7326058985428212, 0.7292909146788396] ************************************ 3 ************************************ 0: learn: 0.4408230 test: 0.4418939 best: 0.4418939 (0) total: 59.1ms remaining: 19m 42s 500: learn: 0.3767851 test: 0.3776319 best: 0.3776319 (500) total: 40.4s remaining: 26m 12s 1000: learn: 0.3752331 test: 0.3768292 best: 0.3768292 (1000) total: 1m 20s remaining: 25m 19s 1500: learn: 0.3741550 test: 0.3764926 best: 0.3764926 (1500) total: 2m remaining: 24m 39s 2000: learn: 0.3732520 test: 0.3762840 best: 0.3762832 (1992) total: 2m 40s remaining: 24m 2s 2500: learn: 0.3724303 test: 0.3761303 best: 0.3761279 (2490) total: 3m 20s remaining: 23m 22s 3000: learn: 0.3716684 test: 0.3760402 best: 0.3760395 (2995) total: 4m remaining: 22m 42s 3500: learn: 0.3709308 test: 0.3759509 best: 0.3759502 (3495) total: 4m 40s remaining: 22m 2s 4000: learn: 0.3702269 test: 0.3759039 best: 0.3759027 (3993) total: 5m 20s remaining: 21m 20s 4500: learn: 0.3695477 test: 0.3758698 best: 0.3758663 (4459) total: 6m remaining: 20m 40s Stopped by overfitting detector (50 iterations wait) bestTest = 0.3758663409 bestIteration = 4459 Shrink model to first 4460 iterations. 
[0.7326058985428212, 0.7292909146788396, 0.7341207611812285] ************************************ 4 ************************************ 0: learn: 0.4408778 test: 0.4413264 best: 0.4413264 (0) total: 46.6ms remaining: 15m 32s 500: learn: 0.3768022 test: 0.3777678 best: 0.3777678 (500) total: 40.3s remaining: 26m 7s 1000: learn: 0.3753097 test: 0.3769403 best: 0.3769403 (1000) total: 1m 20s remaining: 25m 24s 1500: learn: 0.3742418 test: 0.3765698 best: 0.3765698 (1500) total: 2m remaining: 24m 41s 2000: learn: 0.3733478 test: 0.3763500 best: 0.3763496 (1998) total: 2m 40s remaining: 23m 59s 2500: learn: 0.3725263 test: 0.3762101 best: 0.3762093 (2488) total: 3m 20s remaining: 23m 19s 3000: learn: 0.3717486 test: 0.3760966 best: 0.3760966 (2999) total: 3m 59s remaining: 22m 36s Stopped by overfitting detector (50 iterations wait) bestTest = 0.3760182133 bestIteration = 3432 Shrink model to first 3433 iterations. [0.7326058985428212, 0.7292909146788396, 0.7341207611812285, 0.7324483603137153] ************************************ 5 ************************************ 0: learn: 0.4409876 test: 0.4409159 best: 0.4409159 (0) total: 52.3ms remaining: 17m 26s 500: learn: 0.3768055 test: 0.3776229 best: 0.3776229 (500) total: 38s remaining: 24m 38s 1000: learn: 0.3752600 test: 0.3768397 best: 0.3768397 (1000) total: 1m 15s remaining: 23m 57s 1500: learn: 0.3741843 test: 0.3764855 best: 0.3764855 (1500) total: 1m 53s remaining: 23m 16s 2000: learn: 0.3732691 test: 0.3762491 best: 0.3762490 (1998) total: 2m 31s remaining: 22m 40s 2500: learn: 0.3724407 test: 0.3761154 best: 0.3761154 (2500) total: 3m 9s remaining: 22m 5s 3000: learn: 0.3716764 test: 0.3760184 best: 0.3760184 (3000) total: 3m 47s remaining: 21m 26s 3500: learn: 0.3709545 test: 0.3759453 best: 0.3759453 (3500) total: 4m 24s remaining: 20m 47s Stopped by overfitting detector (50 iterations wait) bestTest = 0.3759421091 bestIteration = 3544 Shrink model to first 3545 iterations. 
[0.7326058985428212, 0.7292909146788396, 0.7341207611812285, 0.7324483603137153, 0.7312334660628076] cat_scotrainre_list: [0.7326058985428212, 0.7292909146788396, 0.7341207611812285, 0.7324483603137153, 0.7312334660628076] cat_score_mean: 0.7319398801558824 cat_score_std: 0.001610863965629903 --------------------------------------------------------------------------- TypeError Traceback (most recent call last) <ipython-input-25-2e9bafef31e8> in <module> ----> 1 cat_train, cat_test = cat_model(x_train, y_train, x_test) TypeError: 'NoneType' object is not iterable
# Blend the LightGBM and XGBoost test predictions with equal weights.
rh_test = 0.5 * lgb_test + 0.5 * xgb_test

# Attach the blended score to the original test frame and write the
# two-column submission file.
testA['isDefault'] = rh_test
submission = testA[['id', 'isDefault']]
submission.to_csv('test_sub.csv', index=False)
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。