赞
踩
使用网格搜索法对5个模型进行调优(调参时采用五折交叉验证的方式),并进行模型评估
可以使用sklearn中的网格搜索
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

# Minimal grid-search example: tune C and max_iter of a logistic regression
# with 5-fold cross-validation, then predict on both splits.
# The estimator is passed in unfitted: GridSearchCV clones it for every
# candidate, so fitting it beforehand would only waste one training run.
clf = LogisticRegression(C=1.0, max_iter=1000)
parameters = {'C': [1.0, 2.0, 3.0, 4.0, 5.0], 'max_iter': [100, 200, 500, 1000, 1500, 2000]}
grid = GridSearchCV(clf, parameters, cv=5)
# fit() returns the search object itself; with the default refit=True it then
# predicts using the best parameter combination retrained on all of train_data.
grid = grid.fit(train_data, train_label)
grid_test = grid.predict(test_data)
grid_train = grid.predict(train_data)
- from sklearn import metrics
- import pandas as pd
- import numpy as np
- from sklearn.model_selection import StratifiedKFold
- from sklearn.linear_model import LogisticRegression
- from sklearn.ensemble import RandomForestClassifier
- from sklearn.tree import DecisionTreeClassifier
- from sklearn import svm
- from xgboost.sklearn import XGBClassifier
- from sklearn.feature_selection import RFE
- from sklearn.metrics import roc_curve, auc
- import matplotlib.pyplot as plt
- from sklearn.model_selection import GridSearchCV
-
def LR_classifier(train_data, train_label, test_data, test_label):
    """Grid-search a logistic regression and predict on both data splits.

    The search covers ``C`` and ``max_iter`` and scores every candidate with
    5-fold cross-validation on the training data.

    Args:
        train_data: Training features, passed to ``GridSearchCV.fit``.
        train_label: Training targets, passed to ``GridSearchCV.fit``.
        test_data: Features of the held-out split to predict.
        test_label: Unused; kept so all classifier helpers share a signature.

    Returns:
        A ``(test_predictions, train_predictions)`` tuple produced by the
        tuned search object.
    """
    # No pre-fit: GridSearchCV clones the estimator for each candidate, so a
    # model fitted here would be thrown away.
    estimator = LogisticRegression(C=1.0, max_iter=1000)
    param_grid = {
        'C': [1.0, 2.0, 3.0, 4.0, 5.0],
        'max_iter': [100, 200, 500, 1000, 1500, 2000],
    }
    search = GridSearchCV(estimator, param_grid, cv=5)
    search.fit(train_data, train_label)
    test_pred = search.predict(test_data)
    train_pred = search.predict(train_data)
    return test_pred, train_pred
-
def svm_classifier(train_data, train_label, test_data, test_label):
    """Grid-search a linear-kernel SVM and predict on both data splits.

    Only ``C`` is searched, with 5-fold cross-validation on the training data.

    Args:
        train_data: Training features, passed to ``GridSearchCV.fit``.
        train_label: Training targets, passed to ``GridSearchCV.fit``.
        test_data: Features of the held-out split to predict.
        test_label: Unused; kept so all classifier helpers share a signature.

    Returns:
        A ``(test_predictions, train_predictions)`` tuple produced by the
        tuned search object.
    """
    # No pre-fit: GridSearchCV clones the estimator for each candidate, so a
    # model fitted here would be thrown away.
    estimator = svm.SVC(C=1.0, kernel='linear', gamma=20)
    # gamma is only used by the rbf/poly/sigmoid kernels; with kernel='linear'
    # the former grid 'gamma': [5, 10, 15, 20, 25] repeated the same fit five
    # times per C without changing any score, so it is left out of the search.
    # NOTE(review): if an RBF kernel was intended, change kernel= above and
    # restore the gamma grid.
    param_grid = {'C': [1.0, 2.0, 3.0]}
    search = GridSearchCV(estimator, param_grid, cv=5)
    search.fit(train_data, train_label)
    test_pred = search.predict(test_data)
    train_pred = search.predict(train_data)
    return test_pred, train_pred
-
def dt_classifier(train_data, train_label, test_data, test_label):
    """Grid-search a decision tree's depth and predict on both data splits.

    The search covers ``max_depth`` and scores every candidate with 5-fold
    cross-validation on the training data.

    Args:
        train_data: Training features, passed to ``GridSearchCV.fit``.
        train_label: Training targets, passed to ``GridSearchCV.fit``.
        test_data: Features of the held-out split to predict.
        test_label: Unused; kept so all classifier helpers share a signature.

    Returns:
        A ``(test_predictions, train_predictions)`` tuple produced by the
        tuned search object.
    """
    # No pre-fit: GridSearchCV clones the estimator for each candidate, so a
    # model fitted here would be thrown away.
    estimator = DecisionTreeClassifier(max_depth=5)
    param_grid = {'max_depth': [2, 5, 8, 10, 15]}
    search = GridSearchCV(estimator, param_grid, cv=5)
    search.fit(train_data, train_label)
    test_pred = search.predict(test_data)
    train_pred = search.predict(train_data)
    return test_pred, train_pred
-
def rf_classifier(train_data, train_label, test_data, test_label):
    """Grid-search a random forest and predict on both data splits.

    The search covers ``n_estimators``, ``random_state``, ``max_depth`` and
    ``min_samples_split`` and scores every candidate with 5-fold
    cross-validation on the training data.

    Args:
        train_data: Training features, passed to ``GridSearchCV.fit``.
        train_label: Training targets, passed to ``GridSearchCV.fit``.
        test_data: Features of the held-out split to predict.
        test_label: Unused; kept so all classifier helpers share a signature.

    Returns:
        A ``(test_predictions, train_predictions)`` tuple produced by the
        tuned search object.
    """
    # No pre-fit: GridSearchCV clones the estimator for each candidate, so a
    # model fitted here would be thrown away.
    estimator = RandomForestClassifier(
        n_estimators=8,
        random_state=5,
        max_depth=6,
        min_samples_split=2,
    )
    # NOTE(review): random_state is a seed rather than a model hyperparameter;
    # searching over it multiplies the grid by five (750 candidates in total)
    # and selects a seed that happens to score well. Consider fixing it.
    param_grid = {
        'n_estimators': [3, 5, 8, 10, 14],
        'random_state': [2, 3, 5, 7, 9],
        'max_depth': [5, 6, 8, 9, 10, 15],
        'min_samples_split': [2, 3, 4, 5, 6],
    }
    search = GridSearchCV(estimator, param_grid, cv=5)
    search.fit(train_data, train_label)
    test_pred = search.predict(test_data)
    train_pred = search.predict(train_data)
    return test_pred, train_pred
-
def xgb_classifier(train_data, train_label, test_data, test_label):
    """Grid-search an XGBoost classifier and predict on both data splits.

    The search covers ``n_estimators``, ``learning_rate``, ``max_depth``,
    ``gamma`` and ``seed`` and scores every candidate with 5-fold
    cross-validation on the training data.

    Args:
        train_data: Training features, passed to ``GridSearchCV.fit``.
        train_label: Training targets, passed to ``GridSearchCV.fit``.
        test_data: Features of the held-out split to predict.
        test_label: Unused; kept so all classifier helpers share a signature.

    Returns:
        A ``(test_predictions, train_predictions)`` tuple produced by the
        tuned search object.
    """
    # No pre-fit: GridSearchCV clones the estimator for each candidate, so a
    # model fitted here would be thrown away.
    # NOTE(review): num_class=1 is passed explicitly; confirm the installed
    # xgboost version accepts it for this objective.
    estimator = XGBClassifier(
        n_estimators=8,
        learning_rate=0.25,
        max_depth=20,
        subsample=1,
        gamma=13,
        seed=1000,
        num_class=1,
    )
    # NOTE(review): seed is not a model hyperparameter; searching over it
    # triples the grid (2250 candidates in total). Consider fixing it.
    param_grid = {
        'n_estimators': [3, 5, 8, 10, 14],
        'learning_rate': [0.1, 0.2, 0.25, 0.3, 0.35, 0.4],
        'max_depth': [5, 10, 15, 20, 25],
        'gamma': [6, 9, 12, 13, 15],
        'seed': [500, 1000, 1500],
    }
    search = GridSearchCV(estimator, param_grid, cv=5)
    search.fit(train_data, train_label)
    test_pred = search.predict(test_data)
    train_pred = search.predict(train_data)
    return test_pred, train_pred

模型 | Accuracy | Precision | Recall | F1_score | AUC | ROC |
--- | --- | --- | --- | --- | --- | --- |
Logistic Regression | train:0.7913, test:0.787 | train:0.7351, test:0.7195 | train:0.2668, test:0.2552 | train:0.3915, test:0.3759 | train:0.6173, test:0.6105 | ![]() |
Support Vector Machine | train:0.7793, test:0.7762 | train:0.8025, test:0.7783 | train:0.1632, test:0.1554 | train:0.2712, test:0.2588 | train:0.5748, test:0.5702 | ![]() |
Decision Tree | train:0.78, test:0.7732 | train:0.8241, test:0.7474 | train:0.1611, test:0.1537 | train:0.2632, test:0.2483 | train:0.5746, test:0.5676 | ![]() |
Random Forest | train:0.8311, test:0.778 | train:0.9326, test:0.7077 | train:0.3504, test:0.2058 | train:0.502, test:0.3172 | train:0.6716, test:0.5881 | ![]() |
XGBoost | train:0.839, test:0.7843 | train:0.8456, test:0.6484 | train:0.4402, test:0.3127 | train:0.5786, test:0.4215 | train:0.7067, test:0.6278 | ![]() |
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。