赞
踩
本文提供四种预测模型的实现方案及评估结果:KNN、逻辑回归、决策树分类器,以及以前三者为基学习器的 Voting 集成模型。
# Predict the 'Attrition' column with three grid-search-tuned classifiers
# (KNN, logistic regression, decision tree) and a voting ensemble built on
# top of them. Each stage prints the scores it computes.
import numpy as np
import pandas as pd
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier

# 1. Load the data and separate the features (x) from the target (y).
worker = pd.read_csv('C:\\Users\\Liu\\Desktop\\data.csv')
x = worker.drop(columns=['Attrition'])
y = worker['Attrition']

# 2. Split into train and test sets; the fixed seed keeps the split
#    reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=666)

# 3. Tune each base model with a grid search.

# (1) KNN: search over the number of neighbours.
knn_clf = KNeighborsClassifier()
param_knn = {
    'n_neighbors': list(range(1, 7)),
}
knn_grid = GridSearchCV(knn_clf, param_knn)
knn_grid.fit(x_train, y_train)
# With no explicit `scoring`, score() falls back to the classifier's own
# score, so this matches accuracy_score(y_test, knn_grid.predict(x_test)).
print('KNN test score:', knn_grid.score(x_test, y_test))
knn_best_model = knn_grid.best_estimator_
print('KNN best CV score:', knn_grid.best_score_)

# (2) Logistic regression: two sub-grids, because the L1 penalty is only
#     paired with the liblinear solver here while L2 is tried with every
#     listed solver.
lr_clf = LogisticRegression()
param_lr = [
    {
        'C': [0.0001, 0.01, 0.1, 1],
        'penalty': ['l1'],
        'solver': ['liblinear'],
    },
    {
        'C': [0.0001, 0.01, 0.1, 1],
        'penalty': ['l2'],
        'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],
    },
]
lr_grid = GridSearchCV(lr_clf, param_lr)
lr_grid.fit(x_train, y_train)
lr_best_model = lr_grid.best_estimator_
print('Logistic regression best CV score:', lr_grid.best_score_)

# (3) Decision tree: the constructor's criterion is overridden by the grid,
#     which tries both 'gini' and 'entropy'.
dt_clf = DecisionTreeClassifier(random_state=30, splitter='random', criterion='gini')
param_dt = {
    'criterion': ['gini', 'entropy'],
    'max_depth': list(range(2, 10)),
    'min_samples_leaf': list(range(1, 10)),
    'min_samples_split': list(range(2, 10)),
}
dt_grid = GridSearchCV(dt_clf, param_grid=param_dt, cv=8)
dt_grid.fit(x_train, y_train)
print('Decision tree test score:', dt_grid.score(x_test, y_test))
dt_best_model = dt_grid.best_estimator_
print('Decision tree best CV score:', dt_grid.best_score_)

# 4. Build a voting model from the three tuned base learners and grid-search
#    the voting strategy (soft vs. hard).
voting_param = {
    'voting': ['soft', 'hard'],
}
voting_clf = VotingClassifier(
    estimators=[
        ('knn_clf', knn_best_model),
        ('lr_clf', lr_best_model),
        ('dt_clf', dt_best_model),
    ]
)
model = GridSearchCV(voting_clf, cv=10, param_grid=voting_param)
model.fit(x_train, y_train)
print('Voting best CV score:', model.best_score_)
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。