def train_model(x_train, y_train, x_test, model_name, cv_state=True):
    '''
    Parameters
    ----------
    x_train : training-set x, 2-D np.array, shape [samples_train, features_train]
    y_train : training-set y, 1-D np.array, shape [samples_train]
    x_test : test-set x, 2-D np.array, shape [samples_test, features_test]
    model_name : which model to use: 'random forest' / 'XGB'
    cv_state : whether to run hyperparameter selection with cross-validation

    Returns
    -------
    y_predictoftest : predictions for the test set, obtained from x_test
    '''
    if cv_state:  # cv_state=True: use GridSearchCV to pick the best parameters
        if model_name == 'random forest':
            model = RandomForestRegressor(random_state=1, criterion='squared_error')
            # parameter grid to search
            parameters = [{"max_features": range(1, 32, 3),
                           "min_samples_leaf": range(1, 20, 3),
                           "max_depth": range(1, 20, 3)
                           }]
            grid = GridSearchCV(model, parameters, cv=10, scoring="neg_mean_squared_error", verbose=10)
            grid.fit(x_train, y_train)
            print('best_params_=', grid.best_params_)
            print('best_score_=', grid.best_score_)
            model = RandomForestRegressor(random_state=1, criterion='squared_error',
                                          max_features=grid.best_params_['max_features'],
                                          min_samples_leaf=grid.best_params_['min_samples_leaf'],
                                          max_depth=grid.best_params_['max_depth'])
        elif model_name == 'XGB':
            model = xgb.XGBRegressor(random_state=1)
            # parameter grid to search
            parameters = [{"eta": [0.3, 0.2, 0.1],
                           "max_depth": [3, 5, 6, 10, 20],
                           "n_estimators": [100, 200, 500],
                           'gamma': [0, 0.1, 0.2, 0.5, 1]
                           }]
            grid = GridSearchCV(model, parameters, cv=10, scoring="neg_mean_squared_error", verbose=10)
            grid.fit(x_train, y_train)
            print('best_params_=', grid.best_params_)
            print('best_score_=', grid.best_score_)
            model = xgb.XGBRegressor(random_state=1,
                                     eta=grid.best_params_['eta'],
                                     max_depth=grid.best_params_['max_depth'],
                                     n_estimators=grid.best_params_['n_estimators'],
                                     gamma=grid.best_params_['gamma'])

    else:  # cv_state=False: set the model parameters yourself and predict directly
        if model_name == 'random forest':
            model = RandomForestRegressor(random_state=1, criterion='squared_error', max_depth=7,
                                          max_features=31, min_samples_leaf=10)
        elif model_name == 'XGB':
            # model = xgb.XGBRegressor(random_state=1, learning_rate=0.1, max_depth=2, n_estimators=100)
            model = xgb.XGBRegressor(random_state=1, gamma=0.1, max_depth=3, n_estimators=100)

    regr = model.fit(x_train, y_train)
    y_predictoftest = regr.predict(x_test)

    return y_predictoftest
import numpy as np
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor
import xgboost as xgb
from sklearn.model_selection import StratifiedKFold
The types of x_train, y_train, x_test, y_test are described in the train_model docstring.
y_predictoftest = train_model(x_train, y_train, x_test, 'XGB', cv_state=True)
Pass 'XGB' to use XGBoost;
pass 'random forest' to use random forest.
cv_state: whether to tune the hyperparameters with GridSearchCV.
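As a quick check of the call above, here is a minimal sketch on synthetic data; the random regression data, the cv_state=False setting, and the mean_squared_error comparison are illustrative assumptions, not part of the original workflow:

# Minimal smoke test for train_model (illustrative only):
# synthetic regression data stands in for the real x_train/y_train/x_test/y_test.
import numpy as np
from sklearn.metrics import mean_squared_error

rng = np.random.default_rng(0)
x_all = rng.normal(size=(200, 8))                 # 200 samples, 8 features
y_all = x_all[:, 0] * 2.0 + rng.normal(size=200)  # simple linear target plus noise

x_train, x_test = x_all[:150], x_all[150:]
y_train, y_test = y_all[:150], y_all[150:]

# cv_state=False skips GridSearchCV so the example runs quickly; 'XGB' is used here
# because the fixed random-forest settings assume at least 31 features (max_features=31).
y_predictoftest = train_model(x_train, y_train, x_test, 'XGB', cv_state=False)
print('test MSE =', mean_squared_error(y_test, y_predictoftest))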
Random Forest section of the scikit-learn documentation:
Classifier:
sklearn.ensemble.RandomForestClassifier — scikit-learn 1.1.1 documentation
Regressor:
sklearn.ensemble.RandomForestRegressor — scikit-learn 1.1.1 documentation
XGBoost: