赞
踩
目录
以下流水线将标准化与KNN分类封装在一起。
# Build, fit and score a two-step pipeline: standardise the features, then
# classify with k-nearest neighbours.
# StandardScaler and KNeighborsClassifier are imported here so the snippet
# runs top to bottom without relying on imports that appear later.
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Build the pipeline; step names ('scaler', 'knn') are the prefixes used to
# address each step's parameters later.
pipe = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('knn', KNeighborsClassifier()),
])

# Train (X_train / y_train are defined outside this snippet).
pipe.fit(X_train, y_train)

# Evaluate: print the test-set score rounded to two decimals.
print("测试集分类正确率:", round(pipe.score(X_test, y_test), 2))
from sklearn.model_selection import GridSearchCV

# Parameter grid. A pipeline step's parameter is addressed as
# "<step name>__<parameter>" (double underscore), so the KNN step named
# "knn" is tuned through the "knn__" prefix.
param_grid = dict(
    knn__n_neighbors=[2, 4, 6, 8, 10],
    knn__weights=['uniform', 'distance'],
)

# Grid search over the pipeline with 5-fold cross-validation (cv=5).
grid_search = GridSearchCV(pipe, param_grid, cv=5)
grid_search.fit(X_train, y_train)

# Score on the test set.
grid_search.score(X_test, y_test)
实现
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Two-step pipeline whose steps act as placeholders: the grid below swaps
# whole estimators in and out of the "scaler" and "model" slots.
pipe = Pipeline([
    ("scaler", MinMaxScaler()),
    ("model", LogisticRegression()),
])

# Candidate estimators for each slot.
scale_selector = [StandardScaler(), MinMaxScaler()]
model_selector = [KNeighborsClassifier(), SVC(), LogisticRegression()]

# Using a bare step name as the key makes the search replace that step.
param_grid = dict(scaler=scale_selector, model=model_selector)

# 5-fold cross-validated search over every scaler/model combination.
# NOTE(review): X_train_s / X_test_s are not defined in this snippet; if they
# are already-scaled copies, the pipeline's scaler step scales them again.
# Confirm against the code that creates them.
grid_search = GridSearchCV(pipe, param_grid, cv=5)
grid_search.fit(X_train_s, y_train)

# Show the winning pipeline, then score it on the test set.
print(grid_search.best_estimator_)
grid_search.score(X_test_s, y_test)
实现
import pandas as pd
from sklearn.feature_selection import RFECV
from sklearn.tree import DecisionTreeClassifier

# Add a feature-selection step to the pipeline: scale, select features with
# cross-validated recursive feature elimination, then classify.
pipe_new = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('selector', RFECV(DecisionTreeClassifier(random_state=10), cv=5)),
    ('model', KNeighborsClassifier()),
])
scale_selector = [StandardScaler(), MinMaxScaler()]
model_selector = [KNeighborsClassifier(), SVC(), LogisticRegression()]

# Parameter grid.
# 'class_weight' and 'C' exist only on some candidate models
# (KNeighborsClassifier has neither). Putting them in the same grid as every
# model makes set_params() raise ValueError as soon as KNN is tried, so the
# candidates are split into two sub-grids.
tunable_models, plain_models = [], []
for candidate in model_selector:
    accepts_both = {'class_weight', 'C'} <= set(candidate.get_params())
    (tunable_models if accepts_both else plain_models).append(candidate)

param_grid = [
    # Models without class_weight / C: only scaler and model are varied.
    {'scaler': scale_selector,
     'model': plain_models},
    # Models exposing class_weight and C: tune those as well.
    {'scaler': scale_selector,
     'model': tunable_models,
     'model__class_weight': ['balanced', None],
     'model__C': [0.01, 0.1, 0.2, 0.5, 1]},
]

# Grid search with 5-fold cross-validation.
grid_search = GridSearchCV(estimator=pipe_new, param_grid=param_grid, cv=5)
grid_search.fit(X_train, y_train)

# Inspect the best pipeline's selector: feature ranking labelled by column
# (X_train.columns assumes X_train is a DataFrame).
pd.Series(grid_search.best_estimator_.named_steps['selector'].ranking_, index=X_train.columns)
实现
from sklearn.decomposition import PCA

# Add PCA to the pipeline: scale, project onto principal components, classify.
pipe_new = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('decomposition', PCA(3)),
    ('model', KNeighborsClassifier()),
])

# Parameter grid.
# 'class_weight' and 'C' are not parameters of every candidate in
# model_selector (KNeighborsClassifier has neither). A single grid would make
# set_params() raise ValueError for KNN, so the candidates are split into two
# sub-grids that share the scaler and PCA settings.
tunable_models, plain_models = [], []
for candidate in model_selector:
    accepts_both = {'class_weight', 'C'} <= set(candidate.get_params())
    (tunable_models if accepts_both else plain_models).append(candidate)

shared_grid = {
    'scaler': scale_selector,
    'decomposition__n_components': [2, 3, 4, 5, 6],
}
param_grid = [
    # Models without class_weight / C.
    {**shared_grid,
     'model': plain_models},
    # Models exposing class_weight and C.
    {**shared_grid,
     'model': tunable_models,
     'model__class_weight': ['balanced', None],
     'model__C': [0.01, 0.1, 0.2, 0.5, 1]},
]

# Grid search with 5-fold cross-validation.
grid_search = GridSearchCV(estimator=pipe_new, param_grid=param_grid, cv=5)
grid_search.fit(X_train, y_train)

# Total explained-variance ratio retained by the best pipeline's PCA step.
grid_search.best_estimator_.named_steps['decomposition'].explained_variance_ratio_.sum()
实现
from sklearn.decomposition import PCA
from sklearn.feature_selection import RFECV
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

# Pipeline with three swappable slots: scaler, selector (PCA or RFECV), model.
# Each step must be an estimator *instance*, hence StandardScaler() with
# parentheses rather than the bare class.
pipe_new2 = Pipeline(steps=[
    ("scaler", StandardScaler()),
    ("selector", PCA(3)),
    ("model", KNeighborsClassifier()),
])

# Candidate estimators for each slot.
model_selector = [LogisticRegression(random_state=10), SVC(), KNeighborsClassifier()]
scaler_selector = [StandardScaler(), MinMaxScaler()]
selector_selector = [PCA(3), RFECV(DecisionTreeClassifier(random_state=10), cv=5)]

# 'class_weight' and 'C' exist only on some candidate models
# (KNeighborsClassifier has neither). A single grid would make set_params()
# raise ValueError for KNN, so the candidates are split into two sub-grids.
tunable_models, plain_models = [], []
for candidate in model_selector:
    accepts_both = {"class_weight", "C"} <= set(candidate.get_params())
    (tunable_models if accepts_both else plain_models).append(candidate)

shared_grid = {"scaler": scaler_selector, "selector": selector_selector}
param_grid_2 = [
    # Models without class_weight / C.
    {**shared_grid,
     "model": plain_models},
    # Models exposing class_weight and C.
    {**shared_grid,
     "model": tunable_models,
     "model__class_weight": ["balanced", None],
     "model__C": [0.01, 0.1, 0.2, 0.5, 1]},
]

# Grid search with 5-fold cross-validation.
grid_search = GridSearchCV(estimator=pipe_new2, param_grid=param_grid_2, cv=5)
grid_search.fit(X_train, y_train)

# The winning selector may be either PCA or RFECV. Only PCA exposes
# explained_variance_ratio_, so fall back to RFECV's feature ranking
# instead of raising AttributeError.
best_selector = grid_search.best_estimator_.named_steps["selector"]
best_selector.explained_variance_ratio_.sum() if isinstance(best_selector, PCA) else best_selector.ranking_
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。