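# Like     (appears to be page-widget text captured with the snippet; not part of the script)
# Dislike  (appears to be page-widget text captured with the snippet; not part of the script)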
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from lightgbm import LGBMClassifier
from sklearn.preprocessing import PolynomialFeatures
# Label-encode every object-dtype column of ``df`` in place: missing values
# become the string '0', then each distinct value is replaced by an integer code.
object_columns = df.select_dtypes(include='object').columns
for col in object_columns:
    filled = df[col].fillna('0')
    # Series.unique() returns values in order of first appearance, so the
    # value -> code table is identical on every run.  Iterating a set of
    # strings (the previous approach) is not, because string hashing is
    # randomized per process, which made the encoded features irreproducible.
    codes = {value: code for code, value in enumerate(filled.unique())}
    df[col] = filled.map(codes)
from sklearn.preprocessing import PolynomialFeatures
# Build the model inputs: every column except 'slide' is a feature, and
# 'slide' is used as the target.
df = df.fillna(0)  # any remaining missing values become 0
x_train = df.drop('slide', axis=1)
y_train = df['slide']

# Expand the features with interaction terms only (products of up to three
# distinct input columns; no bias column and no pure powers such as x**2).
# NOTE(review): the number of generated columns grows roughly cubically with
# the number of input columns -- confirm this is acceptable for wide frames.
poly = PolynomialFeatures(degree=3, include_bias=False, interaction_only=True)
poly_features = poly.fit_transform(x_train)
feature_names = poly.get_feature_names_out()

# Reuse the original row index so the feature frame and ``y_train`` keep the
# same row labels; without it the frame gets a fresh RangeIndex, which no
# longer matches the target whenever ``df`` has a non-default index.
poly_df = pd.DataFrame(poly_features, columns=feature_names, index=x_train.index)
X_df = poly_df
# ``fit(verbose=...)` was removed in LightGBM 4.0; periodic evaluation logging
# is requested through the ``log_evaluation`` callback instead.
from lightgbm import log_evaluation

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible.
train_x, test_x, train_y, test_y = train_test_split(
    X_df, y_train, test_size=0.2, random_state=42
)

model = LGBMClassifier(
    boosting_type='gbdt',     # base learner: 'gbdt' = classic gradient-boosted trees; 'dart' = dropout-based additive regression trees
    n_estimators=500,         # number of boosting iterations
    learning_rate=0.1,        # step size (shrinkage)
    max_depth=4,              # maximum depth of each tree
    min_child_weight=1,       # minimum sum of sample weights required in a leaf
    # min_split_gain=0.1,     # minimum loss reduction required to split a leaf further
    subsample=1,              # fraction of rows sampled for each tree
    colsample_bytree=1,       # fraction of features sampled for each tree (typical values: 0.5-1)
    random_state=27,          # fixed seed so results can be reproduced
    importance_type='gain',   # how feature importance is computed: 'split' = number of splits, 'gain' = total gain
    objective='binary',
)

# Track AUC on both the training and the held-out set, logging every 50 rounds.
model.fit(
    train_x,
    train_y,
    eval_metric="auc",
    eval_set=[(train_x, train_y), (test_x, test_y)],
    callbacks=[log_evaluation(period=50)],
)

# classification_report expects (y_true, y_pred); passing them the other way
# round swaps precision with recall and reports support of the predictions.
print(classification_report(test_y, model.predict(test_x)))
# Pair each feature name reported by the fitted model with its importance
# score and rank the features from most to least important.
importance_rows = list(zip(model.feature_name_, model.feature_importances_))
feature_import_df = pd.DataFrame(
    importance_rows, columns=['feature', 'import_values']
).sort_values(by='import_values', ascending=False)
# Bare expression: shows the table when run as the last line of a notebook
# cell; it has no effect when the file is run as a plain script.
feature_import_df
# Copyright © 2003-2013 www.wpsshop.cn. All rights reserved.