赞
踩
# Train a random forest classifier on a table loaded from an Excel workbook,
# report train/test accuracy, and draw a horizontal bar chart of the feature
# importances.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# NOTE(review): `readFileName` (the workbook passed to pd.read_excel) and
# `trees` (the value passed as n_estimators) are not defined anywhere in this
# snippet; they must be assigned before this point.
df = pd.read_excel(readFileName)

# Names of every column except the last one.
list_columns = list(df.columns[:-1])

# NOTE(review): the snippet used X, y, names and n_features without defining
# them. They are derived here on the assumption that the last column is the
# target and all preceding columns are features, which is what `list_columns`
# suggests -- TODO confirm against the real data layout.
X = df.iloc[:, :-1]
y = df.iloc[:, -1]
names = list_columns
n_features = len(names)

x_train, x_test, y_train, y_test = train_test_split(X, y, random_state=0)

# n_estimators is the number of trees; 100 trees were enough in testing.
forest = RandomForestClassifier(n_estimators=trees, random_state=0)
forest.fit(x_train, y_train)

print("random forest with %d trees:" % trees)
print("accuracy on the training subset:{:.3f}".format(forest.score(x_train, y_train)))
print("accuracy on the test subset:{:.3f}".format(forest.score(x_test, y_test)))
print('Feature importances:{}'.format(forest.feature_importances_))

# One horizontal bar per feature, labelled with the feature's column name.
plt.barh(range(n_features), forest.feature_importances_, align='center')
plt.yticks(np.arange(n_features), names)
plt.title("random forest with %d trees:" % trees)
plt.xlabel('Feature Importance')
版权声明:文章来自公众号(python风控模型),未经许可,不得抄袭。遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接及本声明。
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。