赞
踩
"""Predict missing 'MM' ratings with random forests and export the results.

Rows of the input workbook that have an 'MM' value are used for training;
rows whose 'MM' value is missing are predicted.  Two models are fitted (a
random-forest classifier and a random-forest regressor), each writes its
predictions to its own workbook, and the feature importances of both models
are exported side by side.
"""
from functools import reduce

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.model_selection import train_test_split

# Seed shared by the train/test split and by both forests.
# NOTE(review): presumably chosen with find_best_random_state() - confirm.
RANDOM_STATE = 27
TEST_SIZE = 0.2


def find_best_random_state(X, y, candidates=range(50, -1, -1)):
    """Return ``(state, accuracy)`` of the best-scoring seed in *candidates*.

    For every candidate seed the data is split, a RandomForestClassifier is
    fitted with that seed, and its accuracy on the held-out split is measured.
    Each new best is printed as ``score state``.  Ties keep the candidate
    seen first.  Returns ``(None, 0)`` when no candidate scores above zero.

    This is an exploratory helper and is not called by the script itself.
    A seed selected this way makes the reported hold-out accuracy optimistic,
    because the seed was tuned on that same hold-out split.
    """
    # Local import: only this optional helper needs sklearn.metrics.
    from sklearn.metrics import accuracy_score

    best_score, best_state = 0, None
    for state in candidates:
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=TEST_SIZE, random_state=state)
        model = RandomForestClassifier(random_state=state)
        model.fit(X_train, y_train)
        score = accuracy_score(y_test, model.predict(X_test))
        if score > best_score:
            best_score, best_state = score, state
            print(best_score, best_state)
    return best_state, best_score


def _fit_predict_export(model, X, y, X_pred, output_path):
    """Fit *model*, print its hold-out score, export predictions for *X_pred*.

    The model is fitted on 80% of ``(X, y)``; the remaining 20% is used only
    for the printed score (mean accuracy for a classifier, R^2 for a
    regressor).  Predictions for *X_pred* are written to *output_path* as a
    single-column sheet.  Returns the fitted model.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE)
    model.fit(X_train, y_train)
    print(type(model).__name__, 'hold-out score:', model.score(X_test, y_test))

    predictions = pd.DataFrame(model.predict(X_pred))
    predictions.to_excel(output_path)
    return model


def _importance_frame(model, feature_names, column):
    """Return a frame pairing each feature name with *model*'s importance."""
    return pd.DataFrame({
        'Feature': feature_names,
        column: model.feature_importances_,
    })


# Load the data and define the basic settings.
data = pd.read_excel('data2(Topsis评分评级).xlsx')
data = data.drop(columns=['ID'])

# Rows without a target value are the ones to predict; training uses only
# rows that have no missing value in any column.
prediction_set = data[data['MM'].isna()]
training_set = data.dropna()

features = ['XXX', 'XXX']  # put the spreadsheet's feature column names here
X = training_set[features]
y = training_set['MM']
X_pred = prediction_set[features]

# Method 1: random-forest classifier (treats 'MM' as discrete classes).
classifier = _fit_predict_export(
    RandomForestClassifier(random_state=RANDOM_STATE),
    X, y, X_pred,
    'Topsis评级预测-后20.xlsx',
)

# Method 2: random-forest regressor (requires a numeric 'MM').  It writes to
# its own workbook so that it does not overwrite the classifier's output.
regressor = _fit_predict_export(
    RandomForestRegressor(random_state=RANDOM_STATE),
    X, y, X_pred,
    'Topsis评级预测-后20(回归器).xlsx',
)

# Feature-importance analysis: one importance column per model, merged on the
# feature name.  'Importance_behavior' keeps the original column name and
# holds the classifier's importances.
dfs = [
    _importance_frame(classifier, features, 'Importance_behavior'),
    _importance_frame(regressor, features, 'Importance_regressor'),
]
df = reduce(
    lambda left, right: pd.merge(left, right, on="Feature", how="outer"),
    dfs,
)
df.to_excel('TopsisImportances-后20(8个特征重要性分析).xlsx')

要注意的是：如果使用分类器，y 的取值需要是离散的数值；
如果使用回归器，y 不要求是离散数据，但必须是数值。
所以无论使用哪种方法，都要先对目标列进行数值化处理。
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。