赞
踩
3. 结合上述两个模型,构建集成学习模型,针对康复医学科三病房,预测对应的门诊收入数据。已提供的三病房数据可作为集成学习模型微调训练的基础数据。
为了完成第三小问的任务,即构建集成学习模型来预测康复医学科三病房的门诊收入数据,我们将按照以下步骤进行:
# Ensemble forecast (ARIMA + random forest) of daily outpatient revenue for a
# single hospital ward.
#
# Pipeline: load -> filter ward -> clean -> chronological train/validation/test
# split -> fit both base models -> train a meta-learner on the base models'
# validation-period forecasts -> evaluate all three on the held-out test period.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
# StackingRegressor stays imported for compatibility with the rest of the file,
# but it is not used below: it only accepts scikit-learn regressors, and a
# statsmodels ARIMA object is not one, so the stack is assembled by hand.
from sklearn.ensemble import RandomForestRegressor, StackingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

DATE_COL = '日期'
WARD_COL = '当日病房收入对应科室'
WARD_NAME = '康复医学科三病房'
TARGET_COL = '门诊收入OBS_T01_MZSR68'
FEATURE_COLS = ['门诊患者人次数', '药品总收入', '当日病房收入', 'Month', 'DayOfWeek']


def series_to_supervised(data, n_in=1):
    """Return lagged copies of *data* placed side by side as a 2-D array.

    For a 1-D input the columns are ``[t-n_in, ..., t-1, t]``. Rows that
    contain a NaN (including the first ``n_in`` rows, whose lags do not
    exist) are dropped.

    Args:
        data: Anything accepted by ``pd.DataFrame`` (list, array, Series, ...).
        n_in: Number of lagged copies to prepend.

    Returns:
        ``numpy.ndarray`` with the lagged columns followed by the original.

    Note:
        ARIMA consumes the raw univariate series, so this helper is not used
        on the ARIMA path below; it is kept for building lag features.
    """
    df = pd.DataFrame(data)
    cols = [df.shift(lag) for lag in range(n_in, 0, -1)]
    cols.append(df)
    agg = pd.concat(cols, axis=1)
    agg.dropna(inplace=True)
    return agg.values


def walk_forward_arima(history, actuals, order):
    """Produce one-step-ahead ARIMA forecasts for each value in *actuals*.

    The model is refitted before every forecast on all observations seen so
    far; the true value is appended to the history only after the forecast
    has been made, so no forecast sees its own target.

    Args:
        history: Univariate observations preceding the forecast period.
        actuals: True values of the forecast period, in time order.
        order: ``(p, d, q)`` tuple passed to ``ARIMA``.

    Returns:
        List of float forecasts, one per element of *actuals*.
    """
    history = list(history)  # copy: the caller's sequence must not grow
    forecasts = []
    for obs in actuals:
        model_fit = ARIMA(history, order=order).fit()
        yhat = float(model_fit.forecast()[0])
        forecasts.append(yhat)
        history.append(obs)
        print('predicted=%f, expected=%f' % (yhat, obs))
    return forecasts


# 1. Load data
data = pd.read_excel('数据.xlsx')

# 2. Preprocessing
# .copy() so the cleaning below modifies an independent frame, not a view of `data`.
ward_data = data[data[WARD_COL] == WARD_NAME].copy()
if ward_data.empty:
    raise ValueError('no rows found for ward %r' % WARD_NAME)

# Rows must be in time order before forward-filling and before any
# chronological split; rows without a usable date cannot be placed in time.
ward_data[DATE_COL] = pd.to_datetime(ward_data[DATE_COL])
ward_data = ward_data.dropna(subset=[DATE_COL]).sort_values(DATE_COL)
ward_data = ward_data.ffill()

# Calendar features
ward_data['Month'] = ward_data[DATE_COL].dt.month
ward_data['DayOfWeek'] = ward_data[DATE_COL].dt.dayofweek

# Forward fill cannot repair leading gaps; drop whatever is still missing.
ward_data = ward_data.dropna(subset=FEATURE_COLS + [TARGET_COL]).reset_index(drop=True)

X = ward_data[FEATURE_COLS]
y = ward_data[TARGET_COL].astype(float)

# 3. Exploratory plot
plt.figure(figsize=(14, 7))
plt.plot(ward_data[DATE_COL], ward_data[TARGET_COL])
plt.title('康复医学科三病房门诊收入随时间的变化')
plt.xlabel('日期')
plt.ylabel('门诊收入')
plt.show()

# 4. Model configuration
# 4.1 ARIMA order
p = 1
d = 1
q = 1

# 4.2 Random forest
rf = RandomForestRegressor(n_estimators=100, random_state=42)

# 5. Training and evaluation
# Chronological 60/20/20 split (shuffle=False). A shuffled split would train
# on future rows and would not line up with the ARIMA forecast period.
X_trainval, X_test, y_trainval, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=False)
X_train, X_val, y_train, y_val = train_test_split(
    X_trainval, y_trainval, test_size=0.25, shuffle=False)

# 5.1 Base-model forecasts for the validation period (meta-learner inputs)
arima_val_pred = walk_forward_arima(y_train.tolist(), y_val.tolist(), (p, d, q))

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
rf.fit(X_train_scaled, y_train)
rf_val_pred = rf.predict(X_val_scaled)

# 5.2 ARIMA on the test period, with train + validation as history
predictions = walk_forward_arima(y_trainval.tolist(), y_test.tolist(), (p, d, q))

mse = mean_squared_error(y_test, predictions)
print('ARIMA Test MSE: %.3f' % mse)

# 5.3 Random forest refitted on train + validation for the test period
X_trainval_scaled = scaler.fit_transform(X_trainval)
X_test_scaled = scaler.transform(X_test)
rf.fit(X_trainval_scaled, y_trainval)

# 6. Ensemble (manual stacking)
# The meta-learner sees only out-of-sample base forecasts (validation period),
# so it learns how much to trust each model rather than memorising the
# random forest's in-sample fit.
meta_features_val = np.column_stack([arima_val_pred, rf_val_pred])
stacking_regressor = RandomForestRegressor(n_estimators=100, random_state=42)
stacking_regressor.fit(meta_features_val, y_val)

# 7. Evaluate on the held-out test period (same rows for every model)
y_pred_arima = predictions
y_pred_rf = rf.predict(X_test_scaled)
meta_features_test = np.column_stack([y_pred_arima, y_pred_rf])
y_pred_stacking = stacking_regressor.predict(meta_features_test)

mse_arima = mean_squared_error(y_test, y_pred_arima)
mse_rf = mean_squared_error(y_test, y_pred_rf)
mse_stacking = mean_squared_error(y_test, y_pred_stacking)

print('ARIMA Test MSE: %.3f' % mse_arima)
print('Random Forest Test MSE: %.3f' % mse_rf)
print('Stacking Regressor Test MSE: %.3f' % mse_stacking)

Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。