Problem B: Data Analysis and Prediction of Flood Disasters
The complete paper has also been written.
Code for Question 2 (code for Questions 1, 3, and 4, plus the paper, is at the end of the post)
import pandas as pd
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import rcParams

# Configure matplotlib so the Chinese column names render correctly in plots
rcParams['font.sans-serif'] = ['PingFang HK']  # PingFang HK; substitute a Chinese font available on your system
rcParams['axes.unicode_minus'] = False  # render minus signs correctly with a non-default font

# Load the training data (the file uses GBK encoding)
train_data = pd.read_csv('train.csv', encoding='GBK')
# Cluster analysis with K-means
# Cluster the records on the flood-probability column alone
X = train_data[['洪水概率']]

# Fit K-means with three clusters (high / medium / low risk)
kmeans = KMeans(n_clusters=3, random_state=0).fit(X)
train_data['风险类别'] = kmeans.labels_

# Visualise the clustering result
plt.figure(figsize=(10, 6))
sns.scatterplot(x=train_data.index, y='洪水概率', hue='风险类别', data=train_data, palette='viridis')
plt.title('K-means clustering of flood probability')
plt.show()

# K-means labels are arbitrary, so rank the clusters by their centre (mean flood probability)
# before naming them high, medium, and low risk
order = kmeans.cluster_centers_.ravel().argsort()[::-1]  # cluster indices, highest centre first

# Compare the indicator statistics of the three risk groups
high_risk = train_data[train_data['风险类别'] == order[0]]
medium_risk = train_data[train_data['风险类别'] == order[1]]
low_risk = train_data[train_data['风险类别'] == order[2]]

print("High risk group:\n", high_risk.describe())
print("Medium risk group:\n", medium_risk.describe())
print("Low risk group:\n", low_risk.describe())
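The choice of three clusters is an assumption rather than something dictated by the problem statement. A quick silhouette check over a few candidate values of k can be used to justify it; a minimal sketch reusing the X defined above:

from sklearn.metrics import silhouette_score

# Minimal sketch: compare silhouette scores for a few candidate cluster counts
# (k = 3 above is an assumption; the k with the highest score is usually preferred)
for k in range(2, 6):
    labels = KMeans(n_clusters=k, random_state=0, n_init=10).fit_predict(X)
    print(f"k={k}, silhouette score={silhouette_score(X, labels):.3f}")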

from sklearn.ensemble import RandomForestClassifier

# Prepare the data: all indicator columns as features, the risk class as the target
X = train_data.drop(['id', '洪水概率', '风险类别'], axis=1)
y = train_data['风险类别']

# Use a random forest to estimate feature importance
clf = RandomForestClassifier(n_estimators=100, random_state=0)
clf.fit(X, y)

# Extract and rank the feature importances
feature_importances = pd.Series(clf.feature_importances_, index=X.columns).sort_values(ascending=False)

print("Feature importances:\n", feature_importances)

# Keep the five most important features
top_5_features = feature_importances.head(5).index.tolist()
print("Top 5 features:\n", top_5_features)
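A simple bar chart makes the importance ranking easier to read in the paper; a minimal sketch using the feature_importances series computed above:

# Minimal sketch: plot the ranked feature importances as a horizontal bar chart
plt.figure(figsize=(10, 6))
feature_importances.head(10).sort_values().plot(kind='barh')
plt.title('Random forest feature importances (top 10)')
plt.xlabel('Importance')
plt.tight_layout()
plt.show()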

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Use only the five most important features
X_top5 = train_data[top_5_features]

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_top5, y, test_size=0.2, random_state=0)

# Train a logistic regression model (max_iter raised to help convergence on unscaled features)
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Predict on the test set
y_pred = model.predict(X_test)

# Evaluate the model
print(classification_report(y_test, y_pred))
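Beyond the precision/recall report, a confusion matrix shows which risk classes get mixed up with one another; a minimal sketch using the predictions above:

from sklearn.metrics import confusion_matrix

# Minimal sketch: visualise per-class errors as a confusion matrix heatmap
cm = confusion_matrix(y_test, y_pred)
plt.figure(figsize=(6, 5))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.xlabel('Predicted risk class')
plt.ylabel('True risk class')
plt.title('Logistic regression confusion matrix')
plt.show()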

import numpy as np

# Sensitivity analysis: shift each of the top 5 features on the test set by a small
# amount and record how the model's accuracy responds
sensitivity_analysis = {}
for feature in top_5_features:
    sensitivity_analysis[feature] = []
    for change in np.linspace(-0.1, 0.1, 5):
        X_test_copy = X_test.copy()
        X_test_copy[feature] += change
        y_pred_shifted = model.predict(X_test_copy)
        sensitivity_analysis[feature].append((change, (y_pred_shifted == y_test).mean()))

print("Sensitivity analysis:\n", sensitivity_analysis)

# Visualise the sensitivity-analysis results
plt.figure(figsize=(14, 8))

for feature, values in sensitivity_analysis.items():
    changes, accuracies = zip(*values)
    plt.plot(changes, accuracies, marker='o', label=feature)

plt.title('Sensitivity Analysis of Top 5 Features')
plt.xlabel('Change in Feature Value')
plt.ylabel('Accuracy')
plt.legend()
plt.grid(True)
plt.show()
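If the competition also provides a test.csv (assumed here to contain the same indicator columns plus an id column, but no flood probability), the fitted logistic regression can be reused to assign a risk class to each record; a minimal sketch under that assumption:

# Minimal sketch, assuming a test.csv with the same indicator columns and an 'id' column
test_data = pd.read_csv('test.csv', encoding='GBK')
test_data['predicted_risk'] = model.predict(test_data[top_5_features])
test_data[['id', 'predicted_risk']].to_csv('risk_predictions.csv', index=False)
print(test_data['predicted_risk'].value_counts())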