
2024 Asia-Pacific Mathematical Contest in Modeling (Chinese track), Problem B: Python Code and Results Tables Tutorial

Problem B: Data Analysis and Prediction of Flood Disasters

The complete paper has also been finished.

Code for Question 2 (code for Questions 1, 3, and 4, along with the paper, can be found at the end of this article)

import pandas as pd
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import rcParams

# Configure matplotlib to display Chinese characters
rcParams['font.sans-serif'] = ['PingFang HK']  # macOS font; substitute e.g. 'SimHei' on Windows
rcParams['axes.unicode_minus'] = False         # render the minus sign correctly with a CJK font

# Load the training data (the CSV is GBK-encoded and uses Chinese column names)
train_data = pd.read_csv('train.csv', encoding='GBK')

# Cluster the flood-probability column ('洪水概率') into three risk groups with K-means
X = train_data[['洪水概率']]
kmeans = KMeans(n_clusters=3, random_state=0).fit(X)
train_data['风险类别'] = kmeans.labels_  # cluster label per sample ('风险类别' = risk category)

# Visualize the clustering result
plt.figure(figsize=(10, 6))
sns.scatterplot(x=train_data.index, y='洪水概率', hue='风险类别', data=train_data, palette='viridis')
plt.title('洪水概率聚类结果')  # "Flood-probability clustering result"
plt.show()

# Summarize the indicator statistics of each risk group.
# Note: K-means assigns the labels 0/1/2 arbitrarily; check kmeans.cluster_centers_
# before reading label 0 as "high risk", label 1 as "medium", and label 2 as "low".
high_risk = train_data[train_data['风险类别'] == 0]
medium_risk = train_data[train_data['风险类别'] == 1]
low_risk = train_data[train_data['风险类别'] == 2]
print("High risk group:\n", high_risk.describe())
print("Medium risk group:\n", medium_risk.describe())
print("Low risk group:\n", low_risk.describe())
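Because K-means labels carry no inherent order, the high/medium/low assignment above is only correct by coincidence. A minimal sketch of deriving the mapping from the cluster centroids instead; the column name '风险等级' and the dictionary label_to_risk are illustrative additions, not part of the original code:

import numpy as np

# Order cluster labels by their centroid flood probability:
# lowest centroid -> low risk, highest centroid -> high risk
centers = kmeans.cluster_centers_.ravel()   # one centroid value per cluster
order = np.argsort(centers)                 # cluster labels sorted from lowest to highest centroid
label_to_risk = {int(order[0]): '低风险', int(order[1]): '中风险', int(order[2]): '高风险'}

# '风险等级' is a hypothetical column holding the mapped risk level
train_data['风险等级'] = train_data['风险类别'].map(label_to_risk)
print(train_data['风险等级'].value_counts())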

from sklearn.ensemble import RandomForestClassifier

# Prepare the data: all indicator columns as features, the risk category as the target
X = train_data.drop(['id', '洪水概率', '风险类别'], axis=1)
y = train_data['风险类别']

# Fit a random forest to estimate feature importances
clf = RandomForestClassifier(n_estimators=100, random_state=0)
clf.fit(X, y)

# Extract and rank the feature importances
feature_importances = pd.Series(clf.feature_importances_, index=X.columns).sort_values(ascending=False)
print("Feature importances:\n", feature_importances)

# Keep the five most important features
top_5_features = feature_importances.head(5).index.tolist()
print("Top 5 features:\n", top_5_features)

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Use only the top-5 features
X_top5 = train_data[top_5_features]

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_top5, y, test_size=0.2, random_state=0)

# Train a logistic regression model
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Predict on the test set and evaluate
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))

import numpy as np

# Sensitivity analysis: shift each top-5 feature by -10% to +10% of its mean
# and record how the test accuracy responds
sensitivity_analysis = {}
for feature in top_5_features:
    original_value = X_test[feature].mean()
    sensitivity_analysis[feature] = []
    for change in np.linspace(-0.1, 0.1, 5):
        X_test_copy = X_test.copy()
        X_test_copy[feature] += change * original_value  # relative perturbation of the feature
        y_pred_perturbed = model.predict(X_test_copy)
        sensitivity_analysis[feature].append((change, (y_pred_perturbed == y_test).mean()))
print("Sensitivity analysis:\n", sensitivity_analysis)
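Logistic regression converges more reliably on standardized inputs, and a single train/test split gives a fairly noisy accuracy estimate. A short sketch, not part of the original solution, combining StandardScaler and the same logistic regression in a scikit-learn Pipeline and scoring it with 5-fold cross-validation on X_top5 and y from above:

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score

# Standardize the top-5 features, then fit logistic regression;
# 5-fold cross-validation gives a more stable accuracy estimate than one split
pipeline = make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000))
scores = cross_val_score(pipeline, X_top5, y, cv=5, scoring='accuracy')
print("Cross-validated accuracy: %.3f +/- %.3f" % (scores.mean(), scores.std()))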

# Visualize the sensitivity analysis: one curve per feature, showing how
# test accuracy changes as that feature is perturbed
plt.figure(figsize=(14, 8))
for feature, values in sensitivity_analysis.items():
    changes, accuracies = zip(*values)
    plt.plot(changes, accuracies, marker='o', label=feature)
plt.title('Sensitivity Analysis of Top 5 Features')
plt.xlabel('Change in Feature Value')
plt.ylabel('Accuracy')
plt.legend()
plt.grid(True)
plt.show()
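The whole pipeline rests on the assumption of three risk levels. A quick sketch (an addition, not from the original code) for checking whether k=3 is a reasonable choice by comparing silhouette scores over a few cluster counts; the variable X_prob is an illustrative name for the flood-probability column:

from sklearn.metrics import silhouette_score

# Re-use the flood-probability column that was clustered in the first step
X_prob = train_data[['洪水概率']]

# Compare silhouette scores for a few candidate cluster counts;
# higher scores indicate better-separated clusters (sampled to keep it fast)
for k in range(2, 6):
    labels = KMeans(n_clusters=k, random_state=0).fit_predict(X_prob)
    score = silhouette_score(X_prob, labels, sample_size=10000, random_state=0)
    print(f"k={k}: silhouette score = {score:.3f}")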

More detailed ideas, approaches to each question, code, explanation videos, the finished paper, and other related material are available via the group card below!
