当前位置:   article > 正文

LSTM模型实现预测_lstm预测

lstm预测

最近突发奇想,要通过机器学习来优化时间序列的预测结果。

下面就是我使用LSTM神经网络、并加入正则化、Dropout和早停等优化手段之后的代码。

  1. import numpy as np
  2. import pandas as pd
  3. import matplotlib.pyplot as plt
  4. from sklearn.preprocessing import MinMaxScaler
  5. import tensorflow as tf
  6. from tensorflow.keras.models import Sequential
  7. from tensorflow.keras import regularizers
  8. from tensorflow.keras.layers import LSTM, Dense
  9. from tensorflow.keras.optimizers.schedules import ExponentialDecay
  10. from tensorflow.keras.layers import Dropout
  11. plt.rcParams['font.sans-serif'] = ['SimHei']
  12. # 数据读取
  13. file_path = '文件位置'
  14. data = pd.read_excel(file_path)
  15. # 数据预处理
  16. scaler = MinMaxScaler(feature_range=(0, 1))
  17. scaled_data = scaler.fit_transform(data[['需要预测的数据']])
  18. # 创建训练集和测试集
  19. train_size = int(len(scaled_data) * 0.8)
  20. train_data, test_data = scaled_data[:train_size], scaled_data[train_size:]
  21. # 准备时间序列数据
  22. def create_dataset(data, look_back=1):
  23. X, Y = [], []
  24. for i in range(len(data) - look_back):
  25. X.append(data[i:(i + look_back), 0])
  26. Y.append(data[i + look_back, 0])
  27. return np.array(X), np.array(Y)
  28. look_back = 1 # 由于是单变量时间序列,简单的滞后1步
  29. X_train, y_train = create_dataset(train_data, look_back)
  30. X_test, y_test = create_dataset(test_data, look_back)
  31. # 重塑输入数据以适应LSTM层
  32. X_train = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1])) # 这里应该是正确的
  33. X_test = np.reshape(X_test, (X_test.shape[0], 1, X_test.shape[1])) # 同上
  34. # 构建LSTM模型_通过对LSTM模型采用进一步改进
  35. model = Sequential()
  36. model.add(LSTM(units=100, return_sequences=True,
  37. input_shape=(1, 1), kernel_regularizer=regularizers.l2(0.001),
  38. recurrent_regularizer=regularizers.l2(0.001))) # L2正则化
  39. model.add(Dropout(0.2)) # 添加Dropout层
  40. model.add(LSTM(units=50, kernel_regularizer=regularizers.l2(0.001),
  41. recurrent_regularizer=regularizers.l2(0.001))) # 第二个LSTM层也加入正则化
  42. model.add(Dropout(0.2)) # 再次添加Dropout
  43. model.add(Dense(1))
  44. # 使用Adam优化器,并考虑学习率衰减
  45. initial_learning_rate = 0.001
  46. lr_schedule = ExponentialDecay(
  47. initial_learning_rate,
  48. decay_steps=100000,
  49. decay_rate=0.96,
  50. staircase=True)
  51. optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
  52. model.compile(loss='mean_squared_error', optimizer=optimizer)
  53. model.summary()
  54. # 训练模型时考虑使用EarlyStopping回调防止过拟合
  55. from tensorflow.keras.callbacks import EarlyStopping
  56. early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
  57. history = model.fit(X_train, y_train, epochs=300, batch_size=32, verbose=2,
  58. validation_split=0.1, callbacks=[early_stopping]) # 增加验证集和早停
  59. # 训练模型
  60. model.fit(X_train, y_train, epochs=100, batch_size=1, verbose=2)
  61. # 预测
  62. train_predict = model.predict(X_train)
  63. test_predict = model.predict(X_test)
  64. # 转化维度
  65. print(y_train)
  66. print(y_test)
  67. y_train = y_train.reshape(-1, 1)
  68. y_test = y_test.reshape(-1, 1)
  69. print(y_train)
  70. print(y_test)
  71. # 反标准化
  72. train_predict = scaler.inverse_transform(train_predict)
  73. y_train = scaler.inverse_transform(y_train)
  74. test_predict = scaler.inverse_transform(test_predict)
  75. y_test = scaler.inverse_transform(y_test)
  76. # 计算预测误差
  77. train_mae = np.mean(np.abs(train_predict - y_train))
  78. test_mae = np.mean(np.abs(test_predict - y_test))
  79. print(f'Train Mean Absolute Error: {train_mae:.2f}')
  80. print(f'Test Mean Absolute Error: {test_mae:.2f}')
  81. # 生成与训练/测试数据长度匹配的索引
  82. # 索引调整正确,但注意train_index应该是基于原始数据长度减去look_back,因为create_dataset会消耗掉最后一个元素
  83. train_years = data['year'][:train_size]
  84. test_years = data['year'][train_size:]
  85. # 确保训练年份与训练目标长度一致
  86. train_years_for_plot = train_years[:-1]
  87. test_years_for_plot = test_years[:-1]
  88. # print(train_years_for_plot)
  89. # print(test_years_for_plot)
  90. # 计算测试集的误差
  91. test_errors = np.abs(test_predict - y_test)
  92. test_errors = test_errors.flatten()
  93. # 绘制预测值与实际值的对比图
  94. plt.figure(figsize=(12, 6))
  95. plt.plot(train_years_for_plot, y_train, label='训练真实值', linestyle='--')
  96. plt.plot(train_years_for_plot, train_predict.flatten(), label='训练预测值', linestyle=':')
  97. plt.plot(test_years_for_plot, y_test, label='测试真实值', marker='o')
  98. plt.plot(test_years_for_plot, test_predict.flatten(), label='预测真实值', marker='x')
  99. # 添加误差线
  100. plt.errorbar(test_years_for_plot, test_predict.flatten(), yerr=test_errors, fmt='none', ecolor='red', capsize=5,
  101. label='误差')
  102. plt.title('LSTM下真实与预测的(填上你的变量名称)')
  103. plt.xlabel('年份')
  104. plt.ylabel('预测变量名称')
  105. plt.legend()
  106. plt.show()
  107. # 打印每个测试点的误差
  108. for actual, predicted, error in zip(y_test, test_predict.flatten(), test_errors):
  109. print(f"Actual: {actual[0]:.2f}, Predicted: {predicted:.2f}, Error: {error:.2f}")
  110. # 预测未来5年
  111. last_years_data = data['预测变量名称'].values[-look_back:] # 这里已经是正确的
  112. last_years_scaled = scaler.transform(last_years_data.reshape(-1, 1))
  113. future_input = np.array([last_years_scaled[-1]]).reshape(1, 1, 1)
  114. future_prediction = []
  115. for _ in range(5):
  116. prediction = model.predict(future_input)[0]
  117. future_prediction.append(prediction)
  118. # 直接使用预测值进行下一次循环,不需要再次转换和拼接
  119. future_input = np.array([prediction]).reshape(1, 1, 1)
  120. future_prediction = scaler.inverse_transform(np.array(future_prediction).reshape(-1, 1))
  121. print("\nPredicted Income for the next 5 years:")
  122. for i, year in enumerate(range(2024, 2029)):
  123. print(f"Year {year}: Predicted Income {future_prediction[i][0]:.2f}")

上述代码中的'year'为时间变量所在的列名,如果你的数据中列名不同,别忘了修改;同样,'文件位置'和'需要预测的数据'等占位符也要替换成你自己的文件路径和列名。

要想用这段代码得到比较好的结果,通常需要多次运行:一方面,神经网络的权重是随机初始化的,每次训练的结果都会有差异;另一方面,LSTM的超参数(例如look_back、神经元个数、学习率等)也需要根据自己的数据不断调试。

希望这篇文章能够给你帮助。

本文内容由网友自发贡献,转载请注明出处:https://www.wpsshop.cn/w/人工智能uu/article/detail/863514
推荐阅读
相关标签
  

闽ICP备14008679号