
Multivariate Single-Step LSTM Time-Series Forecasting with PyTorch

Most of the LSTM code I found online is paywalled, so I wrote my own and am sharing it here. It's beginner code, so please go easy on any mistakes.

Main program (any file name works):

import torch
import time
import pandas as pd
import numpy as np
import torch.nn as nn
from sklearn.preprocessing import MinMaxScaler
from func import setup_seed, sliding_window, cmpt_error  # helper functions, defined below


# LSTM model
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        # single-layer LSTM by default; batch_first=True matches the
        # (samples, time_step, features) layout produced by sliding_window
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        out, _ = self.lstm(x)          # out: (batch, time_step, hidden_size)
        out = self.fc(out[:, -1, :])   # use only the last time step
        return out


# Fix the random seed so results are reproducible
# setup_seed(10)

# Read the data
df = pd.read_csv("data.csv", parse_dates=["timestamp"])

# Train/test split
boundary_date = pd.to_datetime("2020-12-31 23:00:00")  # 2017-2020 is the training set, 2021 the test set
mask = df["timestamp"] <= boundary_date
train = df.loc[mask].iloc[:, 1:]  # training set; also used as the template for normalization

# Normalization
scaler = MinMaxScaler()
scaler_train = MinMaxScaler()
scaler.fit(train)
scaler_train.fit(train.iloc[:, :1])
normalized_data = scaler.transform(df.iloc[:, 1:])  # normalize the whole dataset with the training-set scaler

# Basic parameters
time_step = 30      # window length: how many historical rows are used per prediction
forecast_step = 1   # forecast horizon: how many steps ahead to predict
feature_size = 6    # number of input features

# Build training and test sets
[train_input, train_output, test_input, test_output] = sliding_window(normalized_data, len(train), time_step,
                                                                      forecast_step, feature_size,
                                                                      sample_feature_compression=False)

# Input and output dimensions
input_dim = len(train_input[0, 0, :])
output_dim = 1
hidden_dim = 20  # tune this hyperparameter to taste

# Set the default dtype to float64; ndarray defaults to float64 while tensors
# default to float32, and mixing the two raises a type error
torch.set_default_dtype(torch.float64)
# Run on GPU if available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Convert to tensors
train_inputs_tensor = torch.from_numpy(train_input).to(device)
labels = torch.from_numpy(train_output).to(device)
test_inputs_tensor = torch.from_numpy(test_input).to(device)

# Training hyperparameters
epochs = 5000          # number of iterations
learning_rate = 0.003  # learning rate

# Run several times so the errors can be averaged
train_prediction_set = []
prediction_set = []
error = []
start = time.perf_counter()  # start time
multi_times = 1              # number of runs
for times in range(multi_times):
    # input/output layer sizes are input_dim/output_dim, hidden layer size is hidden_dim
    model = LSTM(input_dim, hidden_dim, output_dim).to(device)
    if times == 0:
        print(model)  # inspect the network structure
    # Adam optimizer over the model parameters with the given learning rate
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.MSELoss()  # loss function
    # Train the model
    train_predicted = 0  # holds the training-set predictions
    for epoch in range(epochs):
        # zero the gradients
        optimizer.zero_grad()
        # forward pass
        train_outputs_tensor = model(train_inputs_tensor)
        # compute the loss
        loss = criterion(train_outputs_tensor, labels)
        # backward pass
        loss.backward()
        # update the weights
        optimizer.step()
        # print the loss every 100 epochs
        if (epoch + 1) % 100 == 0:
            print(f'epoch {epoch + 1}, loss {loss.item()}')
        if epoch == epochs - 1:
            train_predicted = train_outputs_tensor.detach().cpu().numpy()
    # Predict on the test set
    predicted = model(test_inputs_tensor).detach().cpu().numpy()
    # Undo the scaling
    train_predicted = scaler_train.inverse_transform(train_predicted)  # training-set predictions
    predicted = scaler_train.inverse_transform(predicted)              # test-set predictions
    target = scaler_train.inverse_transform(test_output)               # ground truth
    # Compute the errors
    error.append(cmpt_error(predicted, target))
    # Save this run's predictions
    train_prediction_set.append(train_predicted)
    prediction_set.append(predicted)
end = time.perf_counter()  # end time
runTime = end - start
print("Run time: ", runTime)

# Sort the runs
train_prediction_set = np.array(train_prediction_set)[:, :, 0].T
prediction_set = np.array(prediction_set)[:, :, 0].T
error = np.array(error).T
prediction_set = np.vstack([train_prediction_set, prediction_set])
error_prediction = pd.DataFrame(np.vstack([error, prediction_set]))  # stack errors on top of predictions for sorting
error_prediction = error_prediction.sort_values(by=2, axis=1)  # NRMSE is the third row; sort runs by NRMSE ascending

# Save the results
# error_prediction.iloc[3:, :] skips the first three rows, which hold the errors;
# adjust if you add more error metrics
prediction_set = pd.DataFrame(np.array(error_prediction.iloc[3:, :]), columns=[i for i in range(1, multi_times + 1)])
error = pd.DataFrame(np.array(error_prediction.iloc[:3, :]), columns=[i for i in range(1, multi_times + 1)],
                     index=['MAE', 'RMSE', 'NRMSE'])
prediction_set.to_excel('LSTM.xlsx', index=False, sheet_name='LSTM')
with pd.ExcelWriter('LSTM.xlsx', mode='a', engine='openpyxl') as writer:
    error.to_excel(writer, sheet_name='error')
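
For reference, the script expects data.csv to contain a timestamp column followed by six numeric feature columns, with the forecast target as the first feature (that is the column scaler_train is fitted on). Here is a minimal sketch that generates a dummy file of the right shape, assuming hourly data from 2017 through 2021; the feature names are made up, and only the column order matters:

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
timestamps = pd.date_range("2017-01-01 00:00:00", "2021-12-31 23:00:00", freq="h")
dummy = pd.DataFrame({
    "timestamp": timestamps,
    "power": rng.random(len(timestamps)),     # hypothetical target column (first feature)
    "feature2": rng.random(len(timestamps)),  # the remaining five features are placeholders
    "feature3": rng.random(len(timestamps)),
    "feature4": rng.random(len(timestamps)),
    "feature5": rng.random(len(timestamps)),
    "feature6": rng.random(len(timestamps)),
})
dummy.to_csv("data.csv", index=False)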

File: error_calculation.py

import math


def mae(predicted, target):
    """Mean absolute error (MAE)."""
    return (abs(target - predicted)).mean()


def mse(predicted, target):
    """Mean squared error (MSE)."""
    return ((target - predicted) ** 2).mean()


def rmse(predicted, target):
    """Root mean squared error (RMSE)."""
    return math.sqrt(mse(predicted, target))


def nrmse(predicted, target):
    """Normalized RMSE, normalized by the range (max minus min) of the target."""
    return rmse(predicted, target) / (target.max() - target.min())
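
As a quick sanity check, these metrics can be verified by hand on a tiny made-up example:

import numpy as np
from error_calculation import mae, mse, rmse, nrmse

target = np.array([1.0, 2.0, 3.0])
predicted = np.array([1.0, 2.0, 4.0])
print(mae(predicted, target))    # (0 + 0 + 1) / 3 ≈ 0.3333
print(mse(predicted, target))    # (0 + 0 + 1) / 3 ≈ 0.3333
print(rmse(predicted, target))   # sqrt(1/3) ≈ 0.5774
print(nrmse(predicted, target))  # 0.5774 / (3 - 1) ≈ 0.2887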

File: func.py

import torch
import random
import numpy as np
import pandas as pd
from error_calculation import mae, rmse, nrmse


def setup_seed(seed):
    """Fix the random seeds so repeated runs give the same result."""
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    # torch.backends.cudnn.deterministic = True  # barely affects accuracy but noticeably
    # slows things down; leave it commented out unless you need full determinism


def sliding_window(normalized_data, train_length, time_step, forecast_step, feature_size=1,
                   sample_feature_compression=True):
    """Split a normalized dataset into training and test samples with a sliding window.
    sample_feature_compression chooses whether each window's features are flattened into one vector."""
    inputs = []
    outputs = []
    for i in range(len(normalized_data) - time_step - forecast_step + 1):  # number of samples the data yields
        package = []
        # collect each feature over the window
        for j in range(feature_size):
            package.append(normalized_data[i:i + time_step][:, j])
        # build input and output: the whole window goes into inputs, the future value into outputs
        if sample_feature_compression:
            inputs.append(np.array(package).reshape(1, -1)[0, :])
        else:
            inputs.append(np.array(package).T)
        # target is the first feature, forecast_step steps after the window
        outputs.append(normalized_data[i + time_step + forecast_step - 1][0])
    inputs = np.array(inputs)
    outputs = np.array(outputs).reshape(-1, 1)
    # split into training and test sets
    train_input = inputs[:train_length - time_step - forecast_step + 1]
    train_output = outputs[:train_length - time_step - forecast_step + 1]
    test_input = inputs[train_length - time_step - forecast_step + 1:]
    test_output = outputs[train_length - time_step - forecast_step + 1:]
    return [train_input, train_output, test_input, test_output]


def cmpt_error(predicted, target):
    """Compare predictions with the ground truth and print the errors."""
    # side-by-side comparison
    contrast = pd.DataFrame(np.hstack((predicted, target)), columns=['predicted', 'target'])
    print(contrast)
    # print the errors
    mae1 = mae(predicted, target)
    rmse1 = rmse(predicted, target)
    nrmse1 = nrmse(predicted, target)
    print('MAE:', mae1)
    print('RMSE:', rmse1)
    print(f'NRMSE: {nrmse1 * 100:.2f}%')
    return [mae1, rmse1, nrmse1]
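
To make the window shapes concrete, here is a toy run of sliding_window on random data (only the shapes matter; the sizes below are chosen arbitrarily):

import numpy as np
from func import sliding_window

data = np.random.rand(100, 6)  # 100 normalized rows, 6 features
train_in, train_out, test_in, test_out = sliding_window(
    data, train_length=80, time_step=30, forecast_step=1,
    feature_size=6, sample_feature_compression=False)
print(train_in.shape)   # (50, 30, 6) -> 80 - 30 - 1 + 1 training samples
print(train_out.shape)  # (50, 1)
print(test_in.shape)    # (20, 30, 6) -> the 20 remaining samples
print(test_out.shape)   # (20, 1)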

Put all the files in one folder and run the main program. Keep in mind that PV output power varies a lot from day to day, so the error is not small. I uploaded data.csv as a free (0-point) download; I'm not sure whether everyone will actually be able to download it for free.
