batch_first:用于约定输入、输出张量各个维度的含义。这里只需记住一种情况即可:batch_first=True 时,输入的三个维度依次为 input(batch_size, seq_len, input_size),输出的三个维度依次为 output(batch_size, seq_len, num_directions * hidden_size)。注意该参数不影响隐藏状态 h_0、c_0 的形状,它们始终为 (num_directions * num_layers, batch_size, hidden_size)。






  1. class LSTM(torch.nn.Module):
  2. def __init__(self, hidden_size, output_size):
  3. super().__init__()
  4. self.input_size = 1
  5. self.hidden_size = hidden_size
  6. self.num_layers = 1
  7. self.output_size = output_size
  8. self.num_directions = 1 # 单向LSTM
  9. self.lstm = torch.nn.LSTM(self.input_size, self.hidden_size, self.num_layers, batch_first=True)
  10. self.lin = torch.nn.Linear(self.hidden_size, self.output_size)
  11. def forward(self, input_seq):
  12. batch_size, seq_len = input_seq.shape[0], input_seq.shape[1]
  13. # input(batch_size, seq_len, input_size)
  14. h_0 = torch.zeros(self.num_directions * self.num_layers, batch_size, self.hidden_size).to(device)
  15. c_0 = torch.zeros(self.num_directions * self.num_layers, batch_size, self.hidden_size).to(device)
  16. # output(batch_size, seq_len, num_directions * hidden_size)
  17. output, _ = self.lstm(input_seq, (h_0.detach(), c_0.detach()))
  18. pred = output[:, -1, :]
  19. pred = self.lin(pred)
  20. return pred



  1. import numpy as np
  2. import pandas as pd
  3. import torch
  4. from torch.utils.data import Dataset, DataLoader
  5. import matplotlib.pyplot as plt
  6. from tqdm import tqdm
  7. device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
  8. value = pd.read_csv(r'dataset/A5M.txt', header=None)#(14772, 1)
  9. time = pd.date_range(start='200411190930', periods=len(value), freq='5min')
  10. ts = pd.Series(value.iloc[:, 0].values, index=time)
  11. ts_sample_h = ts.resample('H').sum()
  12. # plt.plot(ts_sample_h)
  13. # plt.xlabel("Time")
  14. # plt.ylabel("traffic demand")
  15. # plt.title("resample history traffic demand from 5M to H")
  16. # plt.show()
  17. class MyDataset(Dataset):
  18. def __init__(self, data):
  19. self.data = data
  20. def __getitem__(self, item):
  21. return self.data[item]
  22. def __len__(self):
  23. return len(self.data)
  24. def nn_seq_us(B):
  25. dataset = ts_sample_h
  26. # split
  27. train = dataset[:int(len(dataset) * 0.7)]
  28. test = dataset[int(len(dataset) * 0.7):]
  29. m, n = np.max(train.values), np.min(train.values)
  30. # print(m,n)
  31. def process(data, batch_size, shuffle):
  32. load = data
  33. load = (load - n) / (m - n)
  34. seq = []
  35. for i in range(len(data) - 6):
  36. train_seq = []
  37. train_label = []
  38. for j in range(i, i + 6):
  39. x = [load[j]]
  40. train_seq.append(x)
  41. train_label.append(load[i + 6])
  42. train_seq = torch.FloatTensor(train_seq)
  43. train_label = torch.FloatTensor(train_label).view(-1)
  44. seq.append((train_seq, train_label))
  45. seq = MyDataset(seq)
  46. seq = DataLoader(dataset=seq, batch_size=batch_size, shuffle=shuffle, num_workers=0, drop_last=False)
  47. return seq
  48. Dtr = process(train, B, False)
  49. Dte = process(test, B, False)
  50. return Dtr, Dte, m, n
  51. class GRU(torch.nn.Module):
  52. def __init__(self, hidden_size, num_layers):
  53. super().__init__()
  54. self.input_size = 1
  55. self.hidden_size = hidden_size
  56. self.num_layers = num_layers
  57. self.output_size = 1
  58. self.num_directions = 1
  59. self.gru = torch.nn.GRU(self.input_size, self.hidden_size, self.num_layers, batch_first=True)
  60. self.linear = torch.nn.Linear(self.hidden_size, self.output_size)
  61. def forward(self, input_seq):
  62. batch_size, seq_len = input_seq.shape[0], input_seq.shape[1]
  63. h_0 = torch.randn(self.num_directions * self.num_layers, batch_size, self.hidden_size).to(device)
  64. # output(batch_size, seq_len, num_directions * hidden_size)
  65. output, _ = self.gru(input_seq, (h_0))
  66. pred = self.linear(output)
  67. pred = pred[:, -1, :]
  68. return pred
  69. Dtr, Dte, m, n= nn_seq_us(64)
  70. hidden_size, num_layers = 10, 2
  71. model = GRU(hidden_size, num_layers).to(device)
  72. loss_function = torch.nn.MSELoss().to(device)
  73. optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=1.5e-3)
  74. # training
  75. trainloss_list = []
  76. model.train()
  77. for epoch in tqdm(range(50)):
  78. train_loss = []
  79. for (seq, label) in Dtr:
  80. seq = seq.to(device)#torch.Size([64, 80, 1])
  81. label = label.to(device)#torch.Size([64, 1])
  82. y_pred = model(seq)
  83. loss = loss_function(y_pred, label)
  84. train_loss.append(loss.item())
  85. optimizer.zero_grad()
  86. loss.backward()
  87. optimizer.step()
  88. trainloss_list.append(np.mean(train_loss))
  89. # training_loss的图
  90. plt.plot(trainloss_list)
  91. plt.xlabel("Epoch")
  92. plt.ylabel("MSE")
  93. plt.title("average of Training loss")
  94. plt.show()
  95. pred = []
  96. y = []
  97. model.eval()
  98. for (seq, target) in Dte:
  99. seq = seq.to(device)
  100. target = target.to(device)
  101. y_pred = model(seq)
  102. pred.append(y_pred)
  103. y.append(target)
  104. y=torch.cat(y, dim=0)
  105. pred=torch.cat(pred, dim=0)
  106. y = (m - n) * y + n
  107. pred = (m - n) * pred + n#torch.Size([179, 1])
  108. print('MSE:', loss_function(y, pred))
  109. # plot
  110. plt.plot(y.cpu().detach().numpy(), label='ground-truth')
  111. plt.plot(pred.cpu().detach().numpy(), label='prediction')
  112. plt.xlabel("Time")
  113. plt.ylabel("traffic demand")
  114. plt.title("history traffic demand from 5M to H")
  115. plt.show()


