当前位置:   article > 正文

Transformer for Time Series Prediction完整代码_=时间序列预测 transformer源代码

=时间序列预测 transformer源代码

时序预测小白,看到很多Transformer在时间序列预测上应用的代码,一头雾水,很烦,所以想自己记录一下,以求真正把Transformer怎么用在时间序列预测上讲清楚,捋明白。

我直接用代码来讲,用一个公开数据集(ETTh1)来做例子。

首先导包

  1. import sys
  2. import argparse
  3. import os
  4. import torch
  5. import torch.nn as nn
  6. import torch.nn.functional as F
  7. import math
  8. import time
  9. import numpy as np
  10. import pandas as pd
  11. from torch.utils.data import Dataset, DataLoader
  12. from typing import List
  13. from pandas.tseries import offsets
  14. from pandas.tseries.frequencies import to_offset
  15. import random
  16. import matplotlib.pyplot as plt

这些包的作用后面都会有。

然后设置随机种子

  1. sys.argv = ['run.py']
  2. fix_seed = 2024
  3. random.seed(fix_seed)
  4. torch.manual_seed(fix_seed)
  5. np.random.seed(fix_seed)

然后传入参数

  1. parser = argparse.ArgumentParser(description='Transformer')
  2. parser.add_argument('--model', type=str, required=False, default='Transformer', help='model of experiment')
  3. parser.add_argument('--data', type=str, required=False, default='ETTh1', help='dataset')
  4. parser.add_argument('--root_path', type=str, default='./data/', help='root path of the data file')
  5. parser.add_argument('--data_path', type=str, default='ETTh1.csv', help='data file')
  6. parser.add_argument('--features', type=str, default='MS', help='forecasting task, options:[M, S, MS]; M:multivariate predict multivariate, S:univariate predict univariate, MS:multivariate predict univariate')
  7. parser.add_argument('--target', type=str, default='OT', help='target feature in S or MS task')
  8. parser.add_argument('--freq', type=str, default='h', help='freq for time features encoding, options:[s:secondly, t:minutely, h:hourly, d:daily, b:business days, w:weekly, m:monthly]')
  9. parser.add_argument('--seq_len', type=int, default=96, help='input sequence length of Transformer encoder')
  10. parser.add_argument('--label_len', type=int, default=48, help='start token length of Transformer decoder')
  11. parser.add_argument('--pred_len', type=int, default=24, help='prediction sequence length')
  12. parser.add_argument('--enc_in', type=int, default=7, help='encoder input size')
  13. parser.add_argument('--dec_in', type=int, default=7, help='decoder input size')
  14. parser.add_argument('--c_out', type=int, default=7, help='output size')
  15. parser.add_argument('--d_model', type=int, default=512, help='dimension of model')
  16. parser.add_argument('--n_heads', type=int, default=8, help='num of heads')
  17. parser.add_argument('--e_layers', type=int, default=6, help='num of encoder layers')
  18. parser.add_argument('--d_layers', type=int, default=2, help='num of decoder layers')
  19. parser.add_argument('--d_ff', type=int, default=2048, help='dimension of fcn')
  20. parser.add_argument('--padding', type=int, default=0, help='padding type')
  21. parser.add_argument('--dropout', type=float, default=0.05, help='dropout')
  22. parser.add_argument('--activation', type=str, default='gelu',help='activation')
  23. parser.add_argument('--output_attention', action='store_true', help='whether to output attention in ecoder')
  24. parser.add_argument('--num_workers', type=int, default=0, help='data loader num workers')
  25. parser.add_argument('--train_epochs', type=int, default=6, help='number of train epochs')
  26. parser.add_argument('--batch_size', type=int, default=32, help='batch size of train input data')
  27. parser.add_argument('--patience', type=int, default=3, help='early stopping patience')
  28. parser.add_argument('--learning_rate', type=float, default=0.0001, help='optimizer learning rate')
  29. parser.add_argument('--loss', type=str, default='mse',help='loss function')
  30. parser.add_argument('--lradj', type=str, default='type1', help='adjust learning rate')
  31. parser.add_argument('--inverse', action='store_true', help='inverse output data', default=False)
  32. parser.add_argument('--use_gpu', type=bool, default=True, help='use gpu')
  33. parser.add_argument('--gpu', type=int, default=0, help='gpu')
  34. parser.add_argument('--is_rolling_predict', type=bool, default=True, help='rolling predict')
  35. parser.add_argument('--rolling_data_path', type=str, default='ETTh1-Test.csv', help='data file')
  36. parser.add_argument('--do_predict', action='store_true', default=True, help='whether to predict unseen future data')
  37. args = parser.parse_args()
  38. args.use_gpu = True if torch.cuda.is_available() and args.use_gpu else False
  39. data_parser = {
  40. 'ETTh1':{'data':'ETTh1.csv','T':'OT','M':[7,7,7],'S':[1,1,1],'MS':[7,7,1]},
  41. 'ETTh2':{'data':'ETTh2.csv','T':'OT','M':[7,7,7],'S':[1,1,1],'MS':[7,7,1]},
  42. 'ETTm1':{'data':'ETTm1.csv','T':'OT','M':[7,7,7],'S':[1,1,1],'MS':[7,7,1]},
  43. 'ETTm2':{'data':'ETTm2.csv','T':'OT','M':[7,7,7],'S':[1,1,1],'MS':[7,7,1]},
  44. 'WTH':{'data':'WTH.csv','T':'WetBulbCelsius','M':[12,12,12],'S':[1,1,1],'MS':[12,12,1]},
  45. 'ECL':{'data':'ECL.csv','T':'MT_320','M':[321,321,321],'S':[1,1,1],'MS':[321,321,1]},
  46. 'Solar':{'data':'solar_AL.csv','T':'POWER_136','M':[137,137,137],'S':[1,1,1],'MS':[137,137,1]},
  47. }
  48. if args.data in data_parser.keys():
  49. data_info = data_parser[args.data]
  50. args.data_path = data_info['data']
  51. args.target = data_info['T'] # 'OT'
  52. args.enc_in, args.dec_in, args.c_out = data_info[args.features] # 7, 7, 1
  53. print('Args in experiment:')
  54. print(args)

可以看出我们设置的一些默认参数

然后下面就开始写类和函数了,共有12个类和3个函数。

首先是TokenEmbedding

  1. class TokenEmbedding(nn.Module):
  2. def __init__(self, c_in, d_model):
  3. super(TokenEmbedding, self).__init__()
  4. self.tokenConv = nn.Conv1d(in_channels=c_in, out_channels=d_model, kernel_size=3, padding=1, padding_mode='circular')
  5. # weight initialization
  6. for m in self.modules():
  7. if isinstance(m, nn.Conv1d):
  8. nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='leaky_relu')
  9. def forward(self, x):
  10. # x is a tensor with Size([32, 96, 7])
  11. x = self.tokenConv(x.permute(0, 2, 1)).transpose(1,2)
  12. # After Conv1d, x is a torch with Size([32, 96, 512])
  13. return x

然后是

  1. class PositionalEmbedding(nn.Module):
  2. def __init__(self, d_model, max_len=5000):
  3. super(PositionalEmbedding, self).__init__()
  4. # Compute the positional encodings once in log space.
  5. pe = torch.zeros(max_len, d_model).float() # pe is a tensor with Size([5000, 512]) with all 0.
  6. pe.require_grad = False
  7. position = torch.arange(0, max_len).float().unsqueeze(1) # position is a tensor with Size([5000, 1])
  8. div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp() # div_term is a tensor with Size([256])
  9. # position * div_term is a tensor with Size([5000, 256])
  10. pe[:, 0::2] = torch.sin(position * div_term) # 把sin结果赋给奇数列
  11. pe[:, 1::2] = torch.cos(position * div_term) # 把cos结果赋给偶数列
  12. pe = pe.unsqueeze(0) # pe is a tensor with Size([1, 5000, 512])
  13. self.register_buffer('pe', pe)
  14. def forward(self, x):
  15. return self.pe[:, :x.size(1)] # return is a tensor with Size([1, 96, 512])

然后是时间特征的嵌入

  1. class TimeFeatureEmbedding(nn.Module):
  2. def __init__(self, d_model, freq='h'):
  3. super(TimeFeatureEmbedding, self).__init__()
  4. freq_map = {'h':4, 't':5, 's':6, 'm':1, 'a':1, 'w':2, 'd':3, 'b':3}
  5. d_inp = freq_map[freq] # 4
  6. self.embed = nn.Linear(d_inp, d_model) # 4, 512
  7. def forward(self, x):
  8. return self.embed(x)

数据嵌入

  1. class DataEmbedding(nn.Module):
  2. def __init__(self, c_in, d_model, freq, dropout):
  3. super(DataEmbedding, self).__init__()
  4. self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model)
  5. self.position_embedding = PositionalEmbedding(d_model=d_model)
  6. self.time_feature_embedding = TimeFeatureEmbedding(d_model=d_model, freq=freq)
  7. self.dropout = nn.Dropout(p=dropout)
  8. def forward(self, x, x_mark):
  9. # x is a tensor with Size([32, 96, 7])
  10. # x_mark is a tensor with Size([32, 96, 4])
  11. x = self.value_embedding(x) + self.position_embedding(x) + self.time_feature_embedding(x_mark)
  12. return self.dropout(x) # return is a tensor with Size([32, 96, 512])

掩码的实现

  1. class TriangularCausalMask():
  2. def __init__(self, B, L, device="cpu"):
  3. mask_shape = [B, 1, L, L]
  4. with torch.no_grad():
  5. self._mask = torch.triu(torch.ones(mask_shape, dtype=torch.bool), diagonal=1).to(device)
  6. @property
  7. def mask(self):
  8. return self._mask

注意力机制

  1. class FullAttention(nn.Module):
  2. def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
  3. super(FullAttention, self).__init__()
  4. self.scale = scale # None
  5. self.mask_flag = mask_flag # False
  6. self.output_attention = output_attention # False
  7. self.dropout = nn.Dropout(attention_dropout) # attention_dropout = 0.05
  8. def forward(self, queries, keys, values, attn_mask):
  9. # shape of queries is [b, l, h, e]
  10. # shape of keys is [b, s, h, e]
  11. # shape of values is [b, s, h, d]
  12. B, L, H, E = queries.shape # 32, 96, 8, 64
  13. _, S, _, D = values.shape # 96 64
  14. scale = self.scale or 1. / math.sqrt(E) # 0.125
  15. scores = torch.einsum("blhe,bshe->bhls", queries, keys) # scores is a tensor with Size([32, 8, 96, 96])
  16. # self.mask_flag = False in encoder but self.mask_flag = True in decoder
  17. if self.mask_flag:
  18. if attn_mask is None:
  19. attn_mask = TriangularCausalMask(B, L, device=queries.device)
  20. scores.masked_fill_(attn_mask.mask, -np.inf)
  21. A = self.dropout(torch.softmax(scale * scores, dim=-1)) # A is a tensor with Size([32, 8, 96, 96])
  22. V = torch.einsum("bhls,bshd->blhd", A, values) # V is a tensor with Size([32, 96, 8, 64])
  23. if self.output_attention:
  24. return (V.contiguous(), A)
  25. else: # 走这条路
  26. return (V.contiguous(), None)

注意力层

  1. class AttentionLayer(nn.Module):
  2. def __init__(self, attention, d_model, n_heads, d_keys=None, d_values=None):
  3. super(AttentionLayer, self).__init__()
  4. d_keys = d_keys or (d_model // n_heads) # 512 / 8 = 64
  5. d_values = d_values or (d_model // n_heads) # 512 / 8 = 64
  6. self.inner_attention = attention # FullAttention(...)
  7. self.query_projection = nn.Linear(d_model, d_keys * n_heads) # 512 512
  8. self.key_projection = nn.Linear(d_model, d_keys * n_heads) # 512 512
  9. self.value_projection = nn.Linear(d_model, d_values * n_heads) # 512 512
  10. self.out_projection = nn.Linear(d_values * n_heads, d_model) # 512 512 # 线性拼接8个头
  11. self.n_heads = n_heads # 8
  12. def forward(self, queries, keys, values, attn_mask):
  13. B, L, _ = queries.shape # B = 32, L = 96
  14. _, S, _ = keys.shape # S = 96
  15. H = self.n_heads # H = 8
  16. queries = self.query_projection(queries).view(B, L, H, -1) # queries is a tensor with Size([32, 96, 8, 64])
  17. keys = self.key_projection(keys).view(B, S, H, -1) # keys is a tensor with Size([32, 96, 8, 64])
  18. values = self.value_projection(values).view(B, S, H, -1) # values is a tensor with Size([32, 96, 8, 64])
  19. out, attn = self.inner_attention(queries, keys, values, attn_mask) # out is a tensor with Size([32, 96, 8, 64])
  20. out = out.view(B, L, -1) # out is a tensor with Size([32, 96, 512])
  21. return self.out_projection(out), attn

编码器层

  1. class EncoderLayer(nn.Module):
  2. def __init__(self, attention, d_model, d_ff=None, dropout=0.1, activation="relu"):
  3. super(EncoderLayer, self).__init__()
  4. d_ff = d_ff or 4 * d_model # 512 * 4 = 2048
  5. self.attention = attention
  6. self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
  7. self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)
  8. self.norm1 = nn.LayerNorm(d_model)
  9. self.norm2 = nn.LayerNorm(d_model)
  10. self.dropout = nn.Dropout(dropout)
  11. self.activation = F.relu if activation == "relu" else F.gelu
  12. def forward(self, x, attn_mask=None):
  13. # x [B, L, D] 32, 96, 512
  14. new_x, attn = self.attention(x, x, x, attn_mask = attn_mask) # new_x is a tensor with Size([32, 96, 512])
  15. x = x + self.dropout(new_x) # connection # x is a tensor with Size([32, 96, 512])
  16. y = x = self.norm1(x) # x and y are a tensor with Size([32, 96, 512])
  17. y = self.dropout(self.activation(self.conv1(y.transpose(-1,1))))
  18. y = self.dropout(self.conv2(y).transpose(-1,1)) # y is a tensor with Size([32, 96, 512])
  19. return self.norm2(x+y), attn

编码器

  1. class Encoder(nn.Module):
  2. def __init__(self, attn_layers, conv_layers=None, norm_layer=None):
  3. super(Encoder, self).__init__()
  4. self.attn_layers = nn.ModuleList(attn_layers)
  5. self.norm = norm_layer
  6. def forward(self, x, attn_mask=None):
  7. # x [B, L, D] [32, 96, 512]
  8. attns = []
  9. for attn_layer in self.attn_layers:
  10. x, attn = attn_layer(x, attn_mask=attn_mask)
  11. attns.append(attn)
  12. if self.norm is not None:
  13. x = self.norm(x)
  14. return x, attns

解码器层

  1. class DecoderLayer(nn.Module):
  2. def __init__(self, self_attention, cross_attention, d_model, d_ff=None, dropout=0.1, activation="relu"):
  3. super(DecoderLayer, self).__init__()
  4. d_ff = d_ff or 4 * d_model
  5. self.self_attention = self_attention
  6. self.cross_attention = cross_attention
  7. self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
  8. self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)
  9. self.norm1 = nn.LayerNorm(d_model)
  10. self.norm2 = nn.LayerNorm(d_model)
  11. self.norm3 = nn.LayerNorm(d_model)
  12. self.dropout = nn.Dropout(dropout)
  13. self.activation = F.relu if activation == "relu" else F.gelu
  14. def forward(self, x, cross, x_mask=None, cross_mask=None):
  15. x = x + self.dropout(self.self_attention(x, x, x, attn_mask=x_mask)[0])
  16. x = self.norm1(x)
  17. x = x + self.dropout(self.cross_attention(x, cross, cross, attn_mask=cross_mask)[0])
  18. y = x = self.norm2(x)
  19. y = self.dropout(self.activation(self.conv1(y.transpose(-1,1))))
  20. y = self.dropout(self.conv2(y).transpose(-1,1))
  21. return self.norm3(x+y)

解码器

  1. class Decoder(nn.Module):
  2. def __init__(self, layers, norm_layer=None):
  3. super(Decoder, self).__init__()
  4. self.layers = nn.ModuleList(layers)
  5. self.norm = norm_layer
  6. def forward(self, x, cross, x_mask=None, cross_mask=None):
  7. # x is the output of decoder_embedding
  8. # cross is the output of encoder
  9. for layer in self.layers:
  10. x = layer(x, cross, x_mask=x_mask, cross_mask=cross_mask)
  11. if self.norm is not None:
  12. x = self.norm(x)
  13. return x

最重要的Transformer类

  1. class Transformer(nn.Module):
  2. def __init__(self, enc_in, dec_in, c_out, seq_len, label_len, pred_len, factor, d_model, n_heads, e_layers, d_layers, d_ff,
  3. dropout, freq, activation, output_attention, device):
  4. super(Transformer, self).__init__()
  5. self.pred_len = pred_len # 24
  6. self.output_attention = output_attention # False
  7. # Embedding
  8. self.enc_embedding = DataEmbedding(enc_in, d_model, freq, dropout)
  9. self.dec_embedding = DataEmbedding(dec_in, d_model, freq, dropout)
  10. # Encoder
  11. self.encoder = Encoder(
  12. [
  13. EncoderLayer(
  14. AttentionLayer(FullAttention(False, factor, attention_dropout=dropout, output_attention=output_attention), d_model, n_heads),
  15. d_model,
  16. d_ff,
  17. dropout=dropout,
  18. activation=activation
  19. ) for l in range(e_layers)
  20. ],
  21. norm_layer=torch.nn.LayerNorm(d_model)
  22. )
  23. # Decoder
  24. self.decoder = Decoder(
  25. [
  26. DecoderLayer(
  27. AttentionLayer(FullAttention(True, factor, attention_dropout=dropout, output_attention=True), d_model, n_heads),
  28. AttentionLayer(FullAttention(False, factor, attention_dropout=dropout, output_attention=False), d_model, n_heads),
  29. d_model,
  30. d_ff,
  31. dropout=dropout,
  32. activation=activation,
  33. )
  34. for l in range(d_layers)
  35. ],
  36. norm_layer=nn.LayerNorm(d_model)
  37. )
  38. self.projection = nn.Linear(d_model, c_out, bias=True)
  39. def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, enc_self_mask=None, dec_self_mask=None, dec_enc_mask=None):
  40. enc_embedding_out = self.enc_embedding(x_enc, x_mark_enc) # enc_out is a tensor with Size([32, 96, 512])
  41. enc_out, attns = self.encoder(enc_embedding_out, attn_mask=enc_self_mask) # enc_out is a tensor with Size([32, 96, 512])
  42. dec_embedding_out = self.dec_embedding(x_dec, x_mark_dec)
  43. dec_out = self.decoder(dec_embedding_out, enc_out, x_mask=dec_self_mask) # dec_out is a tensor with Size([32, 72, 512])
  44. model_output = self.projection(dec_out) # enc_out is a tensor with Size([32, 72, 1])
  45. return model_output[:, -self.pred_len:, :] # [B, L, D] [32, 24, 1]

调整学习率的方法

  1. def adjust_learning_rate(optimizer, epoch, args):
  2. if args.lradj=='type1': # 走这条路
  3. lr_adjust = {epoch: args.learning_rate * (0.5 ** ((epoch-1) // 1))}
  4. elif args.lradj=='type2':
  5. lr_adjust = {
  6. 2: 5e-5, 4: 1e-5, 6: 5e-6, 8: 1e-6,
  7. 10: 5e-7, 15: 1e-7, 20: 5e-8
  8. }
  9. if epoch in lr_adjust.keys():
  10. lr = lr_adjust[epoch]
  11. for param_group in optimizer.param_groups:
  12. param_group['lr'] = lr
  13. print('Updating learning rate to {}'.format(lr)) # 这里输出

早停类

  1. class EarlyStopping:
  2. def __init__(self, patience, verbose, delta=0):
  3. self.patience = patience # 3
  4. self.verbose = verbose # True
  5. self.counter = 0
  6. self.best_score = None
  7. self.early_stop = False
  8. self.val_loss_min = np.Inf
  9. self.delta = delta
  10. def __call__(self, val_loss, model, path):
  11. score = -val_loss
  12. if self.best_score is None:
  13. self.best_score = score
  14. self.save_checkpoint(val_loss, model, path)
  15. elif score < self.best_score + self.delta:
  16. self.counter += 1
  17. print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
  18. if self.counter >= self.patience:
  19. self.early_stop = True
  20. else:
  21. self.best_score = score
  22. self.save_checkpoint(val_loss, model, path)
  23. self.counter = 0
  24. def save_checkpoint(self, val_loss, model, path):
  25. if self.verbose:
  26. print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...') # 这里输出
  27. torch.save(model.state_dict(), path + '/' + 'checkpoint.pth')
  28. self.val_loss_min = val_loss

数据预处理

  1. class StandardScaler():
  2. def __init__(self):
  3. self.mean = 0.
  4. self.std = 1.
  5. def fit(self, data):
  6. self.mean = data.mean(0)
  7. self.std = data.std(0)
  8. def transform(self, data):
  9. mean = torch.from_numpy(self.mean).type_as(data).to(data.device) if torch.is_tensor(data) else self.mean
  10. std = torch.from_numpy(self.std).type_as(data).to(data.device) if torch.is_tensor(data) else self.std
  11. return (data - mean) / std
  12. def inverse_transform(self, data):
  13. mean = torch.from_numpy(self.mean).type_as(data).to(data.device) if torch.is_tensor(data) else self.mean
  14. std = torch.from_numpy(self.std).type_as(data).to(data.device) if torch.is_tensor(data) else self.std
  15. if data.shape[-1] != mean.shape[-1]:
  16. mean = mean[-1:]
  17. std = std[-1:]
  18. return (data * std) + mean

时间特征的提取

  1. class TimeFeature:
  2. def __init__(self):
  3. pass
  4. def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
  5. pass
  6. def __repr__(self):
  7. return self.__class__.__name__ + "()"
  8. class SecondOfMinute(TimeFeature):
  9. """Minute of hour encoded as value between [-0.5, 0.5]"""
  10. def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
  11. return index.second / 59.0 - 0.5
  12. class MinuteOfHour(TimeFeature):
  13. """Minute of hour encoded as value between [-0.5, 0.5]"""
  14. def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
  15. return index.minute / 59.0 - 0.5
  16. class HourOfDay(TimeFeature):
  17. """Hour of day encoded as value between [-0.5, 0.5]"""
  18. def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
  19. return index.hour / 23.0 - 0.5
  20. class DayOfWeek(TimeFeature):
  21. """Hour of day encoded as value between [-0.5, 0.5]"""
  22. def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
  23. return index.dayofweek / 6.0 - 0.5
  24. class DayOfMonth(TimeFeature):
  25. """Day of month encoded as value between [-0.5, 0.5]"""
  26. def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
  27. return (index.day - 1) / 30.0 - 0.5
  28. class DayOfYear(TimeFeature):
  29. """Day of year encoded as value between [-0.5, 0.5]"""
  30. def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
  31. return (index.dayofyear - 1) / 365.0 - 0.5
  32. class MonthOfYear(TimeFeature):
  33. """Month of year encoded as value between [-0.5, 0.5]"""
  34. def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
  35. return (index.month - 1) / 11.0 - 0.5
  36. class WeekOfYear(TimeFeature):
  37. """Week of year encoded as value between [-0.5, 0.5]"""
  38. def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
  39. return (index.week - 1) / 52.0 - 0.5
  40. def time_features_from_frequency_str(freq_str: str) -> List[TimeFeature]:
  41. features_by_offsets = {
  42. offsets.YearEnd: [],
  43. offsets.QuarterEnd: [MonthOfYear],
  44. offsets.MonthEnd: [MonthOfYear],
  45. offsets.Week: [DayOfMonth, WeekOfYear],
  46. offsets.Day: [DayOfWeek, DayOfMonth, DayOfYear],
  47. offsets.BusinessDay: [DayOfWeek, DayOfMonth, DayOfYear],
  48. offsets.Hour: [HourOfDay, DayOfWeek, DayOfMonth, DayOfYear], # 找到这一项
  49. offsets.Minute: [
  50. MinuteOfHour,
  51. HourOfDay,
  52. DayOfWeek,
  53. DayOfMonth,
  54. DayOfYear,
  55. ],
  56. offsets.Second: [
  57. SecondOfMinute,
  58. MinuteOfHour,
  59. HourOfDay,
  60. DayOfWeek,
  61. DayOfMonth,
  62. DayOfYear,
  63. ],
  64. }
  65. offset = to_offset(freq_str) # <Hour>
  66. for offset_type, feature_classes in features_by_offsets.items():
  67. if isinstance(offset, offset_type):
  68. return [cls() for cls in feature_classes] # return [HourOfDay(), DayOfWeek(), DayOfMonth(), DayOfYear()]
  69. supported_freq_msg = f"""
  70. Unsupported frequency {freq_str}
  71. The following frequencies are supported:
  72. Y - yearly
  73. alias: A
  74. M - monthly
  75. W - weekly
  76. D - daily
  77. B - business days
  78. H - hourly
  79. T - minutely
  80. alias: min
  81. S - secondly
  82. """
  83. raise RuntimeError(supported_freq_msg)
  84. def time_features(dates, timeenc, freq):
  85. """
  86. > `time_features` takes in a `dates` dataframe with a 'dates' column and extracts the date down to `freq` where freq can be any of the following if `timeenc` is 0:
  87. > * m - [month]
  88. > * w - [month]
  89. > * d - [month, day, weekday]
  90. > * b - [month, day, weekday]
  91. > * h - [month, day, weekday, hour]
  92. > * t - [month, day, weekday, hour, *minute]
  93. >
  94. > If `timeenc` is 1, a similar, but different list of `freq` values are supported (all encoded between [-0.5 and 0.5]):
  95. > * Q - [month]
  96. > * M - [month]
  97. > * W - [Day of month, week of year]
  98. > * D - [Day of week, day of month, day of year]
  99. > * B - [Day of week, day of month, day of year]
  100. > * H - [Hour of day, day of week, day of month, day of year]
  101. > * T - [Minute of hour*, hour of day, day of week, day of month, day of year]
  102. > * S - [Second of minute, minute of hour, hour of day, day of week, day of month, day of year]
  103. *minute returns a number from 0-3 corresponding to the 15 minute period it falls into.
  104. """
  105. if timeenc==0:
  106. dates['month'] = dates.date.apply(lambda row:row.month,1)
  107. dates['day'] = dates.date.apply(lambda row:row.day,1)
  108. dates['weekday'] = dates.date.apply(lambda row:row.weekday(),1)
  109. dates['hour'] = dates.date.apply(lambda row:row.hour,1)
  110. dates['minute'] = dates.date.apply(lambda row:row.minute,1)
  111. dates['minute'] = dates.minute.map(lambda x:x//15)
  112. freq_map = {
  113. 'y':[],'m':['month'],'w':['month'],'d':['month','day','weekday'],
  114. 'b':['month','day','weekday'],'h':['month','day','weekday','hour'],
  115. 't':['month','day','weekday','hour','minute'],
  116. }
  117. return dates[freq_map[freq.lower()]].values
  118. if timeenc==1: # 走这条路
  119. list1 = []
  120. dates = pd.to_datetime(dates.date.values)
  121. for feat in time_features_from_frequency_str(freq):
  122. # print(feat(dates))
  123. list1.append(feat(dates))
  124. return np.vstack(list1).transpose(1, 0)

数据集类

  1. class MyDataset(Dataset):
  2. def __init__(self, root_path, flag, size, features, data_path, target, inverse, timeenc, freq):
  3. # size [seq_len, label_len, pred_len]
  4. self.seq_len = size[0] # 96
  5. self.label_len = size[1] # 48
  6. self.pred_len = size[2] # 24
  7. assert flag in ['train', 'test', 'val']
  8. type_map = {'train':0, 'val':1, 'test':2}
  9. self.set_type = type_map[flag] # 0
  10. self.features = features # 'MS'
  11. self.target = target # 'OT'
  12. self.inverse = inverse # False
  13. self.timeenc = timeenc # 1
  14. self.freq = freq # 'h'
  15. self.root_path = root_path # './data/'
  16. self.data_path = data_path # 'ETTh1.csv'
  17. self.__read_data__()
  18. def __read_data__(self):
  19. self.scaler = StandardScaler()
  20. df_raw = pd.read_csv(os.path.join(self.root_path, self.data_path)) # a dataframe of 17420 * 8
  21. border1s = [0, int(len(df_raw) * 0.6) - self.seq_len, int(len(df_raw) * 0.8) - self.seq_len] # border1s = [0, 10356, 13840]
  22. border2s = [int(len(df_raw) * 0.6), int(len(df_raw) * 0.8), int(len(df_raw) * 1)] # border2s = [10452, 13936, 17420]
  23. border1 = border1s[self.set_type] # 0
  24. border2 = border2s[self.set_type] # 10452
  25. if self.features=='M' or self.features=='MS':
  26. df_data = df_raw[df_raw.columns[1:]] # a dataframe of 17420 * 7 (without 'date' column)
  27. elif self.features=='S':
  28. df_data = df_raw[[self.target]]
  29. train_data = df_data[border1s[0]:border2s[0]] # a dataframe of 10452 * 7
  30. self.scaler.fit(train_data.values)
  31. data = self.scaler.transform(df_data.values) # a 2D-array of shape (17420, 7)
  32. df_stamp = df_raw[['date']][border1:border2] # a dataframe of 10452 * 1 (only 'date' column)
  33. df_stamp['date'] = pd.to_datetime(df_stamp.date) # convert to datatimens
  34. data_stamp = time_features(df_stamp, timeenc=self.timeenc, freq=self.freq) # a 2D-array of shape (10452, 4)
  35. self.data_x = data[border1:border2] # a 2D-array of shape (10452, 7)
  36. if self.inverse:
  37. self.data_y = df_data.values[border1:border2]
  38. else: # 走这条路
  39. self.data_y = data[border1:border2] # a 2D-array of shape (10452, 7)
  40. self.data_stamp = data_stamp # a 2D-array of shape (10452, 4)
  41. def __getitem__(self, index):
  42. s_begin = index
  43. s_end = s_begin + self.seq_len
  44. r_begin = s_end - self.label_len
  45. r_end = r_begin + self.label_len + self.pred_len
  46. seq_x = self.data_x[s_begin:s_end] # a 2D-array of shape (96, 7)
  47. if self.inverse:
  48. seq_y = np.concatenate([self.data_x[r_begin:r_begin+self.label_len], self.data_y[r_begin+self.label_len:r_end]], 0)
  49. else: # 走这条路
  50. seq_y = self.data_y[r_begin:r_end] # a 2D-array of shape (72, 7)
  51. seq_x_mark = self.data_stamp[s_begin:s_end] # a 2D-array of shape (96, 4)
  52. seq_y_mark = self.data_stamp[r_begin:r_end] # a 2D-array of shape (72, 4)
  53. return seq_x, seq_y, seq_x_mark, seq_y_mark
  54. def __len__(self):
  55. return len(self.data_x) - self.seq_len - self.pred_len + 1 # 10452 - 96 - 24 + 1 = 10333
  56. def inverse_transform(self, data):
  57. return self.scaler.inverse_transform(data)

评价指标

  1. def RSE(pred, true):
  2. return np.sqrt(np.sum((true-pred)**2)) / np.sqrt(np.sum((true-true.mean())**2))
  3. def CORR(pred, true):
  4. u = ((true-true.mean(0))*(pred-pred.mean(0))).sum(0)
  5. d = np.sqrt(((true-true.mean(0))**2*(pred-pred.mean(0))**2).sum(0))
  6. return (u/d).mean(-1)
  7. def MAE(pred, true):
  8. return np.mean(np.abs(pred-true))
  9. def MSE(pred, true):
  10. return np.mean((pred-true)**2)
  11. def RMSE(pred, true):
  12. return np.sqrt(MSE(pred, true))
  13. def MAPE(pred, true):
  14. return np.mean(np.abs((pred - true) / true))
  15. def MSPE(pred, true):
  16. return np.mean(np.square((pred - true) / true))
  17. def metric(pred, true):
  18. mae = MAE(pred, true)
  19. mse = MSE(pred, true)
  20. rmse = RMSE(pred, true)
  21. mape = MAPE(pred, true)
  22. mspe = MSPE(pred, true)
  23. return mae,mse,rmse,mape,mspe

Exp类

  1. class Exp_Transformer:
  2. def __init__(self, args):
  3. self.args = args
  4. self.device = self._acquire_device()
  5. self.model = self._build_model().to(self.device)
  6. def _acquire_device(self):
  7. os.environ["CUDA_VISIBLE_DEVICES"] = str(self.args.gpu) # 指定使用显卡cuda:{0}
  8. device = torch.device('cuda:{}'.format(self.args.gpu))
  9. print('Use GPU: cuda:{}'.format(self.args.gpu)) # 这里输出
  10. return device
  11. def _build_model(self):
  12. model = Transformer(
  13. self.args.enc_in, # 7
  14. self.args.dec_in, # 7
  15. self.args.c_out, # 1
  16. self.args.seq_len, # 96
  17. self.args.label_len, # 48
  18. self.args.pred_len, # 24
  19. self.args.factor, # 5
  20. self.args.d_model, # 512
  21. self.args.n_heads, # 8
  22. self.args.e_layers, # 2
  23. self.args.d_layers, # 1
  24. self.args.d_ff, # 2048
  25. self.args.dropout, # 0.05
  26. self.args.freq, # 'h'
  27. self.args.activation, # 'gelu'
  28. self.args.output_attention, # False
  29. self.device # torch.device('cuda:{0}')
  30. ).float()
  31. return model
  32. def _get_data(self, flag):
  33. Data = MyDataset
  34. if flag == 'test':
  35. shuffle_flag = False
  36. drop_last = True
  37. batch_size = self.args.batch_size
  38. freq = self.args.freq
  39. elif flag=='pred':
  40. shuffle_flag = False
  41. drop_last = False
  42. batch_size = 1
  43. freq = self.args.freq
  44. Data = Dataset_Pred
  45. else: # flag == 'train' or flag == 'val'
  46. shuffle_flag = True
  47. drop_last = True
  48. batch_size = self.args.batch_size # 32
  49. data_set = Data(
  50. root_path=self.args.root_path, # './data/'
  51. flag=flag, # 'train'
  52. size=[self.args.seq_len, self.args.label_len, self.args.pred_len], # [96, 48, 24]
  53. features=self.args.features, # 'MS'
  54. data_path=self.args.data_path, # 'ETTh1.csv'
  55. target=self.args.target, # 'OT'
  56. inverse=self.args.inverse, # False
  57. timeenc=1,
  58. freq='h',
  59. ) # 训练时data_set是MyDataset类的一个对象
  60. print(flag, len(data_set)) # 这里输出
  61. data_loader = DataLoader(data_set, batch_size=batch_size, shuffle=shuffle_flag, num_workers=args.num_workers, drop_last=drop_last)
  62. return data_set, data_loader
  63. def train(self, setting):
  64. train_data, train_loader = self._get_data(flag = 'train')
  65. vali_data, vali_loader = self._get_data(flag = 'val')
  66. test_data, test_loader = self._get_data(flag = 'test')
  67. path = os.path.join(self.args.checkpoints, setting)
  68. if not os.path.exists(path):
  69. os.makedirs(path)
  70. time_now = time.time()
  71. train_steps = len(train_loader) # 266 = 8521 // 32
  72. early_stopping = EarlyStopping(patience=self.args.patience, verbose=True)
  73. model_optim = self._select_optimizer()
  74. criterion = self._select_criterion()
  75. hist = np.zeros(self.args.train_epochs)
  76. for epoch in range(self.args.train_epochs): # 循环6次
  77. iter_count = 0
  78. train_loss = []
  79. self.model.train()
  80. epoch_time = time.time()
  81. for i, (batch_x,batch_y,batch_x_mark,batch_y_mark) in enumerate(train_loader):
  82. # batch_x is a tensor with Size([32, 96, 7])
  83. # batch_y is a tensor with Size([32, 72, 7])
  84. # batch_x_mark is a tensor with Size([32, 96, 4])
  85. # batch_y_mark is a tensor with Size([32, 72, 4])
  86. iter_count += 1
  87. model_optim.zero_grad()
  88. pred, true = self._process_one_batch(train_data, batch_x, batch_y, batch_x_mark, batch_y_mark)
  89. loss = criterion(pred, true)
  90. train_loss.append(loss.item())
  91. if (i+1) % 100==0:
  92. print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item())) # 这里输出
  93. speed = (time.time() - time_now) / iter_count
  94. left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i)
  95. print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time)) # 这里输出
  96. iter_count = 0
  97. time_now = time.time()
  98. loss.backward()
  99. model_optim.step()
  100. print("Epoch: {} cost time: {}".format(epoch+1, time.time()-epoch_time)) # 这里输出
  101. train_loss = np.average(train_loss)
  102. hist[epoch] = train_loss
  103. vali_loss = self.vali(vali_data, vali_loader, criterion)
  104. test_loss = self.vali(test_data, test_loader, criterion)
  105. print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format(
  106. epoch + 1, train_steps, train_loss, vali_loss, test_loss)) # 这里输出
  107. early_stopping(vali_loss, self.model, path)
  108. if early_stopping.early_stop:
  109. print("Early stopping") # 这里输出
  110. break
  111. adjust_learning_rate(model_optim, epoch+1, self.args)
  112. plt.plot(hist, label="Training loss")
  113. plt.legend()
  114. plt.xticks(range(1, len(hist) + 1)) # len(hist) + 1 是因为范围不包括结束值
  115. plt.show()
  116. best_model_path = path+'/'+'checkpoint.pth'
  117. self.model.load_state_dict(torch.load(best_model_path))
  118. return self.model
  119. def vali(self, vali_data, vali_loader, criterion):
  120. self.model.eval()
  121. total_loss = []
  122. for i, (batch_x,batch_y,batch_x_mark,batch_y_mark) in enumerate(vali_loader):
  123. pred, true = self._process_one_batch(vali_data, batch_x, batch_y, batch_x_mark, batch_y_mark)
  124. loss = criterion(pred.detach().cpu(), true.detach().cpu())
  125. total_loss.append(loss)
  126. total_loss = np.average(total_loss)
  127. self.model.train()
  128. return total_loss
def test(self, setting):
    """Run the trained model over the test split, plot the stitched
    prediction against the ground truth, and save metrics and raw
    prediction/target arrays under ``./results/<setting>/``.

    NOTE(review): the inference loop runs without ``torch.no_grad()``;
    each batch is detached afterwards, but wrapping the loop would avoid
    building the autograd graph entirely — confirm before changing.
    """
    test_data, test_loader = self._get_data(flag='test')
    self.model.eval()
    preds = []
    trues = []
    for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(test_loader):
        pred, true = self._process_one_batch(test_data, batch_x, batch_y, batch_x_mark, batch_y_mark)
        preds.append(pred.detach().cpu().numpy())
        trues.append(true.detach().cpu().numpy())
    preds = np.array(preds)
    trues = np.array(trues)
    print('test shape:', preds.shape, trues.shape)
    # Collapse the leading batch dimensions: (..., pred_len, f) -> (N, pred_len, f).
    preds = preds.reshape(-1, preds.shape[-2], preds.shape[-1])
    trues = trues.reshape(-1, trues.shape[-2], trues.shape[-1])
    print('test shape:', preds.shape, trues.shape)
    # Stitch overlapping forecast windows into one continuous series:
    # take the first predicted step of every window, then the remaining
    # steps of the final window.
    # NOTE(review): ``.item()`` requires exactly one value per step, so this
    # assumes a single output feature (the 'MS'/'S' setting) and windows that
    # advance one step at a time — confirm against the dataset class.
    prediction = []
    ground_truth = []
    for i in range(preds.shape[0] - 1):
        prediction.append(preds[i][0].item())
        ground_truth.append(trues[i][0].item())
    for j in range(preds.shape[1] - 1):
        prediction.append(preds[-1][j+1].item())
        ground_truth.append(trues[-1][j+1].item())
    plt.plot(prediction, label='Prediction')
    plt.plot(ground_truth, label='Ground Truth')
    # Add the legend.
    plt.legend()
    # Label the x and y axes.
    plt.xlabel('Index')
    plt.ylabel('Value')
    # Show the figure.
    plt.show()
    # result save
    folder_path = './results/' + setting +'/'
    if not os.path.exists(folder_path):
        os.makedirs(folder_path)
    # ``metric`` is a project helper defined elsewhere in this file/repo.
    mae, mse, rmse, mape, mspe = metric(preds, trues)
    print('mse:{}, mae:{}'.format(mse, mae))
    np.save(folder_path+'metrics.npy', np.array([mae, mse, rmse, mape, mspe]))
    np.save(folder_path+'pred.npy', preds)
    np.save(folder_path+'true.npy', trues)
    return
  171. def _select_optimizer(self):
  172. model_optim = torch.optim.Adam(self.model.parameters(), lr=self.args.learning_rate)
  173. return model_optim
  174. def _select_criterion(self):
  175. criterion = nn.MSELoss()
  176. return criterion
  177. def _process_one_batch(self, dataset_object, batch_x, batch_y, batch_x_mark, batch_y_mark):
  178. batch_x = batch_x.float().to(self.device)
  179. batch_y = batch_y.float() # 这个不能放GPU上
  180. batch_x_mark = batch_x_mark.float().to(self.device)
  181. batch_y_mark = batch_y_mark.float().to(self.device)
  182. # decoder input
  183. if self.args.padding==0: # 走这条路
  184. dec_inp = torch.zeros([batch_y.shape[0], self.args.pred_len, batch_y.shape[-1]]).float() # dec_inp is a tensor with Size([32, 24, 7])
  185. elif self.args.padding==1:
  186. dec_inp = torch.ones([batch_y.shape[0], self.args.pred_len, batch_y.shape[-1]]).float()
  187. dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device) # dec_inp is a tensor with Size([32, 72, 7])
  188. # encoder - decoder
  189. if self.args.output_attention:
  190. outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
  191. else: # 走这条路
  192. outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) # outputs is a tensor with Size([32, 24, 1])
  193. if self.args.inverse:
  194. outputs = dataset_object.inverse_transform(outputs)
  195. f_dim = -1 if self.args.features=='MS' else 0
  196. batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device) # batch_y is a tensor with Size([32, 24, 1])
  197. return outputs, batch_y

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/我家小花儿/article/detail/474521
推荐阅读
相关标签
  

闽ICP备14008679号