I'm new to time-series forecasting, and the many Transformer-for-forecasting codebases I came across left me thoroughly confused. So I'm writing this walkthrough for myself, to pin down once and for all how a Transformer is actually applied to time-series prediction.
I'll explain directly through the code, using a public dataset (ETTh1) as the running example.
First, the imports:
- import sys
- import argparse
- import os
- import torch
- import torch.nn as nn
- import torch.nn.functional as F
- import math
- import time
- import numpy as np
- import pandas as pd
- from torch.utils.data import Dataset, DataLoader
- from typing import List
- from pandas.tseries import offsets
- from pandas.tseries.frequencies import to_offset
- import random
- import matplotlib.pyplot as plt
Each of these packages will come into play later.
Next, fix the random seeds for reproducibility (the sys.argv reset lets argparse run inside a notebook with its defaults):
- sys.argv = ['run.py']  # running in a notebook: strip CLI args so argparse falls back to the defaults
- fix_seed = 2024
- random.seed(fix_seed)
- torch.manual_seed(fix_seed)
- np.random.seed(fix_seed)
Then define the command-line arguments:
- parser = argparse.ArgumentParser(description='Transformer')
- parser.add_argument('--model', type=str, required=False, default='Transformer', help='model of experiment')
- parser.add_argument('--data', type=str, required=False, default='ETTh1', help='dataset')
- parser.add_argument('--root_path', type=str, default='./data/', help='root path of the data file')
- parser.add_argument('--data_path', type=str, default='ETTh1.csv', help='data file')
- parser.add_argument('--checkpoints', type=str, default='./checkpoints/', help='location of model checkpoints')  # needed later by Exp_Transformer.train()
- parser.add_argument('--features', type=str, default='MS', help='forecasting task, options:[M, S, MS]; M:multivariate predict multivariate, S:univariate predict univariate, MS:multivariate predict univariate')
- parser.add_argument('--target', type=str, default='OT', help='target feature in S or MS task')
- parser.add_argument('--freq', type=str, default='h', help='freq for time features encoding, options:[s:secondly, t:minutely, h:hourly, d:daily, b:business days, w:weekly, m:monthly]')
- parser.add_argument('--seq_len', type=int, default=96, help='input sequence length of Transformer encoder')
- parser.add_argument('--label_len', type=int, default=48, help='start token length of Transformer decoder')
- parser.add_argument('--pred_len', type=int, default=24, help='prediction sequence length')
- parser.add_argument('--enc_in', type=int, default=7, help='encoder input size')
- parser.add_argument('--dec_in', type=int, default=7, help='decoder input size')
- parser.add_argument('--c_out', type=int, default=7, help='output size')
- parser.add_argument('--d_model', type=int, default=512, help='dimension of model')
- parser.add_argument('--n_heads', type=int, default=8, help='num of heads')
- parser.add_argument('--e_layers', type=int, default=6, help='num of encoder layers')
- parser.add_argument('--d_layers', type=int, default=2, help='num of decoder layers')
- parser.add_argument('--d_ff', type=int, default=2048, help='dimension of fcn')
- parser.add_argument('--factor', type=int, default=5, help='attention factor')  # needed later by _build_model()
- parser.add_argument('--padding', type=int, default=0, help='padding type')
- parser.add_argument('--dropout', type=float, default=0.05, help='dropout')
- parser.add_argument('--activation', type=str, default='gelu',help='activation')
- parser.add_argument('--output_attention', action='store_true', help='whether to output attention in encoder')
- parser.add_argument('--num_workers', type=int, default=0, help='data loader num workers')
- parser.add_argument('--train_epochs', type=int, default=6, help='number of train epochs')
- parser.add_argument('--batch_size', type=int, default=32, help='batch size of train input data')
- parser.add_argument('--patience', type=int, default=3, help='early stopping patience')
- parser.add_argument('--learning_rate', type=float, default=0.0001, help='optimizer learning rate')
- parser.add_argument('--loss', type=str, default='mse',help='loss function')
- parser.add_argument('--lradj', type=str, default='type1', help='adjust learning rate')
- parser.add_argument('--inverse', action='store_true', help='inverse output data', default=False)
- parser.add_argument('--use_gpu', type=bool, default=True, help='use gpu')
- parser.add_argument('--gpu', type=int, default=0, help='gpu')
- parser.add_argument('--is_rolling_predict', type=bool, default=True, help='rolling predict')
- parser.add_argument('--rolling_data_path', type=str, default='ETTh1-Test.csv', help='data file')
- parser.add_argument('--do_predict', action='store_true', default=True, help='whether to predict unseen future data')
- args = parser.parse_args()
- args.use_gpu = True if torch.cuda.is_available() and args.use_gpu else False
- data_parser = {
- 'ETTh1':{'data':'ETTh1.csv','T':'OT','M':[7,7,7],'S':[1,1,1],'MS':[7,7,1]},
- 'ETTh2':{'data':'ETTh2.csv','T':'OT','M':[7,7,7],'S':[1,1,1],'MS':[7,7,1]},
- 'ETTm1':{'data':'ETTm1.csv','T':'OT','M':[7,7,7],'S':[1,1,1],'MS':[7,7,1]},
- 'ETTm2':{'data':'ETTm2.csv','T':'OT','M':[7,7,7],'S':[1,1,1],'MS':[7,7,1]},
- 'WTH':{'data':'WTH.csv','T':'WetBulbCelsius','M':[12,12,12],'S':[1,1,1],'MS':[12,12,1]},
- 'ECL':{'data':'ECL.csv','T':'MT_320','M':[321,321,321],'S':[1,1,1],'MS':[321,321,1]},
- 'Solar':{'data':'solar_AL.csv','T':'POWER_136','M':[137,137,137],'S':[1,1,1],'MS':[137,137,1]},
- }
- if args.data in data_parser.keys():
- data_info = data_parser[args.data]
- args.data_path = data_info['data']
- args.target = data_info['T'] # 'OT'
- args.enc_in, args.dec_in, args.c_out = data_info[args.features] # 7, 7, 1
-
- print('Args in experiment:')
- print(args)
The printout above shows the default configuration we are running with.
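Three of these arguments do most of the work: seq_len is the encoder's look-back window, label_len is the stretch of known history handed to the decoder as a start token, and pred_len is the forecast horizon. A minimal sketch of how one sample will be sliced (illustrative index arithmetic, mirroring the dataset class shown later):
- # Illustration only: slicing one sample with seq_len=96, label_len=48, pred_len=24
- index = 0
- s_begin, s_end = index, index + args.seq_len      # encoder input covers [0, 96)
- r_begin = s_end - args.label_len                  # decoder input starts 48 steps earlier
- r_end = r_begin + args.label_len + args.pred_len  # decoder span covers [48, 120)
- print(s_begin, s_end, r_begin, r_end)             # 0 96 48 120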
From here on we write the classes and functions themselves: the model components, the data pipeline, and the training utilities, one by one.
First up is TokenEmbedding:
- class TokenEmbedding(nn.Module):
- def __init__(self, c_in, d_model):
- super(TokenEmbedding, self).__init__()
- self.tokenConv = nn.Conv1d(in_channels=c_in, out_channels=d_model, kernel_size=3, padding=1, padding_mode='circular')
- # weight initialization
- for m in self.modules():
- if isinstance(m, nn.Conv1d):
- nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='leaky_relu')
-
-
- def forward(self, x):
- # x is a tensor with Size([32, 96, 7])
- x = self.tokenConv(x.permute(0, 2, 1)).transpose(1,2)
- # After Conv1d, x is a tensor with Size([32, 96, 512])
- return x
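A quick, illustrative shape check (the dummy tensor is just for demonstration):
- token_emb = TokenEmbedding(c_in=7, d_model=512)
- dummy = torch.randn(32, 96, 7)   # [batch, seq_len, channels]
- print(token_emb(dummy).shape)    # torch.Size([32, 96, 512])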
Next comes the positional encoding:
- class PositionalEmbedding(nn.Module):
- def __init__(self, d_model, max_len=5000):
- super(PositionalEmbedding, self).__init__()
- # Compute the positional encodings once in log space.
- pe = torch.zeros(max_len, d_model).float() # pe is a tensor with Size([5000, 512]) with all 0.
- pe.requires_grad = False  # the original had a typo ('require_grad'); buffers are excluded from gradients anyway
- position = torch.arange(0, max_len).float().unsqueeze(1) # position is a tensor with Size([5000, 1])
- div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp() # div_term is a tensor with Size([256])
- # position * div_term is a tensor with Size([5000, 256])
- pe[:, 0::2] = torch.sin(position * div_term) # sin fills the even-indexed columns
- pe[:, 1::2] = torch.cos(position * div_term) # cos fills the odd-indexed columns
- pe = pe.unsqueeze(0) # pe is a tensor with Size([1, 5000, 512])
- self.register_buffer('pe', pe)
-
-
- def forward(self, x):
- return self.pe[:, :x.size(1)] # return is a tensor with Size([1, 96, 512])
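Another small sanity check; note that only x.size(1) matters here, the values of x are ignored:
- pos_emb = PositionalEmbedding(d_model=512)
- dummy = torch.randn(32, 96, 512)
- print(pos_emb(dummy).shape)  # torch.Size([1, 96, 512]); broadcasts over the batch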
Then the embedding of time features:
- class TimeFeatureEmbedding(nn.Module):
- def __init__(self, d_model, freq='h'):
- super(TimeFeatureEmbedding, self).__init__()
- freq_map = {'h':4, 't':5, 's':6, 'm':1, 'a':1, 'w':2, 'd':3, 'b':3}
- d_inp = freq_map[freq] # 4
- self.embed = nn.Linear(d_inp, d_model) # 4, 512
-
-
- def forward(self, x):
- return self.embed(x)
The data embedding, which sums the three components:
- class DataEmbedding(nn.Module):
- def __init__(self, c_in, d_model, freq, dropout):
- super(DataEmbedding, self).__init__()
- self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model)
- self.position_embedding = PositionalEmbedding(d_model=d_model)
- self.time_feature_embedding = TimeFeatureEmbedding(d_model=d_model, freq=freq)
- self.dropout = nn.Dropout(p=dropout)
-
-
- def forward(self, x, x_mark):
- # x is a tensor with Size([32, 96, 7])
- # x_mark is a tensor with Size([32, 96, 4])
- x = self.value_embedding(x) + self.position_embedding(x) + self.time_feature_embedding(x_mark)
- return self.dropout(x) # return is a tensor with Size([32, 96, 512])
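Putting the three embeddings together (illustrative; with freq='h' the mark tensor carries 4 time features):
- emb = DataEmbedding(c_in=7, d_model=512, freq='h', dropout=0.05)
- x = torch.randn(32, 96, 7)       # raw series values
- x_mark = torch.randn(32, 96, 4)  # stand-in for the 4 hourly time features
- print(emb(x, x_mark).shape)      # torch.Size([32, 96, 512])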
The causal-mask implementation:
- class TriangularCausalMask():
- def __init__(self, B, L, device="cpu"):
- mask_shape = [B, 1, L, L]
- with torch.no_grad():
- self._mask = torch.triu(torch.ones(mask_shape, dtype=torch.bool), diagonal=1).to(device)
-
- @property
- def mask(self):
- return self._mask
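For intuition, here is the mask for a toy length-4 sequence; True (1) marks the future positions each query is forbidden to attend to:
- m = TriangularCausalMask(B=1, L=4)
- print(m.mask[0, 0].int())
- # tensor([[0, 1, 1, 1],
- #         [0, 0, 1, 1],
- #         [0, 0, 0, 1],
- #         [0, 0, 0, 0]], dtype=torch.int32)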
The attention mechanism:
- class FullAttention(nn.Module):
- def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
- super(FullAttention, self).__init__()
- self.scale = scale # None -> fall back to 1/sqrt(E)
- self.mask_flag = mask_flag # False in the encoder, True in the decoder's self-attention
- self.output_attention = output_attention # False
- self.dropout = nn.Dropout(attention_dropout) # 0.05 in this run
-
-
- def forward(self, queries, keys, values, attn_mask):
- # shape of queries is [b, l, h, e]
- # shape of keys is [b, s, h, e]
- # shape of values is [b, s, h, d]
- B, L, H, E = queries.shape # 32, 96, 8, 64
- _, S, _, D = values.shape # 96 64
- scale = self.scale or 1. / math.sqrt(E) # 0.125
- scores = torch.einsum("blhe,bshe->bhls", queries, keys) # scores is a tensor with Size([32, 8, 96, 96])
-
- # self.mask_flag = False in encoder but self.mask_flag = True in decoder
- if self.mask_flag:
- if attn_mask is None:
- attn_mask = TriangularCausalMask(B, L, device=queries.device)
- scores.masked_fill_(attn_mask.mask, -np.inf)
-
- A = self.dropout(torch.softmax(scale * scores, dim=-1)) # A is a tensor with Size([32, 8, 96, 96])
- V = torch.einsum("bhls,bshd->blhd", A, values) # V is a tensor with Size([32, 96, 8, 64])
- if self.output_attention:
- return (V.contiguous(), A)
- else: # this branch is taken here (output_attention=False)
- return (V.contiguous(), None)
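The einsum is just batched scaled dot-product attention written compactly; an illustrative check that it matches an explicit per-head matmul:
- q = torch.randn(2, 10, 4, 16)  # [B, L, H, E]
- k = torch.randn(2, 10, 4, 16)  # [B, S, H, E]
- s1 = torch.einsum("blhe,bshe->bhls", q, k)
- s2 = torch.matmul(q.permute(0, 2, 1, 3), k.permute(0, 2, 3, 1))  # [B, H, L, S]
- print(torch.allclose(s1, s2, atol=1e-5))  # True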
The attention layer, which wraps FullAttention with the per-head linear projections:
- class AttentionLayer(nn.Module):
- def __init__(self, attention, d_model, n_heads, d_keys=None, d_values=None):
- super(AttentionLayer, self).__init__()
- d_keys = d_keys or (d_model // n_heads) # 512 / 8 = 64
- d_values = d_values or (d_model // n_heads) # 512 / 8 = 64
- self.inner_attention = attention # FullAttention(...)
- self.query_projection = nn.Linear(d_model, d_keys * n_heads) # 512 512
- self.key_projection = nn.Linear(d_model, d_keys * n_heads) # 512 512
- self.value_projection = nn.Linear(d_model, d_values * n_heads) # 512 512
- self.out_projection = nn.Linear(d_values * n_heads, d_model) # 512, 512 -- linearly recombines the 8 concatenated heads
- self.n_heads = n_heads # 8
-
-
- def forward(self, queries, keys, values, attn_mask):
- B, L, _ = queries.shape # B = 32, L = 96
- _, S, _ = keys.shape # S = 96
- H = self.n_heads # H = 8
- queries = self.query_projection(queries).view(B, L, H, -1) # queries is a tensor with Size([32, 96, 8, 64])
- keys = self.key_projection(keys).view(B, S, H, -1) # keys is a tensor with Size([32, 96, 8, 64])
- values = self.value_projection(values).view(B, S, H, -1) # values is a tensor with Size([32, 96, 8, 64])
- out, attn = self.inner_attention(queries, keys, values, attn_mask) # out is a tensor with Size([32, 96, 8, 64])
- out = out.view(B, L, -1) # out is a tensor with Size([32, 96, 512])
- return self.out_projection(out), attn
The encoder layer:
- class EncoderLayer(nn.Module):
- def __init__(self, attention, d_model, d_ff=None, dropout=0.1, activation="relu"):
- super(EncoderLayer, self).__init__()
- d_ff = d_ff or 4 * d_model # 512 * 4 = 2048
- self.attention = attention
- self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
- self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)
- self.norm1 = nn.LayerNorm(d_model)
- self.norm2 = nn.LayerNorm(d_model)
- self.dropout = nn.Dropout(dropout)
- self.activation = F.relu if activation == "relu" else F.gelu
-
-
- def forward(self, x, attn_mask=None):
- # x [B, L, D] 32, 96, 512
- new_x, attn = self.attention(x, x, x, attn_mask = attn_mask) # new_x is a tensor with Size([32, 96, 512])
- x = x + self.dropout(new_x) # residual connection # x is a tensor with Size([32, 96, 512])
- y = x = self.norm1(x) # x and y are tensors with Size([32, 96, 512])
- y = self.dropout(self.activation(self.conv1(y.transpose(-1,1))))
- y = self.dropout(self.conv2(y).transpose(-1,1)) # y is a tensor with Size([32, 96, 512])
- return self.norm2(x+y), attn
The encoder, a stack of encoder layers:
- class Encoder(nn.Module):
- def __init__(self, attn_layers, conv_layers=None, norm_layer=None): # conv_layers is accepted but unused here
- super(Encoder, self).__init__()
- self.attn_layers = nn.ModuleList(attn_layers)
- self.norm = norm_layer
-
-
- def forward(self, x, attn_mask=None):
- # x [B, L, D] [32, 96, 512]
- attns = []
- for attn_layer in self.attn_layers:
- x, attn = attn_layer(x, attn_mask=attn_mask)
- attns.append(attn)
-
- if self.norm is not None:
- x = self.norm(x)
- return x, attns
The decoder layer: masked self-attention, then cross-attention over the encoder output:
- class DecoderLayer(nn.Module):
- def __init__(self, self_attention, cross_attention, d_model, d_ff=None, dropout=0.1, activation="relu"):
- super(DecoderLayer, self).__init__()
- d_ff = d_ff or 4 * d_model
- self.self_attention = self_attention
- self.cross_attention = cross_attention
- self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
- self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)
- self.norm1 = nn.LayerNorm(d_model)
- self.norm2 = nn.LayerNorm(d_model)
- self.norm3 = nn.LayerNorm(d_model)
- self.dropout = nn.Dropout(dropout)
- self.activation = F.relu if activation == "relu" else F.gelu
-
- def forward(self, x, cross, x_mask=None, cross_mask=None):
- x = x + self.dropout(self.self_attention(x, x, x, attn_mask=x_mask)[0])
- x = self.norm1(x)
- x = x + self.dropout(self.cross_attention(x, cross, cross, attn_mask=cross_mask)[0])
- y = x = self.norm2(x)
- y = self.dropout(self.activation(self.conv1(y.transpose(-1,1))))
- y = self.dropout(self.conv2(y).transpose(-1,1))
- return self.norm3(x+y)
The decoder:
- class Decoder(nn.Module):
- def __init__(self, layers, norm_layer=None):
- super(Decoder, self).__init__()
- self.layers = nn.ModuleList(layers)
- self.norm = norm_layer
-
-
- def forward(self, x, cross, x_mask=None, cross_mask=None):
- # x is the output of decoder_embedding
- # cross is the output of encoder
- for layer in self.layers:
- x = layer(x, cross, x_mask=x_mask, cross_mask=cross_mask)
- if self.norm is not None:
- x = self.norm(x)
- return x
And the centerpiece, the Transformer class itself:
- class Transformer(nn.Module):
- def __init__(self, enc_in, dec_in, c_out, seq_len, label_len, pred_len, factor, d_model, n_heads, e_layers, d_layers, d_ff,
- dropout, freq, activation, output_attention, device):
- super(Transformer, self).__init__()
- self.pred_len = pred_len # 24
- self.output_attention = output_attention # False
- # Embedding
- self.enc_embedding = DataEmbedding(enc_in, d_model, freq, dropout)
- self.dec_embedding = DataEmbedding(dec_in, d_model, freq, dropout)
-
- # Encoder
- self.encoder = Encoder(
- [
- EncoderLayer(
- AttentionLayer(FullAttention(False, factor, attention_dropout=dropout, output_attention=output_attention), d_model, n_heads),
- d_model,
- d_ff,
- dropout=dropout,
- activation=activation
- ) for l in range(e_layers)
- ],
- norm_layer=torch.nn.LayerNorm(d_model)
- )
-
- # Decoder
- self.decoder = Decoder(
- [
- DecoderLayer(
- AttentionLayer(FullAttention(True, factor, attention_dropout=dropout, output_attention=True), d_model, n_heads),
- AttentionLayer(FullAttention(False, factor, attention_dropout=dropout, output_attention=False), d_model, n_heads),
- d_model,
- d_ff,
- dropout=dropout,
- activation=activation,
- )
- for l in range(d_layers)
- ],
- norm_layer=nn.LayerNorm(d_model)
- )
- self.projection = nn.Linear(d_model, c_out, bias=True)
-
-
- def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, enc_self_mask=None, dec_self_mask=None, dec_enc_mask=None):
- enc_embedding_out = self.enc_embedding(x_enc, x_mark_enc) # enc_embedding_out is a tensor with Size([32, 96, 512])
- enc_out, attns = self.encoder(enc_embedding_out, attn_mask=enc_self_mask) # enc_out is a tensor with Size([32, 96, 512])
- dec_embedding_out = self.dec_embedding(x_dec, x_mark_dec)
- dec_out = self.decoder(dec_embedding_out, enc_out, x_mask=dec_self_mask, cross_mask=dec_enc_mask) # dec_out is a tensor with Size([32, 72, 512]); the original silently dropped dec_enc_mask
- model_output = self.projection(dec_out) # model_output is a tensor with Size([32, 72, 1])
- return model_output[:, -self.pred_len:, :] # [B, L, D] [32, 24, 1] -- keep only the pred_len horizon
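A CPU smoke test of the whole model. This is illustrative only: I use a small 2-layer/1-layer stack so it runs quickly, and note that the device argument is accepted but unused inside the class.
- model = Transformer(enc_in=7, dec_in=7, c_out=1, seq_len=96, label_len=48,
-                     pred_len=24, factor=5, d_model=512, n_heads=8, e_layers=2,
-                     d_layers=1, d_ff=2048, dropout=0.05, freq='h',
-                     activation='gelu', output_attention=False, device='cpu')
- x_enc, x_mark_enc = torch.randn(2, 96, 7), torch.randn(2, 96, 4)
- x_dec, x_mark_dec = torch.randn(2, 72, 7), torch.randn(2, 72, 4)  # 72 = label_len + pred_len
- print(model(x_enc, x_mark_enc, x_dec, x_mark_dec).shape)  # torch.Size([2, 24, 1])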
A helper that adjusts the learning rate between epochs:
- def adjust_learning_rate(optimizer, epoch, args):
- if args.lradj=='type1': # this branch is taken here
- lr_adjust = {epoch: args.learning_rate * (0.5 ** ((epoch-1) // 1))}
- elif args.lradj=='type2':
- lr_adjust = {
- 2: 5e-5, 4: 1e-5, 6: 5e-6, 8: 1e-6,
- 10: 5e-7, 15: 1e-7, 20: 5e-8
- }
-
- if epoch in lr_adjust.keys():
- lr = lr_adjust[epoch]
- for param_group in optimizer.param_groups:
- param_group['lr'] = lr
- print('Updating learning rate to {}'.format(lr)) # printed once per epoch
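With the defaults (lradj='type1', learning_rate=1e-4) the schedule simply halves the rate every epoch; an illustrative printout:
- for epoch in range(1, 5):
-     print(epoch, args.learning_rate * (0.5 ** ((epoch - 1) // 1)))
- # 1 0.0001
- # 2 5e-05
- # 3 2.5e-05
- # 4 1.25e-05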
The early-stopping class:
- class EarlyStopping:
- def __init__(self, patience, verbose, delta=0):
- self.patience = patience # 3
- self.verbose = verbose # True
- self.counter = 0
- self.best_score = None
- self.early_stop = False
- self.val_loss_min = np.inf  # np.Inf was removed in NumPy 2.0
- self.delta = delta
-
- def __call__(self, val_loss, model, path):
- score = -val_loss
- if self.best_score is None:
- self.best_score = score
- self.save_checkpoint(val_loss, model, path)
- elif score < self.best_score + self.delta:
- self.counter += 1
- print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
- if self.counter >= self.patience:
- self.early_stop = True
- else:
- self.best_score = score
- self.save_checkpoint(val_loss, model, path)
- self.counter = 0
-
- def save_checkpoint(self, val_loss, model, path):
- if self.verbose:
- print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...') # printed whenever the model improves
- torch.save(model.state_dict(), path + '/' + 'checkpoint.pth')
- self.val_loss_min = val_loss
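A usage sketch with made-up losses (it assumes a `model` instance and a writable checkpoint directory `path` already exist): after three consecutive non-improving epochs, early_stop flips to True.
- stopper = EarlyStopping(patience=3, verbose=True)
- for val_loss in [0.9, 0.8, 0.85, 0.84, 0.83]:  # illustrative losses
-     stopper(val_loss, model, path)             # saves on 0.9 and 0.8, counts on the rest
-     if stopper.early_stop:
-         print('stopping early')
-         break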
Data preprocessing, via a simple standard scaler:
- class StandardScaler():
- def __init__(self):
- self.mean = 0.
- self.std = 1.
-
- def fit(self, data):
- self.mean = data.mean(0)
- self.std = data.std(0)
-
- def transform(self, data):
- mean = torch.from_numpy(self.mean).type_as(data).to(data.device) if torch.is_tensor(data) else self.mean
- std = torch.from_numpy(self.std).type_as(data).to(data.device) if torch.is_tensor(data) else self.std
- return (data - mean) / std
-
- def inverse_transform(self, data):
- mean = torch.from_numpy(self.mean).type_as(data).to(data.device) if torch.is_tensor(data) else self.mean
- std = torch.from_numpy(self.std).type_as(data).to(data.device) if torch.is_tensor(data) else self.std
- if data.shape[-1] != mean.shape[-1]:
- mean = mean[-1:]
- std = std[-1:]
- return (data * std) + mean
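An illustrative round trip: fitting on an array and then inverting the transform recovers the original values.
- sc = StandardScaler()
- arr = np.random.randn(100, 7)
- sc.fit(arr)
- print(np.allclose(sc.inverse_transform(sc.transform(arr)), arr))  # True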
Extraction of time features:
- class TimeFeature:
- def __init__(self):
- pass
-
- def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
- pass
-
- def __repr__(self):
- return self.__class__.__name__ + "()"
-
- class SecondOfMinute(TimeFeature):
- """Minute of hour encoded as value between [-0.5, 0.5]"""
- def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
- return index.second / 59.0 - 0.5
-
- class MinuteOfHour(TimeFeature):
- """Minute of hour encoded as value between [-0.5, 0.5]"""
- def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
- return index.minute / 59.0 - 0.5
-
- class HourOfDay(TimeFeature):
- """Hour of day encoded as value between [-0.5, 0.5]"""
- def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
- return index.hour / 23.0 - 0.5
-
- class DayOfWeek(TimeFeature):
- """Hour of day encoded as value between [-0.5, 0.5]"""
- def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
- return index.dayofweek / 6.0 - 0.5
-
- class DayOfMonth(TimeFeature):
- """Day of month encoded as value between [-0.5, 0.5]"""
- def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
- return (index.day - 1) / 30.0 - 0.5
-
- class DayOfYear(TimeFeature):
- """Day of year encoded as value between [-0.5, 0.5]"""
- def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
- return (index.dayofyear - 1) / 365.0 - 0.5
-
- class MonthOfYear(TimeFeature):
- """Month of year encoded as value between [-0.5, 0.5]"""
- def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
- return (index.month - 1) / 11.0 - 0.5
-
- class WeekOfYear(TimeFeature):
- """Week of year encoded as value between [-0.5, 0.5]"""
- def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
- return (index.isocalendar().week - 1) / 52.0 - 0.5  # DatetimeIndex.week was removed in pandas 2.x
-
- def time_features_from_frequency_str(freq_str: str) -> List[TimeFeature]:
- features_by_offsets = {
- offsets.YearEnd: [],
- offsets.QuarterEnd: [MonthOfYear],
- offsets.MonthEnd: [MonthOfYear],
- offsets.Week: [DayOfMonth, WeekOfYear],
- offsets.Day: [DayOfWeek, DayOfMonth, DayOfYear],
- offsets.BusinessDay: [DayOfWeek, DayOfMonth, DayOfYear],
- offsets.Hour: [HourOfDay, DayOfWeek, DayOfMonth, DayOfYear], # this is the entry matched for freq='h'
- offsets.Minute: [
- MinuteOfHour,
- HourOfDay,
- DayOfWeek,
- DayOfMonth,
- DayOfYear,
- ],
- offsets.Second: [
- SecondOfMinute,
- MinuteOfHour,
- HourOfDay,
- DayOfWeek,
- DayOfMonth,
- DayOfYear,
- ],
- }
-
- offset = to_offset(freq_str) # <Hour>
- for offset_type, feature_classes in features_by_offsets.items():
- if isinstance(offset, offset_type):
- return [cls() for cls in feature_classes] # return [HourOfDay(), DayOfWeek(), DayOfMonth(), DayOfYear()]
-
- supported_freq_msg = f"""
- Unsupported frequency {freq_str}
- The following frequencies are supported:
- Y - yearly
- alias: A
- M - monthly
- W - weekly
- D - daily
- B - business days
- H - hourly
- T - minutely
- alias: min
- S - secondly
- """
- raise RuntimeError(supported_freq_msg)
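For our hourly setting, a quick illustrative call confirms which four features come back:
- print(time_features_from_frequency_str('h'))
- # [HourOfDay(), DayOfWeek(), DayOfMonth(), DayOfYear()]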
-
- def time_features(dates, timeenc, freq):
- """
- > `time_features` takes in a `dates` dataframe with a 'date' column and extracts the date down to `freq` where freq can be any of the following if `timeenc` is 0:
- > * m - [month]
- > * w - [month]
- > * d - [month, day, weekday]
- > * b - [month, day, weekday]
- > * h - [month, day, weekday, hour]
- > * t - [month, day, weekday, hour, *minute]
- >
- > If `timeenc` is 1, a similar, but different list of `freq` values are supported (all encoded between [-0.5 and 0.5]):
- > * Q - [month]
- > * M - [month]
- > * W - [Day of month, week of year]
- > * D - [Day of week, day of month, day of year]
- > * B - [Day of week, day of month, day of year]
- > * H - [Hour of day, day of week, day of month, day of year]
- > * T - [Minute of hour*, hour of day, day of week, day of month, day of year]
- > * S - [Second of minute, minute of hour, hour of day, day of week, day of month, day of year]
- *minute returns a number from 0-3 corresponding to the 15 minute period it falls into.
- """
- if timeenc==0:
- dates['month'] = dates.date.apply(lambda row: row.month)
- dates['day'] = dates.date.apply(lambda row: row.day)
- dates['weekday'] = dates.date.apply(lambda row: row.weekday())
- dates['hour'] = dates.date.apply(lambda row: row.hour)
- dates['minute'] = dates.date.apply(lambda row: row.minute)
- dates['minute'] = dates.minute.map(lambda x: x // 15)
- freq_map = {
- 'y':[],'m':['month'],'w':['month'],'d':['month','day','weekday'],
- 'b':['month','day','weekday'],'h':['month','day','weekday','hour'],
- 't':['month','day','weekday','hour','minute'],
- }
- return dates[freq_map[freq.lower()]].values
- if timeenc==1: # this branch is taken here
- list1 = []
- dates = pd.to_datetime(dates.date.values)
- for feat in time_features_from_frequency_str(freq):
- # print(feat(dates))
- list1.append(feat(dates))
-
- return np.vstack(list1).transpose(1, 0)
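Illustrative: building the mark array for two days of hourly timestamps (the date range here is made up):
- df = pd.DataFrame({'date': pd.date_range('2016-07-01', periods=48, freq='h')})
- marks = time_features(df, timeenc=1, freq='h')
- print(marks.shape)               # (48, 4)
- print(marks.min(), marks.max())  # every column stays within [-0.5, 0.5]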
The dataset class:
- class MyDataset(Dataset):
- def __init__(self, root_path, flag, size, features, data_path, target, inverse, timeenc, freq):
- # size [seq_len, label_len, pred_len]
- self.seq_len = size[0] # 96
- self.label_len = size[1] # 48
- self.pred_len = size[2] # 24
- assert flag in ['train', 'test', 'val']
- type_map = {'train':0, 'val':1, 'test':2}
- self.set_type = type_map[flag] # 0
- self.features = features # 'MS'
- self.target = target # 'OT'
- self.inverse = inverse # False
- self.timeenc = timeenc # 1
- self.freq = freq # 'h'
- self.root_path = root_path # './data/'
- self.data_path = data_path # 'ETTh1.csv'
- self.__read_data__()
-
-
- def __read_data__(self):
- self.scaler = StandardScaler()
- df_raw = pd.read_csv(os.path.join(self.root_path, self.data_path)) # a dataframe of 17420 * 8
- border1s = [0, int(len(df_raw) * 0.6) - self.seq_len, int(len(df_raw) * 0.8) - self.seq_len] # border1s = [0, 10356, 13840]
- border2s = [int(len(df_raw) * 0.6), int(len(df_raw) * 0.8), int(len(df_raw) * 1)] # border2s = [10452, 13936, 17420]
- border1 = border1s[self.set_type] # 0
- border2 = border2s[self.set_type] # 10452
-
- if self.features=='M' or self.features=='MS':
- df_data = df_raw[df_raw.columns[1:]] # a dataframe of 17420 * 7 (without 'date' column)
- elif self.features=='S':
- df_data = df_raw[[self.target]]
-
- train_data = df_data[border1s[0]:border2s[0]] # a dataframe of 10452 * 7
- self.scaler.fit(train_data.values)
- data = self.scaler.transform(df_data.values) # a 2D-array of shape (17420, 7)
- df_stamp = df_raw[['date']][border1:border2] # a dataframe of 10452 * 1 (only 'date' column)
- df_stamp['date'] = pd.to_datetime(df_stamp.date) # convert to datetime64[ns]
- data_stamp = time_features(df_stamp, timeenc=self.timeenc, freq=self.freq) # a 2D-array of shape (10452, 4)
- self.data_x = data[border1:border2] # a 2D-array of shape (10452, 7)
-
- if self.inverse:
- self.data_y = df_data.values[border1:border2]
- else: # this branch is taken here (inverse=False)
- self.data_y = data[border1:border2] # a 2D-array of shape (10452, 7)
-
- self.data_stamp = data_stamp # a 2D-array of shape (10452, 4)
-
-
- def __getitem__(self, index):
- s_begin = index
- s_end = s_begin + self.seq_len
- r_begin = s_end - self.label_len
- r_end = r_begin + self.label_len + self.pred_len
- seq_x = self.data_x[s_begin:s_end] # a 2D-array of shape (96, 7)
- if self.inverse:
- seq_y = np.concatenate([self.data_x[r_begin:r_begin+self.label_len], self.data_y[r_begin+self.label_len:r_end]], 0)
- else: # this branch is taken here
- seq_y = self.data_y[r_begin:r_end] # a 2D-array of shape (72, 7)
-
- seq_x_mark = self.data_stamp[s_begin:s_end] # a 2D-array of shape (96, 4)
- seq_y_mark = self.data_stamp[r_begin:r_end] # a 2D-array of shape (72, 4)
- return seq_x, seq_y, seq_x_mark, seq_y_mark
-
-
- def __len__(self):
- return len(self.data_x) - self.seq_len - self.pred_len + 1 # 10452 - 96 - 24 + 1 = 10333
-
-
- def inverse_transform(self, data):
- return self.scaler.inverse_transform(data)
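Assuming ETTh1.csv is actually present under ./data/, an illustrative fetch of the first sample:
- ds = MyDataset(root_path='./data/', flag='train', size=[96, 48, 24],
-                features='MS', data_path='ETTh1.csv', target='OT',
-                inverse=False, timeenc=1, freq='h')
- seq_x, seq_y, seq_x_mark, seq_y_mark = ds[0]
- print(seq_x.shape, seq_y.shape, seq_x_mark.shape, seq_y_mark.shape)
- # (96, 7) (72, 7) (96, 4) (72, 4)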
Evaluation metrics:
- def RSE(pred, true):
- return np.sqrt(np.sum((true-pred)**2)) / np.sqrt(np.sum((true-true.mean())**2))
-
- def CORR(pred, true):
- u = ((true-true.mean(0))*(pred-pred.mean(0))).sum(0)
- d = np.sqrt(((true-true.mean(0))**2*(pred-pred.mean(0))**2).sum(0)) # kept as in the Informer repo; note this differs from the textbook Pearson denominator
- return (u/d).mean(-1)
-
- def MAE(pred, true):
- return np.mean(np.abs(pred-true))
-
- def MSE(pred, true):
- return np.mean((pred-true)**2)
-
- def RMSE(pred, true):
- return np.sqrt(MSE(pred, true))
-
- def MAPE(pred, true):
- return np.mean(np.abs((pred - true) / true))
-
- def MSPE(pred, true):
- return np.mean(np.square((pred - true) / true))
-
- def metric(pred, true):
- mae = MAE(pred, true)
- mse = MSE(pred, true)
- rmse = RMSE(pred, true)
- mape = MAPE(pred, true)
- mspe = MSPE(pred, true)
- return mae,mse,rmse,mape,mspe
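And a quick illustrative call on random arrays, just to show the return pattern:
- p, t = np.random.rand(8, 24, 1), np.random.rand(8, 24, 1)
- mae, mse, rmse, mape, mspe = metric(p, t)
- print('mae={:.4f} mse={:.4f} rmse={:.4f}'.format(mae, mse, rmse))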
Finally, the Exp class that ties everything together:
- class Exp_Transformer:
- def __init__(self, args):
- self.args = args
- self.device = self._acquire_device()
- self.model = self._build_model().to(self.device)
-
- def _acquire_device(self):
- os.environ["CUDA_VISIBLE_DEVICES"] = str(self.args.gpu) # 指定使用显卡cuda:{0}
- device = torch.device('cuda:{}'.format(self.args.gpu))
- print('Use GPU: cuda:{}'.format(self.args.gpu)) # 这里输出
- return device
-
- def _build_model(self):
- model = Transformer(
- self.args.enc_in, # 7
- self.args.dec_in, # 7
- self.args.c_out, # 1
- self.args.seq_len, # 96
- self.args.label_len, # 48
- self.args.pred_len, # 24
- self.args.factor, # 5
- self.args.d_model, # 512
- self.args.n_heads, # 8
- self.args.e_layers, # 6
- self.args.d_layers, # 2
- self.args.d_ff, # 2048
- self.args.dropout, # 0.05
- self.args.freq, # 'h'
- self.args.activation, # 'gelu'
- self.args.output_attention, # False
- self.device # torch.device('cuda:{0}')
- ).float()
- return model
-
-
- def _get_data(self, flag):
- Data = MyDataset
- if flag == 'test':
- shuffle_flag = False
- drop_last = True
- batch_size = self.args.batch_size
- freq = self.args.freq
- elif flag=='pred':
- shuffle_flag = False
- drop_last = False
- batch_size = 1
- freq = self.args.freq
- Data = Dataset_Pred  # note: Dataset_Pred is not defined in this post; the 'pred' flag is never used here
- else: # flag == 'train' or flag == 'val'
- shuffle_flag = True
- drop_last = True
- batch_size = self.args.batch_size # 32
-
- data_set = Data(
- root_path=self.args.root_path, # './data/'
- flag=flag, # 'train'
- size=[self.args.seq_len, self.args.label_len, self.args.pred_len], # [96, 48, 24]
- features=self.args.features, # 'MS'
- data_path=self.args.data_path, # 'ETTh1.csv'
- target=self.args.target, # 'OT'
- inverse=self.args.inverse, # False
- timeenc=1,
- freq='h',
- ) # during training, data_set is an instance of MyDataset
- print(flag, len(data_set)) # e.g. "train 10333"
- data_loader = DataLoader(data_set, batch_size=batch_size, shuffle=shuffle_flag, num_workers=self.args.num_workers, drop_last=drop_last)  # fixed: use self.args, not the global args
- return data_set, data_loader
-
-
- def train(self, setting):
- train_data, train_loader = self._get_data(flag = 'train')
- vali_data, vali_loader = self._get_data(flag = 'val')
- test_data, test_loader = self._get_data(flag = 'test')
- path = os.path.join(self.args.checkpoints, setting)
- if not os.path.exists(path):
- os.makedirs(path)
- time_now = time.time()
- train_steps = len(train_loader) # 322 = 10333 // 32 with drop_last=True
- early_stopping = EarlyStopping(patience=self.args.patience, verbose=True)
- model_optim = self._select_optimizer()
- criterion = self._select_criterion()
- hist = np.zeros(self.args.train_epochs)
-
- for epoch in range(self.args.train_epochs): # loop over the 6 epochs
- iter_count = 0
- train_loss = []
- self.model.train()
- epoch_time = time.time()
- for i, (batch_x,batch_y,batch_x_mark,batch_y_mark) in enumerate(train_loader):
- # batch_x is a tensor with Size([32, 96, 7])
- # batch_y is a tensor with Size([32, 72, 7])
- # batch_x_mark is a tensor with Size([32, 96, 4])
- # batch_y_mark is a tensor with Size([32, 72, 4])
- iter_count += 1
- model_optim.zero_grad()
- pred, true = self._process_one_batch(train_data, batch_x, batch_y, batch_x_mark, batch_y_mark)
- loss = criterion(pred, true)
- train_loss.append(loss.item())
- if (i+1) % 100==0:
- print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item())) # 这里输出
- speed = (time.time() - time_now) / iter_count
- left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i)
- print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time))
- iter_count = 0
- time_now = time.time()
-
- loss.backward()
- model_optim.step()
-
- print("Epoch: {} cost time: {}".format(epoch+1, time.time()-epoch_time)) # 这里输出
- train_loss = np.average(train_loss)
- hist[epoch] = train_loss
- vali_loss = self.vali(vali_data, vali_loader, criterion)
- test_loss = self.vali(test_data, test_loader, criterion)
-
- print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format(
- epoch + 1, train_steps, train_loss, vali_loss, test_loss)) # 这里输出
- early_stopping(vali_loss, self.model, path)
- if early_stopping.early_stop:
- print("Early stopping") # 这里输出
- break
-
- adjust_learning_rate(model_optim, epoch+1, self.args)
-
-
- plt.plot(range(1, len(hist) + 1), hist, label="Training loss")
- plt.legend()
- plt.xticks(range(1, len(hist) + 1)) # label the epochs starting from 1
- plt.show()
- best_model_path = path+'/'+'checkpoint.pth'
- self.model.load_state_dict(torch.load(best_model_path))
- return self.model
-
-
- def vali(self, vali_data, vali_loader, criterion):
- self.model.eval()
- total_loss = []
- for i, (batch_x,batch_y,batch_x_mark,batch_y_mark) in enumerate(vali_loader):
- pred, true = self._process_one_batch(vali_data, batch_x, batch_y, batch_x_mark, batch_y_mark)
- loss = criterion(pred.detach().cpu(), true.detach().cpu())
- total_loss.append(loss)
- total_loss = np.average(total_loss)
- self.model.train()
- return total_loss
-
- def test(self, setting):
- test_data, test_loader = self._get_data(flag='test')
- self.model.eval()
-
- preds = []
- trues = []
-
- for i, (batch_x,batch_y,batch_x_mark,batch_y_mark) in enumerate(test_loader):
- pred, true = self._process_one_batch(test_data, batch_x, batch_y, batch_x_mark, batch_y_mark)
- preds.append(pred.detach().cpu().numpy())
- trues.append(true.detach().cpu().numpy())
-
- preds = np.array(preds)
- trues = np.array(trues)
- print('test shape:', preds.shape, trues.shape)
- preds = preds.reshape(-1, preds.shape[-2], preds.shape[-1])
- trues = trues.reshape(-1, trues.shape[-2], trues.shape[-1])
- print('test shape:', preds.shape, trues.shape)
- prediction = []
- ground_truth = []
- for i in range(preds.shape[0] - 1):
- prediction.append(preds[i][0].item())
- ground_truth.append(trues[i][0].item())
- for j in range(preds.shape[1] - 1):
- prediction.append(preds[-1][j+1].item())
- ground_truth.append(trues[-1][j+1].item())
-
- plt.plot(prediction, label='Prediction')
- plt.plot(ground_truth, label='Ground Truth')
- plt.legend() # add the legend
- plt.xlabel('Index') # label the axes
- plt.ylabel('Value')
- plt.show() # display the chart
-
- # result save
- folder_path = './results/' + setting +'/'
- if not os.path.exists(folder_path):
- os.makedirs(folder_path)
-
- mae, mse, rmse, mape, mspe = metric(preds, trues)
- print('mse:{}, mae:{}'.format(mse, mae))
- np.save(folder_path+'metrics.npy', np.array([mae, mse, rmse, mape, mspe]))
- np.save(folder_path+'pred.npy', preds)
- np.save(folder_path+'true.npy', trues)
- return
-
-
- def _select_optimizer(self):
- model_optim = torch.optim.Adam(self.model.parameters(), lr=self.args.learning_rate)
- return model_optim
-
-
- def _select_criterion(self):
- criterion = nn.MSELoss()
- return criterion
-
-
- def _process_one_batch(self, dataset_object, batch_x, batch_y, batch_x_mark, batch_y_mark):
- batch_x = batch_x.float().to(self.device)
- batch_y = batch_y.float() # kept on CPU for now; only the slice we need is moved to the GPU at the end of this method
- batch_x_mark = batch_x_mark.float().to(self.device)
- batch_y_mark = batch_y_mark.float().to(self.device)
-
- # decoder input
- if self.args.padding==0: # this branch is taken here
- dec_inp = torch.zeros([batch_y.shape[0], self.args.pred_len, batch_y.shape[-1]]).float() # dec_inp is a tensor with Size([32, 24, 7])
- elif self.args.padding==1:
- dec_inp = torch.ones([batch_y.shape[0], self.args.pred_len, batch_y.shape[-1]]).float()
- dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device) # dec_inp is a tensor with Size([32, 72, 7])
- # encoder - decoder
- if self.args.output_attention:
- outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
- else: # this branch is taken here
- outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) # outputs is a tensor with Size([32, 24, 1])
- if self.args.inverse:
- outputs = dataset_object.inverse_transform(outputs)
- f_dim = -1 if self.args.features=='MS' else 0
- batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device) # batch_y is a tensor with Size([32, 24, 1])
- return outputs, batch_y
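The post stops at the Exp class, so here is a hedged sketch of how the pieces could be driven end to end. The `setting` naming scheme below is my own choice, not from the original, and it assumes a CUDA GPU is available, since _acquire_device unconditionally requests one.
- # Hypothetical driver; the `setting` string format is an assumption.
- exp = Exp_Transformer(args)
- setting = '{}_{}_ft{}_sl{}_ll{}_pl{}'.format(args.model, args.data, args.features,
-                                              args.seq_len, args.label_len, args.pred_len)
- exp.train(setting)  # saves the best checkpoint under args.checkpoints/setting/
- exp.test(setting)   # plots predictions and saves metrics under ./results/setting/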