The Transformer is a neural network architecture built on the self-attention mechanism, originally developed for natural language processing (NLP) tasks such as machine translation, text summarization, and question answering. At its core, it encodes the input features and decodes them into outputs. Rather than dwelling on the theory here, we will work through a concrete example of applying a Transformer to time-series forecasting.
Our data is the monthly consumption of a certain spare part, and the goal is to forecast its consumption over the next half year. The time granularity is one month; the series is sketched in the figure below.
As shown in the figure above, the blue portion is the training data and the red portion is the test data. The first step is to create the Transformer model.
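The model configuration below references several quantities (freq, prediction_length, lags_sequence, time_features, train_dataset) that the original post defines elsewhere. A plausible setup, assuming monthly data and a single series; the start date and the placeholder values are illustrative assumptions, not the real data:

```python
import numpy as np
from gluonts.dataset.common import ListDataset
from gluonts.time_feature import get_lags_for_frequency, time_features_from_frequency_str

freq = "1M"             # monthly observations
prediction_length = 6   # forecast horizon: half a year

# Default lags and calendar features that GluonTS suggests for this frequency
lags_sequence = get_lags_for_frequency(freq)
time_features = time_features_from_frequency_str(freq)

# Placeholder for the historical monthly consumption of the part
consumption = np.random.rand(60).tolist()

train_dataset = ListDataset(
    [{"start": "2018-01", "target": consumption[:-prediction_length],
      "feat_static_cat": [0]}],
    freq=freq,
)
test_dataset = ListDataset(
    [{"start": "2018-01", "target": consumption, "feat_static_cat": [0]}],
    freq=freq,
)
```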
```python
# Model creation
from transformers import TimeSeriesTransformerConfig, TimeSeriesTransformerForPrediction

config = TimeSeriesTransformerConfig(
    prediction_length=prediction_length,
    context_length=prediction_length * 2,       # encoder context: two forecast horizons
    lags_sequence=lags_sequence,
    num_time_features=len(time_features) + 1,   # calendar features plus the age feature
    num_static_categorical_features=1,
    # num_static_real_features=1,
    cardinality=[len(train_dataset)],           # number of values of the categorical feature
    embedding_dimension=[2],
    encoder_layers=4,
    decoder_layers=4,
    d_model=22,
)
model = TimeSeriesTransformerForPrediction(config)
```
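Note that TimeSeriesTransformerForPrediction is a probabilistic model: it outputs the parameters of a distribution (a Student-t by default) rather than point forecasts, which is why the prediction step later draws a set of sample paths. A quick sanity check:

```python
print(config.distribution_output)                  # "student_t" by default
print(sum(p.numel() for p in model.parameters()))  # parameter count of the model
```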
Next, we pull in the time-feature utilities and data transformations from GluonTS:
```python
from gluonts.time_feature import (
    time_features_from_frequency_str,
    TimeFeature,
    get_lags_for_frequency,
)
from gluonts.dataset.field_names import FieldName
from gluonts.transform import (
    AddAgeFeature,
    AddObservedValuesIndicator,
    AddTimeFeatures,
    AsNumpyArray,
    Chain,
    ExpectedNumInstanceSampler,
    InstanceSplitter,
    RemoveFields,
    SelectFields,
    SetField,
    TestSplitSampler,
    Transformation,
    ValidationSplitSampler,
    VstackFeatures,
    RenameFields,
)
```
Next we build the data loaders for the training, test, and validation samples. They depend on two helper functions, create_transformation and create_instance_splitter, which the post does not show; a sketch of both follows, and then the loaders themselves.
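Here is a condensed sketch of the two helpers, modeled on the HuggingFace time-series forecasting tutorial; the exact transformation steps, field names, and sampler choices are assumptions based on that tutorial, not on the original post:

```python
from transformers import PretrainedConfig

def create_transformation(freq: str, config: PretrainedConfig) -> Transformation:
    # Drop static fields the config does not use, mask missing values, add
    # calendar/age features, and rename fields to the names the model expects.
    remove_field_names = []
    if config.num_static_real_features == 0:
        remove_field_names.append(FieldName.FEAT_STATIC_REAL)
    return Chain(
        [RemoveFields(field_names=remove_field_names)]
        + [
            AsNumpyArray(field=FieldName.FEAT_STATIC_CAT, expected_ndim=1, dtype=int),
            AsNumpyArray(field=FieldName.TARGET, expected_ndim=1),
            # True for observed values, False for NaNs; the model ignores
            # unobserved positions when computing the loss
            AddObservedValuesIndicator(
                target_field=FieldName.TARGET,
                output_field=FieldName.OBSERVED_VALUES,
            ),
            # Calendar features (e.g. month of year for freq="1M"),
            # used by the model as positional information
            AddTimeFeatures(
                start_field=FieldName.START,
                target_field=FieldName.TARGET,
                output_field=FieldName.FEAT_TIME,
                time_features=time_features_from_frequency_str(freq),
                pred_length=config.prediction_length,
            ),
            # A running "age" counter telling the model how far into
            # the series each value lies
            AddAgeFeature(
                target_field=FieldName.TARGET,
                output_field=FieldName.FEAT_AGE,
                pred_length=config.prediction_length,
                log_scale=True,
            ),
            VstackFeatures(
                output_field=FieldName.FEAT_TIME,
                input_fields=[FieldName.FEAT_TIME, FieldName.FEAT_AGE],
            ),
            # Rename GluonTS fields to the argument names of the HF model
            RenameFields(
                mapping={
                    FieldName.FEAT_STATIC_CAT: "static_categorical_features",
                    FieldName.FEAT_STATIC_REAL: "static_real_features",
                    FieldName.FEAT_TIME: "time_features",
                    FieldName.TARGET: "values",
                    FieldName.OBSERVED_VALUES: "observed_mask",
                }
            ),
        ]
    )

def create_instance_splitter(config: PretrainedConfig, mode: str) -> Transformation:
    # Sample random training windows; at test time, split exactly at the forecast start
    assert mode in ["train", "validation", "test"]
    instance_sampler = {
        "train": ExpectedNumInstanceSampler(
            num_instances=1.0, min_future=config.prediction_length
        ),
        "validation": ValidationSplitSampler(min_future=config.prediction_length),
        "test": TestSplitSampler(),
    }[mode]
    return InstanceSplitter(
        target_field="values",
        is_pad_field=FieldName.IS_PAD,
        start_field=FieldName.START,
        forecast_start_field=FieldName.FORECAST_START,
        instance_sampler=instance_sampler,
        past_length=config.context_length + max(config.lags_sequence),
        future_length=config.prediction_length,
        time_series_fields=["time_features", "observed_mask"],
    )
```

With these in place, the loaders look as follows.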
```python
from typing import Iterable, Optional

from torch.utils.data import DataLoader
from transformers import PretrainedConfig

from gluonts.itertools import Cyclic, IterableSlice, PseudoShuffled
from gluonts.torch.util import IterableDataset


def create_train_dataloader(
    config: PretrainedConfig,
    freq,
    data,
    batch_size: int,
    num_batches_per_epoch: int,
    shuffle_buffer_length: Optional[int] = None,
    **kwargs,
) -> Iterable:
    PREDICTION_INPUT_NAMES = [
        "past_time_features",
        "past_values",
        "past_observed_mask",
        "future_time_features",
    ]
    if config.num_static_categorical_features > 0:
        PREDICTION_INPUT_NAMES.append("static_categorical_features")
    if config.num_static_real_features > 0:
        PREDICTION_INPUT_NAMES.append("static_real_features")

    TRAINING_INPUT_NAMES = PREDICTION_INPUT_NAMES + [
        "future_values",
        "future_observed_mask",
    ]

    transformation = create_transformation(freq, config)
    transformed_data = transformation.apply(data, is_train=True)

    instance_splitter = create_instance_splitter(config, "train") + SelectFields(
        TRAINING_INPUT_NAMES
    )

    # Cycle through the (optionally pseudo-shuffled) training instances indefinitely
    training_instances = instance_splitter.apply(
        Cyclic(transformed_data)
        if shuffle_buffer_length is None
        else PseudoShuffled(
            Cyclic(transformed_data),
            shuffle_buffer_length=shuffle_buffer_length,
        )
    )

    # Slice a fixed number of batches per epoch off the endless stream
    return IterableSlice(
        iter(
            DataLoader(
                IterableDataset(training_instances),
                batch_size=batch_size,
                **kwargs,
            )
        ),
        num_batches_per_epoch,
    )


# Test samples
def create_test_dataloader(
    config: PretrainedConfig,
    freq,
    data,
    batch_size: int,
    **kwargs,
):
    PREDICTION_INPUT_NAMES = [
        "past_time_features",
        "past_values",
        "past_observed_mask",
        "future_time_features",
    ]
    if config.num_static_categorical_features > 0:
        PREDICTION_INPUT_NAMES.append("static_categorical_features")
    if config.num_static_real_features > 0:
        PREDICTION_INPUT_NAMES.append("static_real_features")

    transformation = create_transformation(freq, config)
    transformed_data = transformation.apply(data, is_train=False)

    instance_sampler = create_instance_splitter(config, "test") + SelectFields(
        PREDICTION_INPUT_NAMES
    )
    testing_instances = instance_sampler.apply(transformed_data, is_train=False)

    return DataLoader(
        IterableDataset(testing_instances), batch_size=batch_size, **kwargs
    )
```
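With both helpers available, the loaders can be instantiated roughly as follows; the batch sizes are illustrative assumptions, and freq, train_dataset, and test_dataset come from the setup sketch above:

```python
train_dataloader = create_train_dataloader(
    config=config,
    freq=freq,
    data=train_dataset,
    batch_size=256,
    num_batches_per_epoch=100,
)
test_dataloader = create_test_dataloader(
    config=config,
    freq=freq,
    data=test_dataset,
    batch_size=64,
)

# Grab one batch to drive the forward pass shown next
batch = next(iter(train_dataloader))
```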
With one batch in hand, a forward pass through the model looks like this:
```python
outputs = model(
    past_values=batch["past_values"],
    past_time_features=batch["past_time_features"],
    past_observed_mask=batch["past_observed_mask"],
    static_categorical_features=batch["static_categorical_features"]
    if config.num_static_categorical_features > 0
    else None,
    static_real_features=batch["static_real_features"]
    if config.num_static_real_features > 0
    else None,
    future_values=batch["future_values"],
    future_time_features=batch["future_time_features"],
    future_observed_mask=batch["future_observed_mask"],
    output_hidden_states=True,
)
```
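Because future_values (and the corresponding mask) are supplied, the returned object carries a training loss: the negative log-likelihood of the observed future values under the predicted distribution. This is the quantity the training loop below minimizes:

```python
loss = outputs.loss  # negative log-likelihood over the prediction window
print(loss.item())
```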
Next, we train the model:
```python
from accelerate import Accelerator
from torch.optim import AdamW

accelerator = Accelerator()
device = accelerator.device

model.to(device)
optimizer = AdamW(model.parameters(), lr=6e-4, betas=(0.9, 0.95), weight_decay=1e-1)

model, optimizer, train_dataloader = accelerator.prepare(
    model,
    optimizer,
    train_dataloader,
)

model.train()
for epoch in range(40):
    for idx, batch in enumerate(train_dataloader):
        optimizer.zero_grad()
        outputs = model(
            static_categorical_features=batch["static_categorical_features"].to(device)
            if config.num_static_categorical_features > 0
            else None,
            static_real_features=batch["static_real_features"].to(device)
            if config.num_static_real_features > 0
            else None,
            past_time_features=batch["past_time_features"].to(device),
            past_values=batch["past_values"].to(device),
            future_time_features=batch["future_time_features"].to(device),
            future_values=batch["future_values"].to(device),
            past_observed_mask=batch["past_observed_mask"].to(device),
            future_observed_mask=batch["future_observed_mask"].to(device),
        )
        loss = outputs.loss

        # Backpropagation
        accelerator.backward(loss)
        optimizer.step()

        if idx % 100 == 0:
            print(loss.item())
```
The Transformer's prediction is not a single value but a set of sample paths covering the possible values of the series; the final figure is then derived from this set according to the specific business need. The figure below shows the predicted values for one sample from the results. The analysis suggests that, after training, the Transformer captures the variation pattern of the series reasonably well.
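For reference, these sampled forecasts can be produced with the model's generate method, which draws sample paths from the learned output distribution. A sketch, assuming the test_dataloader and device from above; reducing the samples with a median is one common choice, not something the original post specifies:

```python
import numpy as np

model.eval()
forecasts = []
for batch in test_dataloader:
    outputs = model.generate(
        static_categorical_features=batch["static_categorical_features"].to(device)
        if config.num_static_categorical_features > 0
        else None,
        static_real_features=batch["static_real_features"].to(device)
        if config.num_static_real_features > 0
        else None,
        past_time_features=batch["past_time_features"].to(device),
        past_values=batch["past_values"].to(device),
        future_time_features=batch["future_time_features"].to(device),
        past_observed_mask=batch["past_observed_mask"].to(device),
    )
    # outputs.sequences has shape (batch, num_parallel_samples, prediction_length)
    forecasts.append(outputs.sequences.cpu().numpy())

forecasts = np.vstack(forecasts)
median_forecast = np.median(forecasts, axis=1)  # one point forecast per window
```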