torch.nn.Transformer(d_model=512, nhead=8, num_encoder_layers=6, num_decoder_layers=6, dim_feedforward=2048, dropout=0.1, activation='relu', custom_encoder=None, custom_decoder=None)
- d_model – the expected size of the word-embedding vectors in the encoder/decoder inputs (default=512).
- nhead – the number of heads in the multi-head attention models (default=8).
- num_encoder_layers – the number of sub-encoder layers (transformer layers) in the encoder (default=6).
- num_decoder_layers – the number of sub-decoder layers in the decoder (default=6).
- dim_feedforward – the dimension of the feedforward network model (default=2048).
- dropout – the dropout rate (default=0.1).
- activation – the activation function of the encoder/decoder intermediate layer, relu or gelu (default=relu).
- custom_encoder – a custom encoder (default=None).
- custom_decoder – a custom decoder (default=None).
import torch
from torch import nn
transformer_model = nn.Transformer(nhead=16, num_encoder_layers=12)
src = torch.rand((10, 32, 512))  # (source length, batch, d_model)
tgt = torch.rand((20, 32, 512))  # (target length, batch, d_model)
out = transformer_model(src, tgt)  # (20, 32, 512)
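The forward pass also accepts attention masks. Below is a minimal sketch (not part of the original example) of passing a causal mask over the target sequence, built with the model's own generate_square_subsequent_mask helper:
import torch
from torch import nn
transformer_model = nn.Transformer(nhead=16, num_encoder_layers=12)
src = torch.rand((10, 32, 512))  # (source length, batch, d_model)
tgt = torch.rand((20, 32, 512))  # (target length, batch, d_model)
tgt_mask = transformer_model.generate_square_subsequent_mask(20)  # upper-triangular -inf mask
out = transformer_model(src, tgt, tgt_mask=tgt_mask)  # (20, 32, 512)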
torch.nn.TransformerEncoder(encoder_layer, num_layers, norm=None)
- encoder_layer – an instance of TransformerEncoderLayer() (required).
- num_layers – the number of sub-encoder layers (transformer layers) in the encoder (required).
- norm – the layer normalization component (optional).
import torch
from torch import nn
encoder_layer = nn.TransformerEncoderLayer(d_model=512, nhead=8)
transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=6)
src = torch.rand(10, 32, 512)  # (source length, batch, d_model)
out = transformer_encoder(src)  # (10, 32, 512)
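TransformerEncoder's forward also takes mask and src_key_padding_mask arguments. A minimal sketch (an assumption, not from the original post) that masks out padded source positions; in the padding mask, True marks a position to ignore:
import torch
from torch import nn
encoder_layer = nn.TransformerEncoderLayer(d_model=512, nhead=8)
transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=6)
src = torch.rand(10, 32, 512)  # (source length, batch, d_model)
src_key_padding_mask = torch.zeros(32, 10, dtype=torch.bool)  # (batch, source length)
src_key_padding_mask[:, 8:] = True  # pretend the last two positions are padding
out = transformer_encoder(src, src_key_padding_mask=src_key_padding_mask)  # (10, 32, 512)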
torch.nn.TransformerDecoder(decoder_layer, num_layers, norm=None)
- decoder_layer – an instance of TransformerDecoderLayer() (required).
- num_layers – the number of sub-decoder layers in the decoder (required).
- norm – the layer normalization component (optional).
import torch
from torch import nn
decoder_layer = nn.TransformerDecoderLayer(d_model=512, nhead=8)
transformer_decoder = nn.TransformerDecoder(decoder_layer, num_layers=6)
memory = torch.rand(10, 32, 512)  # encoder output: (source length, batch, d_model)
tgt = torch.rand(20, 32, 512)     # (target length, batch, d_model)
out = transformer_decoder(tgt, memory)  # (20, 32, 512)
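TransformerDecoder's forward additionally accepts tgt_mask, memory_mask, and the corresponding key-padding masks. A minimal sketch (an assumption) of a causal target mask built directly with torch.triu, equivalent to what generate_square_subsequent_mask produces:
import torch
from torch import nn
decoder_layer = nn.TransformerDecoderLayer(d_model=512, nhead=8)
transformer_decoder = nn.TransformerDecoder(decoder_layer, num_layers=6)
memory = torch.rand(10, 32, 512)  # encoder output: (source length, batch, d_model)
tgt = torch.rand(20, 32, 512)     # (target length, batch, d_model)
tgt_mask = torch.triu(torch.full((20, 20), float('-inf')), diagonal=1)  # causal additive mask
out = transformer_decoder(tgt, memory, tgt_mask=tgt_mask)  # (20, 32, 512)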
torch.nn.TransformerEncoderLayer(d_model, nhead, dim_feedforward=2048, dropout=0.1, activation='relu')
- d_model – the expected size of the word-embedding vectors in the encoder/decoder inputs.
- nhead – the number of heads in the multi-head attention models.
- dim_feedforward – the dimension of the feedforward network model (default=2048).
- dropout – the dropout rate (default=0.1).
- activation – the activation function of the encoder/decoder intermediate layer, relu or gelu (default=relu).
import torch
from torch import nn
encoder_layer = nn.TransformerEncoderLayer(d_model=512, nhead=8)
src = torch.rand(10, 32, 512)  # (source length, batch, d_model)
out = encoder_layer(src)  # (10, 32, 512)
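A single TransformerEncoderLayer is also the building block behind the custom_encoder parameter of nn.Transformer shown at the top. A sketch (an assumption, not from the original) of passing a hand-built encoder stack as custom_encoder; its d_model must match the Transformer's (512 here):
import torch
from torch import nn
encoder_layer = nn.TransformerEncoderLayer(d_model=512, nhead=8)
custom_encoder = nn.TransformerEncoder(encoder_layer, num_layers=4)
model = nn.Transformer(d_model=512, nhead=8, custom_encoder=custom_encoder)
src = torch.rand(10, 32, 512)  # (source length, batch, d_model)
tgt = torch.rand(20, 32, 512)  # (target length, batch, d_model)
out = model(src, tgt)  # (20, 32, 512)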
torch.nn.TransformerDecoderLayer(d_model, nhead, dim_feedforward=2048, dropout=0.1, activation='relu')
- d_model – the expected size of the word-embedding vectors in the encoder/decoder inputs.
- nhead – the number of heads in the multi-head attention models.
- dim_feedforward – the dimension of the feedforward network model (default=2048).
- dropout – the dropout rate (default=0.1).
- activation – the activation function of the encoder/decoder intermediate layer, relu or gelu (default=relu).
import torch
from torch import nn
decoder_layer = nn.TransformerDecoderLayer(d_model=512, nhead=8)
memory = torch.rand(10, 32, 512)  # (source length, batch, d_model)
tgt = torch.rand(20, 32, 512)     # (target length, batch, d_model)
out = decoder_layer(tgt, memory)  # (20, 32, 512)
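A single TransformerDecoderLayer accepts the same mask arguments as the full decoder, including a key-padding mask over the encoder memory. A sketch (an assumption), where True marks padded source positions to be ignored:
import torch
from torch import nn
decoder_layer = nn.TransformerDecoderLayer(d_model=512, nhead=8)
memory = torch.rand(10, 32, 512)  # (source length, batch, d_model)
tgt = torch.rand(20, 32, 512)     # (target length, batch, d_model)
memory_key_padding_mask = torch.zeros(32, 10, dtype=torch.bool)  # (batch, source length)
memory_key_padding_mask[:, 7:] = True  # pretend the last three source positions are padding
out = decoder_layer(tgt, memory, memory_key_padding_mask=memory_key_padding_mask)  # (20, 32, 512)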
import torch
from torch import nn
import torch.nn.functional as F
from torch.autograd import Variable
import math

class PositionalEncoding(nn.Module):
    "Implement the PE function."
    def __init__(self, d_model=300, dropout=0.2, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)
        # Compute the positional encodings once in log space.
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0., max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0., d_model, 2) * -(math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)

    def forward(self, x):
        # Add the fixed, non-trainable positional encoding, then apply dropout.
        x = x + Variable(self.pe[:, :x.size(1)], requires_grad=False)
        return self.dropout(x)

class Transformer(nn.Module):
    def __init__(self, vocab_size, label_size, mode='gru', bidirectional=True, cuda=True, is_training=True, intent_size=26):
        super(Transformer, self).__init__()
        self.is_training = is_training
        embedding_dim = 300
        hidden_size = 300
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.rnn = nn.GRU(input_size=embedding_dim, hidden_size=hidden_size, bidirectional=False, batch_first=True)
        self.fc_slot = nn.Linear(300, label_size)
        self.fc_intent = nn.Linear(300, 26)
        self.position = PositionalEncoding()
        encoder_layer = nn.TransformerEncoderLayer(d_model=300, nhead=4)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=6)

    def forward(self, X):
        embed = self.embedding(X)    # (batch, sequence length, 300)
        embed = self.position(embed)  # add positional encoding
        embed = self.transformer_encoder(embed)
        _, intent_outs = self.rnn(embed)
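A minimal usage sketch for the PositionalEncoding module above (the vocabulary size, batch shape, and variable names here are assumptions, not from the original post). PositionalEncoding adds the encoding along dim 1, so it expects input of shape (batch, sequence length, d_model); the encoder, built without batch_first, expects (sequence length, batch, d_model), hence the transpose before the encoder call:
import torch
from torch import nn
vocab_size = 1000  # hypothetical vocabulary size
embedding = nn.Embedding(vocab_size, 300)
position = PositionalEncoding(d_model=300, dropout=0.2)
encoder_layer = nn.TransformerEncoderLayer(d_model=300, nhead=4)
transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=6)
X = torch.randint(0, vocab_size, (8, 50))        # (batch, sequence length) of token ids
embed = position(embedding(X))                   # (8, 50, 300)
out = transformer_encoder(embed.transpose(0, 1)) # (50, 8, 300): (seq, batch, d_model)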
Link: official documentation