Common seq2seq models are encoder-decoder models: they consist of an Encoder and a Decoder, both usually implemented with RNNs, and their purpose is to handle inputs and outputs of different lengths. The Encoder encodes the input sequence into a single vector, and the Decoder decodes that vector step by step, producing one token at a time, with each output influencing the next. Decoding usually starts by feeding a `<BOS>` token to the Decoder to mark the beginning, and it stops when the Decoder emits `<EOS>` to mark the end of the output.
Task: take an English sentence as input and output its Chinese translation. The complete implementation is listed below; the sections that follow walk through each component.
```python
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.utils.data as data
import torch.utils.data.sampler as sampler
import torchvision
from torchvision import datasets, transforms

import numpy as np
import sys
import os
import random
import json
import re

import nltk
from nltk.translate.bleu_score import sentence_bleu
from nltk.translate.bleu_score import SmoothingFunction

import matplotlib.pyplot as plt


# Pad answers of different lengths to the same length so the model can be trained in batches
class LabelTransform(object):
    def __init__(self, size, pad):
        self.size = size
        self.pad = pad

    def __call__(self, label):
        label = np.pad(label, (0, (self.size - label.shape[0])), mode='constant', constant_values=self.pad)
        return label


# Define the dataset
class EN2CNDataset(data.Dataset):
    def __init__(self, root, max_output_len, set_name):
        self.root = root

        self.word2int_cn, self.int2word_cn = self.get_dictionary('cn')
        self.word2int_en, self.int2word_en = self.get_dictionary('en')

        self.data = []
        with open(os.path.join(self.root, f'{set_name}.txt'), "r", encoding='UTF-8') as f:
            for line in f:
                self.data.append(line)
        print(f'{set_name} dataset size: {len(self.data)}')

        self.cn_vocab_size = len(self.word2int_cn)
        self.en_vocab_size = len(self.word2int_en)
        self.transform = LabelTransform(max_output_len, self.word2int_en['<PAD>'])

    def get_dictionary(self, language):
        with open(os.path.join(self.root, f'word2int_{language}.json'), "r", encoding='UTF-8') as f:
            word2int = json.load(f)
        with open(os.path.join(self.root, f'int2word_{language}.json'), "r", encoding='UTF-8') as f:
            int2word = json.load(f)
        return word2int, int2word

    def __len__(self):
        return len(self.data)

    def __getitem__(self, Index):
        # Separate the English and Chinese halves of the line
        sentences = self.data[Index]
        sentences = re.split('[\t\n]', sentences)
        sentences = list(filter(None, sentences))
        #print (sentences)
        assert len(sentences) == 2

        # Prepare the special tokens
        BOS = self.word2int_en['<BOS>']
        EOS = self.word2int_en['<EOS>']
        UNK = self.word2int_en['<UNK>']

        # Add <BOS> at the start, <EOS> at the end, and replace out-of-vocabulary words with <UNK>
        en, cn = [BOS], [BOS]
        # Split the English sentence into subwords and map them to integers
        sentence = re.split(' ', sentences[0])
        sentence = list(filter(None, sentence))
        #print (f'en: {sentence}')
        for word in sentence:
            en.append(self.word2int_en.get(word, UNK))
        en.append(EOS)

        # Split the Chinese sentence into words and map them to integers
        sentence = re.split(' ', sentences[1])
        sentence = list(filter(None, sentence))
        #print (f'cn: {sentence}')
        for word in sentence:
            cn.append(self.word2int_cn.get(word, UNK))
        cn.append(EOS)

        en, cn = np.asarray(en), np.asarray(cn)

        # Pad both sentences to the same length with <PAD>
        en, cn = self.transform(en), self.transform(cn)
        en, cn = torch.LongTensor(en), torch.LongTensor(cn)

        return en, cn


class Attention(nn.Module):
    def __init__(self, hid_dim):
        super(Attention, self).__init__()
        self.hid_dim = hid_dim

    def forward(self, encoder_outputs, decoder_hidden):
        # encoder_outputs = [batch size, sequence len, hid dim * directions]
        # decoder_hidden  = [num_layers, batch size, hid dim * directions]
        # The attention computation is left unimplemented; with attention = False it is never called
        attention = None
        return attention


# Models
class Encoder(nn.Module):
    def __init__(self, en_vocab_size, emb_dim, hid_dim, n_layers, dropout):
        super().__init__()
        self.embedding = nn.Embedding(en_vocab_size, emb_dim)
        self.hid_dim = hid_dim
        self.n_layers = n_layers
        self.rnn = nn.GRU(emb_dim, hid_dim, n_layers, dropout=dropout, batch_first=True, bidirectional=True)
        self.dropout = nn.Dropout(dropout)

    def forward(self, input):
        # input = [batch size, sequence len] (token indices)
        # outputs = [batch size, sequence len, hid dim * directions]
        # hidden = [num_layers * directions, batch size, hid dim]
        embedding = self.embedding(input)
        # outputs is the output of the top RNN layer at every time step
        outputs, hidden = self.rnn(self.dropout(embedding))
        return outputs, hidden


class Decoder(nn.Module):
    def __init__(self, cn_vocab_size, emb_dim, hid_dim, n_layers, dropout, isatt):
        super().__init__()
        self.cn_vocab_size = cn_vocab_size
        # The Encoder is bidirectional, so the Decoder's hidden size is doubled
        self.hid_dim = hid_dim * 2
        self.n_layers = n_layers
        self.embedding = nn.Embedding(cn_vocab_size, emb_dim)
        self.isatt = isatt
        self.attention = Attention(hid_dim)
        # If attention were used, the GRU input would also have to carry the context vector
        self.input_dim = emb_dim
        self.rnn = nn.GRU(self.input_dim, self.hid_dim, self.n_layers, dropout=dropout, batch_first=True)
        self.embedding2vocab1 = nn.Linear(self.hid_dim, self.hid_dim * 2)
        self.embedding2vocab2 = nn.Linear(self.hid_dim * 2, self.hid_dim * 4)
        self.embedding2vocab3 = nn.Linear(self.hid_dim * 4, self.cn_vocab_size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, input, hidden, encoder_outputs):
        # input = [batch size] (token index of the previous step)
        # hidden = [n layers * directions, batch size, hid dim]
        # The Decoder is unidirectional, so directions = 1
        input = input.unsqueeze(1)
        embedded = self.dropout(self.embedding(input))
        # embedded = [batch size, 1, emb dim]
        if self.isatt:
            attn = self.attention(encoder_outputs, hidden)
        output, hidden = self.rnn(embedded, hidden)
        # output = [batch size, 1, hid dim]
        # hidden = [num_layers, batch size, hid dim]

        # Turn the RNN output into a score for every word in the vocabulary
        output = self.embedding2vocab1(output.squeeze(1))
        output = self.embedding2vocab2(output)
        prediction = self.embedding2vocab3(output)
        # prediction = [batch size, vocab size]
        return prediction, hidden


class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder, device):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.device = device
        assert encoder.n_layers == decoder.n_layers, \
            "Encoder and decoder must have equal number of layers!"

    def forward(self, input, target, teacher_forcing_ratio):
        # input = [batch size, input len] (token indices)
        # target = [batch size, target len] (token indices)
        # teacher_forcing_ratio is the probability of feeding the ground-truth token at each step
        batch_size = target.shape[0]
        target_len = target.shape[1]
        vocab_size = self.decoder.cn_vocab_size

        # Buffer that stores the Decoder output of every step
        outputs = torch.zeros(batch_size, target_len, vocab_size).to(self.device)
        # Run the Encoder over the input
        encoder_outputs, hidden = self.encoder(input)
        # encoder_outputs is mainly used by Attention
        # The Encoder is bidirectional, so the hidden states of the two directions of each layer are concatenated
        # hidden = [num_layers * directions, batch size, hid dim] --> [num_layers, directions, batch size, hid dim]
        hidden = hidden.view(self.encoder.n_layers, 2, batch_size, -1)
        hidden = torch.cat((hidden[:, -2, :, :], hidden[:, -1, :, :]), dim=2)
        input = target[:, 0]
        preds = []
        for t in range(1, target_len):
            output, hidden = self.decoder(input, hidden, encoder_outputs)
            outputs[:, t] = output
            # Decide whether to use the ground-truth token for the next step
            teacher_force = random.random() <= teacher_forcing_ratio
            # Take the most probable token
            top1 = output.argmax(1)
            # With teacher forcing, feed the ground-truth token; otherwise feed the model's own prediction
            input = target[:, t] if teacher_force and t < target_len else top1
            preds.append(top1.unsqueeze(1))
        preds = torch.cat(preds, 1)
        return outputs, preds

    def inference(self, input, target):
        # input = [batch size, input len] (token indices)
        # target = [batch size, target len] (token indices)
        batch_size = input.shape[0]
        input_len = input.shape[1]  # maximum number of tokens to generate
        vocab_size = self.decoder.cn_vocab_size

        outputs = torch.zeros(batch_size, input_len, vocab_size).to(self.device)
        encoder_outputs, hidden = self.encoder(input)
        # hidden = [num_layers * directions, batch size, hid dim] --> [num_layers, directions, batch size, hid dim]
        hidden = hidden.view(self.encoder.n_layers, 2, batch_size, -1)
        hidden = torch.cat((hidden[:, -2, :, :], hidden[:, -1, :, :]), dim=2)
        input = target[:, 0]
        preds = []
        for t in range(1, input_len):
            output, hidden = self.decoder(input, hidden, encoder_outputs)
            # Store the prediction of this step
            outputs[:, t] = output
            # Take the most probable token and feed it back as the next input
            top1 = output.argmax(1)
            input = top1
            preds.append(top1.unsqueeze(1))
        preds = torch.cat(preds, 1)
        return outputs, preds


def save_model(model, optimizer, store_model_path, step):
    torch.save(model.state_dict(), f'{store_model_path}/model_{step}.ckpt')
    return


def load_model(model, load_model_path):
    print(f'Load model from {load_model_path}')
    model.load_state_dict(torch.load(f'{load_model_path}.ckpt'))
    return model


def build_model(config, en_vocab_size, cn_vocab_size):
    encoder = Encoder(en_vocab_size, config.emb_dim, config.hid_dim, config.n_layers, config.dropout)
    decoder = Decoder(cn_vocab_size, config.emb_dim, config.hid_dim, config.n_layers, config.dropout, config.attention)
    model = Seq2Seq(encoder, decoder, device)
    print(model)
    optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)
    print(optimizer)
    if config.load_model:
        model = load_model(model, config.load_model_path)
    model = model.to(device)
    return model, optimizer


# Convert sequences of token ids back into sentences
def tokens2sentence(outputs, int2word):
    sentences = []
    for tokens in outputs:
        sentence = []
        for token in tokens:
            word = int2word[str(int(token))]
            if word == '<EOS>':
                break
            sentence.append(word)
        sentences.append(sentence)
    return sentences


# Compute the BLEU score
def computebleu(sentences, targets):
    score = 0
    assert (len(sentences) == len(targets))

    def cut_token(sentence):
        tmp = []
        for token in sentence:
            if token == '<UNK>' or token.isdigit() or len(bytes(token[0], encoding='utf-8')) == 1:
                tmp.append(token)
            else:
                tmp += [word for word in token]
        return tmp

    for sentence, target in zip(sentences, targets):
        sentence = cut_token(sentence)
        target = cut_token(target)
        score += sentence_bleu([target], sentence, weights=(1, 0, 0, 0))

    return score


def infinite_iter(data_loader):
    it = iter(data_loader)
    while True:
        try:
            ret = next(it)
            yield ret
        except StopIteration:
            it = iter(data_loader)


def schedule_sampling():
    return 1


# Define the training loop
def train(model, optimizer, train_iter, loss_function, total_steps, summary_steps, train_dataset):
    model.train()
    model.zero_grad()
    losses = []
    loss_sum = 0.0
    for step in range(summary_steps):
        sources, targets = next(train_iter)
        sources, targets = sources.to(device), targets.to(device)
        outputs, preds = model(sources, targets, schedule_sampling())
        # The first token of targets is <BOS>, so it is ignored
        outputs = outputs[:, 1:].reshape(-1, outputs.size(2))
        targets = targets[:, 1:].reshape(-1)
        loss = loss_function(outputs, targets)

        optimizer.zero_grad()
        loss.backward()
        grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), 1)
        optimizer.step()

        loss_sum += loss.item()
        if (step + 1) % 5 == 0:
            loss_sum = loss_sum / 5
            print("\r", "train [{}] loss: {:.3f}, Perplexity: {:.3f} ".format(
                total_steps + step + 1, loss_sum, np.exp(loss_sum)), end=" ")
            losses.append(loss_sum)
            loss_sum = 0.0

    return model, optimizer, losses


def test(model, dataloader, loss_function):
    model.eval()
    loss_sum, bleu_score = 0.0, 0.0
    n = 0
    result = []
    for sources, targets in dataloader:
        sources, targets = sources.to(device), targets.to(device)
        batch_size = sources.size(0)
        outputs, preds = model.inference(sources, targets)
        # The first token of targets is <BOS>, so it is ignored
        outputs = outputs[:, 1:].reshape(-1, outputs.size(2))
        targets = targets[:, 1:].reshape(-1)
        loss = loss_function(outputs, targets)
        loss_sum += loss.item()

        # Convert the predictions back into text
        targets = targets.view(sources.size(0), -1)
        preds = tokens2sentence(preds, dataloader.dataset.int2word_cn)
        sources = tokens2sentence(sources, dataloader.dataset.int2word_en)
        targets = tokens2sentence(targets, dataloader.dataset.int2word_cn)
        for source, pred, target in zip(sources, preds, targets):
            result.append((source, pred, target))
        # Compute the BLEU score
        bleu_score += computebleu(preds, targets)

        n += batch_size

    return loss_sum / len(dataloader), bleu_score / n, result


# Training procedure
def train_process(config):
    train_dataset = EN2CNDataset(config.data_path, config.max_output_len, 'training')
    train_loader = data.DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True)
    train_iter = infinite_iter(train_loader)
    val_dataset = EN2CNDataset(config.data_path, config.max_output_len, 'validation')
    val_loader = data.DataLoader(val_dataset, batch_size=1)

    model, optimizer = build_model(config, train_dataset.en_vocab_size, train_dataset.cn_vocab_size)
    loss_function = nn.CrossEntropyLoss(ignore_index=0)

    train_losses, val_losses, bleu_scores = [], [], []
    total_steps = 0
    while (total_steps < config.num_steps):
        model, optimizer, loss = train(model, optimizer, train_iter, loss_function,
                                       total_steps, config.summary_steps, train_dataset)
        train_losses += loss

        val_loss, bleu_score, result = test(model, val_loader, loss_function)
        val_losses.append(val_loss)
        bleu_scores.append(bleu_score)

        total_steps += config.summary_steps
        print("\r", "val [{}] loss: {:.3f}, Perplexity: {:.3f}, bleu score: {:.3f} ".format(
            total_steps, val_loss, np.exp(val_loss), bleu_score))

        if total_steps % config.store_steps == 0 or total_steps >= config.num_steps:
            save_model(model, optimizer, config.store_model_path, total_steps)
            with open(f'{config.store_model_path}/output_{total_steps}.txt', 'w') as f:
                for line in result:
                    print(line, file=f)

    return train_losses, val_losses, bleu_scores


def test_process(config):
    test_dataset = EN2CNDataset(config.data_path, config.max_output_len, 'testing')
    test_loader = data.DataLoader(test_dataset, batch_size=1)
    model, optimizer = build_model(config, test_dataset.en_vocab_size, test_dataset.cn_vocab_size)
    print("Finish build model")
    loss_function = nn.CrossEntropyLoss(ignore_index=0)
    model.eval()
    test_loss, bleu_score, result = test(model, test_loader, loss_function)
    with open(f'./test_output.txt', 'w') as f:
        for line in result:
            print(line, file=f)

    return test_loss, bleu_score


class configurations(object):
    def __init__(self):
        self.batch_size = 60
        self.emb_dim = 256
        self.hid_dim = 512
        self.n_layers = 3
        self.dropout = 0.5
        self.learning_rate = 0.00005
        self.max_output_len = 50      # maximum length of the output sentence
        self.num_steps = 12000        # total number of training updates
        self.store_steps = 300        # save the model every this many updates
        self.summary_steps = 300      # validate (check for overfitting) every this many updates
        self.load_model = False       # whether to load a saved model
        self.store_model_path = "./ckpt"
        self.load_model_path = None
        self.data_path = "./cmn-eng"
        self.attention = False


if __name__ == '__main__':
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    config = configurations()
    print('config:\n', vars(config))
    train_losses, val_losses, bleu_scores = train_process(config)

    plt.figure()
    plt.plot(train_losses)
    plt.xlabel('steps')
    plt.ylabel('loss')
    plt.title('train loss')
    plt.show()

    config = configurations()
    print('config:\n', vars(config))
    test_loss, bleu_score = test_process(config)
    print(f'test loss: {test_loss}, bleu_score: {bleu_score}')

    plt.figure()
    plt.plot(val_losses)
    plt.xlabel('steps')
    plt.ylabel('loss')
    plt.title('validation loss')
    plt.show()

    plt.figure()
    plt.plot(bleu_scores)
    plt.xlabel('steps')
    plt.ylabel('BLEU score')
    plt.title('BLEU score')
    plt.show()
```
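Note that schedule_sampling() in the listing always returns 1, i.e. pure teacher forcing. If one wanted to decay the teacher-forcing ratio over training (a common variant, not part of the original code), a minimal sketch might look like this; the function name and signature are hypothetical and differ from the stub, so the call site in train() would have to pass the current step:

```python
# Hypothetical decaying schedule: start with full teacher forcing, anneal linearly to a floor
def linear_schedule_sampling(step, total_steps=12000, start=1.0, end=0.3):
    ratio = start - (start - end) * min(step, total_steps) / total_steps
    return ratio
```

With this variant, the call inside train() would become `model(sources, targets, linear_schedule_sampling(total_steps + step))`.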
Sequences of different lengths are padded to the same length so that they can be batched for training.
```python
# Pad answers of different lengths to the same length so the model can be trained in batches
class LabelTransform(object):
    def __init__(self, size, pad):
        self.size = size
        self.pad = pad

    def __call__(self, label):
        label = np.pad(label, (0, (self.size - label.shape[0])), mode='constant', constant_values=self.pad)
        return label
```
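As a quick illustration (a minimal sketch with made-up values; the pad index 0 is an assumption matching the `<PAD>` token used elsewhere), padding a length-4 sequence to length 8 looks like this:

```python
import numpy as np

# Hypothetical example: pad a length-4 token sequence to size 8 with pad index 0
transform = LabelTransform(size=8, pad=0)
tokens = np.array([1, 42, 17, 2])   # e.g. <BOS>, two words, <EOS>
print(transform(tokens))            # [ 1 42 17  2  0  0  0  0]
```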
The training set contains 18,000 sentence pairs, the validation set 500, and the test set 2,636.
```python
# Define the dataset
class EN2CNDataset(data.Dataset):
    def __init__(self, root, max_output_len, set_name):
        self.root = root

        self.word2int_cn, self.int2word_cn = self.get_dictionary('cn')
        self.word2int_en, self.int2word_en = self.get_dictionary('en')

        self.data = []
        with open(os.path.join(self.root, f'{set_name}.txt'), "r", encoding='UTF-8') as f:
            for line in f:
                self.data.append(line)
        print(f'{set_name} dataset size: {len(self.data)}')

        self.cn_vocab_size = len(self.word2int_cn)
        self.en_vocab_size = len(self.word2int_en)
        self.transform = LabelTransform(max_output_len, self.word2int_en['<PAD>'])

    def get_dictionary(self, language):
        with open(os.path.join(self.root, f'word2int_{language}.json'), "r", encoding='UTF-8') as f:
            word2int = json.load(f)
        with open(os.path.join(self.root, f'int2word_{language}.json'), "r", encoding='UTF-8') as f:
            int2word = json.load(f)
        return word2int, int2word

    def __len__(self):
        return len(self.data)

    def __getitem__(self, Index):
        # Separate the English and Chinese halves of the line
        sentences = self.data[Index]
        sentences = re.split('[\t\n]', sentences)
        sentences = list(filter(None, sentences))
        #print (sentences)
        assert len(sentences) == 2

        # Prepare the special tokens
        BOS = self.word2int_en['<BOS>']
        EOS = self.word2int_en['<EOS>']
        UNK = self.word2int_en['<UNK>']

        # Add <BOS> at the start, <EOS> at the end, and replace out-of-vocabulary words with <UNK>
        en, cn = [BOS], [BOS]
        # Split the English sentence into subwords and map them to integers
        sentence = re.split(' ', sentences[0])
        sentence = list(filter(None, sentence))
        #print (f'en: {sentence}')
        for word in sentence:
            en.append(self.word2int_en.get(word, UNK))
        en.append(EOS)

        # Split the Chinese sentence into words and map them to integers
        sentence = re.split(' ', sentences[1])
        sentence = list(filter(None, sentence))
        #print (f'cn: {sentence}')
        for word in sentence:
            cn.append(self.word2int_cn.get(word, UNK))
        cn.append(EOS)

        en, cn = np.asarray(en), np.asarray(cn)

        # Pad both sentences to the same length with <PAD>
        en, cn = self.transform(en), self.transform(cn)
        en, cn = torch.LongTensor(en), torch.LongTensor(cn)

        return en, cn
```
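For reference, loading the data could look like the following sketch; it assumes the ./cmn-eng directory from the configuration above, containing training.txt plus the word2int/int2word JSON files:

```python
# Hypothetical usage: load the training split and inspect one padded example pair
dataset = EN2CNDataset('./cmn-eng', max_output_len=50, set_name='training')
en, cn = dataset[0]
print(en.shape, cn.shape)   # both torch.Size([50]): index sequences padded with <PAD>
loader = data.DataLoader(dataset, batch_size=60, shuffle=True)
```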
For every input token, the Encoder emits an output vector and a hidden state, and the hidden state is carried over to the next time step.
Parameter | Meaning |
---|---|
en_vocab_size | size of the English vocabulary, i.e. the number of English subwords |
emb_dim | embedding dimension, used to compress the one-hot word vectors to a fixed size |
hid_dim | dimension of the outputs and hidden states |
n_layers | number of RNN layers |
The Encoder returns outputs and hidden: outputs is the full sequence of outputs from the top RNN layer, which can later be processed by Attention, and hidden is the final hidden state of every layer, which is passed to the Decoder to start decoding.
```python
class Encoder(nn.Module):
    def __init__(self, en_vocab_size, emb_dim, hid_dim, n_layers, dropout):
        super().__init__()
        self.embedding = nn.Embedding(en_vocab_size, emb_dim)
        self.hid_dim = hid_dim
        self.n_layers = n_layers
        self.rnn = nn.GRU(emb_dim, hid_dim, n_layers, dropout=dropout, batch_first=True, bidirectional=True)
        self.dropout = nn.Dropout(dropout)

    def forward(self, input):
        # input = [batch size, sequence len] (token indices)
        # outputs = [batch size, sequence len, hid dim * directions]
        # hidden = [num_layers * directions, batch size, hid dim]
        embedding = self.embedding(input)
        # outputs is the output of the top RNN layer at every time step
        outputs, hidden = self.rnn(self.dropout(embedding))
        return outputs, hidden
```
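A quick shape check makes the comments above concrete; the dimensions below are toy values chosen purely for illustration, not the ones from the configuration used in this article:

```python
# Hypothetical toy dimensions, purely for a shape check
enc = Encoder(en_vocab_size=1000, emb_dim=32, hid_dim=64, n_layers=2, dropout=0.5)
dummy = torch.randint(0, 1000, (5, 10))   # batch of 5 sentences, 10 token indices each
outputs, hidden = enc(dummy)
print(outputs.shape)   # torch.Size([5, 10, 128])  = [batch, seq len, hid_dim * 2 directions]
print(hidden.shape)    # torch.Size([4, 5, 64])    = [n_layers * 2 directions, batch, hid_dim]
```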
The Decoder is also an RNN; it uses the final hidden state of each Encoder layer as its initial state for decoding.
Parameter | Meaning |
---|---|
cn_vocab_size | size of the Chinese vocabulary, i.e. the number of Chinese words |
emb_dim | embedding dimension, used to compress the one-hot word vectors to a fixed size |
hid_dim | dimension of the outputs and hidden states |
output_dim | dimension of the final output |
n_layers | number of RNN layers |
isatt | whether to use the Attention Mechanism |
The Decoder returns hidden and output: hidden is the updated hidden state, computed from the current input and the previous hidden state, and output (called prediction in the code) gives, for every word in the vocabulary, a score for being the result of this decoding step.
```python
class Decoder(nn.Module):
    def __init__(self, cn_vocab_size, emb_dim, hid_dim, n_layers, dropout, isatt):
        super().__init__()
        self.cn_vocab_size = cn_vocab_size
        # The Encoder is bidirectional, so the Decoder's hidden size is doubled
        self.hid_dim = hid_dim * 2
        self.n_layers = n_layers
        self.embedding = nn.Embedding(cn_vocab_size, emb_dim)
        self.isatt = isatt
        self.attention = Attention(hid_dim)
        # If attention were used, the GRU input would also have to carry the context vector
        self.input_dim = emb_dim
        self.rnn = nn.GRU(self.input_dim, self.hid_dim, self.n_layers, dropout=dropout, batch_first=True)
        self.embedding2vocab1 = nn.Linear(self.hid_dim, self.hid_dim * 2)
        self.embedding2vocab2 = nn.Linear(self.hid_dim * 2, self.hid_dim * 4)
        self.embedding2vocab3 = nn.Linear(self.hid_dim * 4, self.cn_vocab_size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, input, hidden, encoder_outputs):
        # input = [batch size] (token index of the previous step)
        # hidden = [n layers * directions, batch size, hid dim]
        # The Decoder is unidirectional, so directions = 1
        input = input.unsqueeze(1)
        embedded = self.dropout(self.embedding(input))
        # embedded = [batch size, 1, emb dim]
        if self.isatt:
            attn = self.attention(encoder_outputs, hidden)
        output, hidden = self.rnn(embedded, hidden)
        # output = [batch size, 1, hid dim]
        # hidden = [num_layers, batch size, hid dim]

        # Turn the RNN output into a score for every word in the vocabulary
        output = self.embedding2vocab1(output.squeeze(1))
        output = self.embedding2vocab2(output)
        prediction = self.embedding2vocab3(output)
        # prediction = [batch size, vocab size]
        return prediction, hidden
```
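To see a single decoding step in isolation, here is a small sketch with the same toy dimensions as the Encoder check above; all sizes are illustrative, and the `<BOS>` index of 1 is an assumption:

```python
# Hypothetical single decoding step with toy dimensions (no attention)
enc = Encoder(en_vocab_size=1000, emb_dim=32, hid_dim=64, n_layers=2, dropout=0.5)
dec = Decoder(cn_vocab_size=1500, emb_dim=32, hid_dim=64, n_layers=2, dropout=0.5, isatt=False)

src = torch.randint(0, 1000, (5, 10))                      # 5 English sentences, 10 tokens each
encoder_outputs, hidden = enc(src)                         # hidden = [n_layers * 2, 5, 64]

# Concatenate the two directions of each layer, as Seq2Seq.forward does below
hidden = hidden.view(2, 2, 5, 64)                          # [n_layers, directions, batch, hid_dim]
hidden = torch.cat((hidden[:, -2], hidden[:, -1]), dim=2)  # [n_layers, batch, hid_dim * 2]

bos = torch.ones(5, dtype=torch.long)                      # previous token, assumed <BOS> index 1
prediction, hidden = dec(bos, hidden, encoder_outputs)
print(prediction.shape)                                    # torch.Size([5, 1500]) -> scores over the Chinese vocabulary
```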
When the input is long, the Attention Mechanism can be used to provide the Decoder with more information than the single context vector alone.
```python
class Attention(nn.Module):
    def __init__(self, hid_dim):
        super(Attention, self).__init__()
        self.hid_dim = hid_dim

    def forward(self, encoder_outputs, decoder_hidden):
        # encoder_outputs = [batch size, sequence len, hid dim * directions]
        # decoder_hidden  = [num_layers, batch size, hid dim * directions]
        # The attention computation is left unimplemented; with attention = False it is never called
        attention = None
        return attention
```
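The class above is a stub (the configuration sets attention = False, so it is never called). As a rough illustration of what could go here, the following is a minimal dot-product attention sketch, under the assumption that it returns a context vector computed from the top decoder layer's hidden state and all encoder outputs; this is one possible design, not the author's implementation:

```python
class DotProductAttention(nn.Module):
    # A hypothetical attention module: scores encoder outputs against the decoder state
    def __init__(self, hid_dim):
        super().__init__()
        # Encoder outputs have size hid_dim * 2 (bidirectional), matching the decoder hidden size
        self.hid_dim = hid_dim * 2

    def forward(self, encoder_outputs, decoder_hidden):
        # encoder_outputs = [batch size, sequence len, hid_dim * 2]
        # decoder_hidden  = [n_layers, batch size, hid_dim * 2]; use the top layer as the query
        query = decoder_hidden[-1].unsqueeze(2)                    # [batch, hid_dim * 2, 1]
        scores = torch.bmm(encoder_outputs, query).squeeze(2)      # [batch, sequence len]
        weights = F.softmax(scores, dim=1).unsqueeze(1)            # [batch, 1, sequence len]
        context = torch.bmm(weights, encoder_outputs).squeeze(1)   # [batch, hid_dim * 2]
        return context
```

If something like this were plugged in, Decoder.forward would also need to combine the returned context with the embedded input (and input_dim would grow accordingly); the stub above deliberately leaves that wiring open.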
Finally, the Encoder and the Decoder are combined into a single Seq2Seq model.
```python
class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder, device):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.device = device
        assert encoder.n_layers == decoder.n_layers, \
            "Encoder and decoder must have equal number of layers!"

    def forward(self, input, target, teacher_forcing_ratio):
        # input = [batch size, input len] (token indices)
        # target = [batch size, target len] (token indices)
        # teacher_forcing_ratio is the probability of feeding the ground-truth token at each step
        batch_size = target.shape[0]
        target_len = target.shape[1]
        vocab_size = self.decoder.cn_vocab_size

        # Buffer that stores the Decoder output of every step
        outputs = torch.zeros(batch_size, target_len, vocab_size).to(self.device)
        # Run the Encoder over the input
        encoder_outputs, hidden = self.encoder(input)
        # encoder_outputs is mainly used by Attention
        # The Encoder is bidirectional, so the hidden states of the two directions of each layer are concatenated
        # hidden = [num_layers * directions, batch size, hid dim] --> [num_layers, directions, batch size, hid dim]
        hidden = hidden.view(self.encoder.n_layers, 2, batch_size, -1)
        hidden = torch.cat((hidden[:, -2, :, :], hidden[:, -1, :, :]), dim=2)
        input = target[:, 0]
        preds = []
        for t in range(1, target_len):
            output, hidden = self.decoder(input, hidden, encoder_outputs)
            outputs[:, t] = output
            # Decide whether to use the ground-truth token for the next step
            teacher_force = random.random() <= teacher_forcing_ratio
            # Take the most probable token
            top1 = output.argmax(1)
            # With teacher forcing, feed the ground-truth token; otherwise feed the model's own prediction
            input = target[:, t] if teacher_force and t < target_len else top1
            preds.append(top1.unsqueeze(1))
        preds = torch.cat(preds, 1)
        return outputs, preds

    def inference(self, input, target):
        # input = [batch size, input len] (token indices)
        # target = [batch size, target len] (token indices)
        batch_size = input.shape[0]
        input_len = input.shape[1]  # maximum number of tokens to generate
        vocab_size = self.decoder.cn_vocab_size

        outputs = torch.zeros(batch_size, input_len, vocab_size).to(self.device)
        encoder_outputs, hidden = self.encoder(input)
        # hidden = [num_layers * directions, batch size, hid dim] --> [num_layers, directions, batch size, hid dim]
        hidden = hidden.view(self.encoder.n_layers, 2, batch_size, -1)
        hidden = torch.cat((hidden[:, -2, :, :], hidden[:, -1, :, :]), dim=2)
        input = target[:, 0]
        preds = []
        for t in range(1, input_len):
            output, hidden = self.decoder(input, hidden, encoder_outputs)
            # Store the prediction of this step
            outputs[:, t] = output
            # Take the most probable token and feed it back as the next input
            top1 = output.argmax(1)
            input = top1
            preds.append(top1.unsqueeze(1))
        preds = torch.cat(preds, 1)
        return outputs, preds
```
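Putting the pieces together, here is a toy forward pass as a sanity check; the dimensions and the device choice are illustrative only:

```python
# Hypothetical end-to-end check with toy dimensions
device = torch.device('cpu')
enc = Encoder(en_vocab_size=1000, emb_dim=32, hid_dim=64, n_layers=2, dropout=0.5)
dec = Decoder(cn_vocab_size=1500, emb_dim=32, hid_dim=64, n_layers=2, dropout=0.5, isatt=False)
model = Seq2Seq(enc, dec, device).to(device)

src = torch.randint(0, 1000, (4, 12))   # 4 English sentences, 12 tokens each
tgt = torch.randint(0, 1500, (4, 15))   # 4 Chinese sentences, 15 tokens each (tgt[:, 0] plays the role of <BOS>)
outputs, preds = model(src, tgt, teacher_forcing_ratio=0.5)
print(outputs.shape)   # torch.Size([4, 15, 1500]) -- a vocabulary distribution per target position
print(preds.shape)     # torch.Size([4, 14])       -- the argmax token of every step after <BOS>
```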