Assignment 4 is mainly an NMT (neural machine translation) task.
(1) Pad sentences of different lengths so that they all have the same length (in utils.py).
    def pad_sents(sents, pad_token):

        sents_padded = []

        ### YOUR CODE HERE (~6 Lines)
        # Find the longest sentence in the batch.
        max_len = max(len(sentence) for sentence in sents)
        for sentence in sents:
            # Build a padded copy so the input sentences are not mutated in place.
            padded = sentence + [pad_token] * (max_len - len(sentence))
            sents_padded.append(padded)
        ### END YOUR CODE

        return sents_padded
Since no test is provided, make up a few sentences to test it yourself:
    l = [['i', 'want', 'hate', 'you'], ['i', 'think', 'you', 'are', 'bad'], ['i', 'like', 'you']]
    print(pad_sents(l, '0'))  # sents, pad_token

    output:
    [['i', 'want', 'hate', 'you', '0'], ['i', 'think', 'you', 'are', 'bad'], ['i', 'like', 'you', '0', '0']]
(2) Use nn.Embedding to initialize the source and target embedding layers (in model_embeddings.py).
    class ModelEmbeddings(nn.Module):
        def __init__(self, embed_size, vocab):
            super(ModelEmbeddings, self).__init__()
            self.embed_size = embed_size

            # default values
            self.source = None
            self.target = None

            src_pad_token_idx = vocab.src['<pad>']
            tgt_pad_token_idx = vocab.tgt['<pad>']
            ### YOUR CODE HERE (~2 Lines)
            self.source = nn.Embedding(len(vocab.src), self.embed_size, padding_idx=src_pad_token_idx)
            self.target = nn.Embedding(len(vocab.tgt), self.embed_size, padding_idx=tgt_pad_token_idx)
            ### END YOUR CODE
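To see what padding_idx actually does, here is a minimal sketch with made-up sizes (not part of the assignment): the row of the embedding matrix at padding_idx is initialized to zeros and receives no gradient updates, so <pad> tokens contribute nothing.

    import torch
    import torch.nn as nn

    # Made-up vocabulary size (10) and embedding size (4); pad index = 0.
    emb = nn.Embedding(num_embeddings=10, embedding_dim=4, padding_idx=0)

    ids = torch.tensor([[1, 2, 0, 0]])  # one padded "sentence"
    out = emb(ids)
    print(out[0, 2])  # tensor([0., 0., 0., 0.]) -- the <pad> embedding stays zero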
(3) Build the NMT network structure: the encoder is a Bi-LSTM, the decoder is an LSTM cell, with a multiplicative (dot-product style) attention mechanism on top.
    class NMT(nn.Module):
        def __init__(self, embed_size, hidden_size, vocab, dropout_rate=0.2):
            super(NMT, self).__init__()
            self.model_embeddings = ModelEmbeddings(embed_size, vocab)
            ## initialize the ModelEmbeddings class
            self.hidden_size = hidden_size
            self.dropout_rate = dropout_rate
            self.vocab = vocab

            # default values
            self.encoder = None
            self.decoder = None
            self.h_projection = None
            self.c_projection = None
            self.att_projection = None
            self.combined_output_projection = None
            self.target_vocab_projection = None
            self.dropout = None

            ### YOUR CODE HERE (~8 Lines)

            self.encoder = nn.LSTM(embed_size, hidden_size, bidirectional=True)
            self.decoder = nn.LSTMCell(hidden_size + embed_size, hidden_size, bias=True)

            # The encoder is bidirectional, so its states have dimension 2h.
            self.h_projection = nn.Linear(hidden_size * 2, hidden_size, bias=False)
            self.c_projection = nn.Linear(hidden_size * 2, hidden_size, bias=False)
            self.att_projection = nn.Linear(hidden_size * 2, hidden_size, bias=False)

            self.combined_output_projection = nn.Linear(hidden_size * 3, hidden_size, bias=False)
            self.target_vocab_projection = nn.Linear(hidden_size, len(vocab.tgt), bias=False)
            self.dropout = nn.Dropout(dropout_rate)

            ### END YOUR CODE
This part is fairly straightforward, since the dimensions of each projection layer are already spelled out in the assignment PDF. A quick shape check is sketched below.
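A minimal sketch verifying the projection-layer shapes, using hypothetical sizes (e = 256, h = 512, b = 3; not from the assignment):

    import torch
    import torch.nn as nn

    e, h, b = 256, 512, 3  # hypothetical embed size, hidden size, batch size

    h_projection = nn.Linear(h * 2, h, bias=False)
    combined_output_projection = nn.Linear(h * 3, h, bias=False)

    last_hidden_cat = torch.randn(b, h * 2)  # concatenated fwd/bwd final states
    print(h_projection(last_hidden_cat).shape)  # torch.Size([3, 512])

    u_t = torch.randn(b, h * 3)  # [attention output (2h); decoder hidden (h)]
    print(combined_output_projection(u_t).shape)  # torch.Size([3, 512])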
(d) mainly builds the encode step.
input: the padded source sentences
output: every hidden state h, plus the final h and c (the final h and c mainly serve as the decoder's initial input; the per-step h's are mainly used for attention)
    def encode(self, source_padded: torch.Tensor, source_lengths: List[int]) -> Tuple[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:

        enc_hiddens, dec_init_state = None, None
        ### YOUR CODE HERE
        source_embeddings = self.model_embeddings.source(source_padded)
        # -> (src_len, b, e)
        X = pack_padded_sequence(source_embeddings, source_lengths, batch_first=False, enforce_sorted=False)
        ### Pack the padded batch; each column is one sentence.
        ### batch_first=False (the default) keeps the input shaped (src_len, b, e).
        ### enforce_sorted=False removes the default requirement that sentences
        ### be pre-sorted by source_lengths.
        ### See https://www.cnblogs.com/sbj123456789/p/9834018.html
        enc_hiddens, (last_hidden, last_cell) = self.encoder(X)  ### run the Bi-LSTM encoder
        ### enc_hiddens: PackedSequence over (src_len, b, h*2)
        enc_hiddens = pad_packed_sequence(enc_hiddens, batch_first=True)
        enc_hiddens = enc_hiddens[0]  ### -> (b, src_len, h*2)
        # Concatenate the forward and backward final states, then project 2h -> h.
        init_decoder_hidden = self.h_projection(torch.cat((last_hidden[0], last_hidden[1]), 1))
        init_decoder_cell = self.c_projection(torch.cat((last_cell[0], last_cell[1]), 1))

        dec_init_state = (init_decoder_hidden, init_decoder_cell)
        ### END YOUR CODE

        return enc_hiddens, dec_init_state
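To see what the pack/unpack pair does in isolation, here is a minimal sketch with toy shapes (2 sentences of lengths 3 and 2, embedding size 4; all values made up):

    import torch
    from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

    src = torch.randn(3, 2, 4)  # (src_len, b, e): two padded sentences
    lengths = [3, 2]            # true lengths before padding

    packed = pack_padded_sequence(src, lengths, batch_first=False, enforce_sorted=False)
    # An RNN fed `packed` skips the padded time steps entirely.
    unpacked, out_lengths = pad_packed_sequence(packed, batch_first=True)
    print(unpacked.shape)  # torch.Size([2, 3, 4]) -> (b, src_len, e)
    print(out_lengths)     # tensor([3, 2])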
(e) mainly builds the decode step.
(1) The attention mechanism uses the score e_{t,i} = (h_t^{dec})^T W_{attProj} h_i^{enc}, so the second factor, W_{attProj} h_i^{enc}, can be computed once up front and then wait for each time step's h_dec to arrive; this is exactly what the self.att_projection layer is for. A minimal sketch of the precomputation follows.
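A minimal sketch of this precomputation with toy sizes (b = 2, src_len = 5, h = 8; everything here is illustrative, mirroring what the step function computes):

    import torch
    import torch.nn as nn
    import torch.nn.functional as F

    b, src_len, h = 2, 5, 8  # toy batch size, source length, hidden size
    enc_hiddens = torch.randn(b, src_len, 2 * h)
    att_projection = nn.Linear(2 * h, h, bias=False)

    # Compute W_attProj h_enc once for the whole source sentence...
    enc_hiddens_proj = att_projection(enc_hiddens)  # (b, src_len, h)

    # ...then each decoder step only needs a batched dot product.
    dec_hidden = torch.randn(b, h)  # h_t^{dec} at one time step
    e_t = torch.bmm(enc_hiddens_proj, dec_hidden.unsqueeze(2)).squeeze(2)  # (b, src_len)
    alpha_t = F.softmax(e_t, dim=1)  # attention distribution over source positions
    a_t = torch.bmm(alpha_t.unsqueeze(1), enc_hiddens).squeeze(1)  # (b, 2h) context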
(2) The decode implementation:
    def decode(self, enc_hiddens: torch.Tensor, enc_masks: torch.Tensor,
               dec_init_state: Tuple[torch.Tensor, torch.Tensor], target_padded: torch.Tensor) -> torch.Tensor:

        # Chop off the <END> token for max-length sentences.
        target_padded = target_padded[:-1]

        # Initialize the decoder state (hidden and cell)
        dec_state = dec_init_state

        # Initialize previous combined output vector o_{t-1} as zero
        batch_size = enc_hiddens.size(0)  ### number of sentences in the batch
        o_prev = torch.zeros(batch_size, self.hidden_size, device=self.device)

        combined_outputs = []
        ### YOUR CODE HERE
        # Precompute the attention projection of all encoder hidden states.
        enc_hiddens_proj = self.att_projection(enc_hiddens)
        Y = self.model_embeddings.target(target_padded)  # -> (tgt_len, b, e)
        for y_t in torch.split(Y, 1, dim=0):  # each y_t -> (1, b, e)
            y_t = torch.squeeze(y_t, dim=0)  # -> (b, e)
            Ybar_t = torch.cat((y_t, o_prev), dim=1)  # (b, e) + (b, h) = (b, e+h)
            # step returns the new decoder state, the combined output o_t, and
            # the attention scores (unused here); carry the state forward.
            dec_state, o_t, _ = self.step(Ybar_t, dec_state, enc_hiddens, enc_hiddens_proj, enc_masks)
            combined_outputs.append(o_t)
            o_prev = o_t

        combined_outputs = torch.stack(combined_outputs, dim=0)  ## -> (tgt_len, b, h)
        ### END YOUR CODE

        return combined_outputs
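For context (not shown in the snippet above): in the model's forward pass, the target_vocab_projection layer defined earlier maps the combined outputs to log-probabilities over the target vocabulary. A minimal sketch of that step with made-up sizes:

    import torch
    import torch.nn as nn
    import torch.nn.functional as F

    tgt_len, b, h, V = 6, 2, 8, 100  # made-up target length, batch, hidden, vocab sizes
    combined_outputs = torch.randn(tgt_len, b, h)
    target_vocab_projection = nn.Linear(h, V, bias=False)

    # Log-probabilities over the target vocabulary at every time step.
    P = F.log_softmax(target_vocab_projection(combined_outputs), dim=-1)
    print(P.shape)  # torch.Size([6, 2, 100]) -> (tgt_len, b, len(vocab.tgt))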