1. The Principle of GRU
A GRU has two gates in total.
z: the update gate. A sigmoid output that decides whether the previous information needs to be updated.
r: the reset gate. Also a sigmoid output, similar to the LSTM forget gate; it decides whether the previous information should be reset, i.e., whether the contents of H_{t-1} are forgotten.
H' (candidate hidden state): the current input combined with whatever survives the reset (R * H_{t-1}); when R = 0, the previous H_{t-1} is forgotten entirely.
New H: update what needs updating and keep the rest (Z = 1 keeps H_{t-1} unchanged; Z = 0 replaces it with the candidate state). These rules are summarized by the equations below.
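For reference, the standard GRU equations (the formulation used in d2l, which the code in the next section implements); ⊙ denotes elementwise multiplication:

$$Z_t = \sigma(X_t W_{xz} + H_{t-1} W_{hz} + b_z)$$
$$R_t = \sigma(X_t W_{xr} + H_{t-1} W_{hr} + b_r)$$
$$\tilde{H}_t = \tanh(X_t W_{xh} + (R_t \odot H_{t-1}) W_{hh} + b_h)$$
$$H_t = Z_t \odot H_{t-1} + (1 - Z_t) \odot \tilde{H}_t$$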
2. Implementing GRU from Scratch
import torch
from torch import nn
from d2l import torch as d2l

batch_size, num_steps = 32, 35
train_iter, vocab = d2l.load_data_time_machine(batch_size, num_steps)

def get_params(vocab_size, num_hiddens, device):
    num_inputs = num_outputs = vocab_size

    def normal(shape):
        return torch.randn(size=shape, device=device) * 0.01

    def three():
        return (normal((num_inputs, num_hiddens)),
                normal((num_hiddens, num_hiddens)),
                torch.zeros(num_hiddens, device=device))

    W_xz, W_hz, b_z = three()  # update gate parameters
    W_xr, W_hr, b_r = three()  # reset gate parameters
    W_xh, W_hh, b_h = three()  # candidate hidden state parameters
    W_hq = normal((num_hiddens, num_outputs))  # output layer parameters
    b_q = torch.zeros(num_outputs, device=device)
    params = [W_xz, W_hz, b_z, W_xr, W_hr, b_r, W_xh, W_hh, b_h, W_hq, b_q]
    for param in params:
        param.requires_grad_(True)
    return params

def init_gru_state(batch_size, num_hiddens, device):
    # the state is a tuple containing a single hidden-state tensor
    return (torch.zeros((batch_size, num_hiddens), device=device),)

def gru(inputs, state, params):
    W_xz, W_hz, b_z, W_xr, W_hr, b_r, W_xh, W_hh, b_h, W_hq, b_q = params
    H, = state
    outputs = []
    for X in inputs:
        Z = torch.sigmoid((X @ W_xz) + (H @ W_hz) + b_z)           # update gate
        R = torch.sigmoid((X @ W_xr) + (H @ W_hr) + b_r)           # reset gate
        H_tilda = torch.tanh((X @ W_xh) + ((R * H) @ W_hh) + b_h)  # candidate state
        H = Z * H + (1 - Z) * H_tilda                              # new hidden state
        Y = H @ W_hq + b_q
        outputs.append(Y)
    return torch.cat(outputs, dim=0), (H,)
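A minimal training sketch, assuming the d2l helpers RNNModelScratch and train_ch8 used in the book's RNN chapter; the learning rate and epoch count here are illustrative assumptions:

# wrap the functions above and train on the time machine dataset
vocab_size, num_hiddens = len(vocab), 256
device = d2l.try_gpu()
num_epochs, lr = 500, 1  # illustrative hyperparameters
model = d2l.RNNModelScratch(vocab_size, num_hiddens, device,
                            get_params, init_gru_state, gru)
d2l.train_ch8(model, train_iter, vocab, lr, num_epochs, device)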
3. Concise Implementation of GRU
Simply pass a GRU layer into the network in place of the RNN layer.
import torch
from torch import nn
from torch.nn import functional as F
from d2l import torch as d2l

batch_size, num_steps = 32, 35
train_iter, vocab = d2l.load_data_time_machine(batch_size, num_steps)

# custom model: use the framework's GRU layer in place of the hand-written recurrence
num_hiddens = 256
num_inputs = vocab_size = len(vocab)
gru_layer = nn.GRU(num_inputs, num_hiddens)

# initialize the state: (num_layers * num_directions, batch_size, num_hiddens)
state = torch.zeros((1, batch_size, num_hiddens))
X = torch.rand(size=(num_steps, batch_size, len(vocab)))

class RNNModel(nn.Module):
    # wraps an RNN/GRU layer with an output layer (same idea as d2l.RNNModel)
    def __init__(self, rnn_layer, vocab_size, **kwargs):
        super(RNNModel, self).__init__(**kwargs)
        self.rnn = rnn_layer
        self.vocab_size = vocab_size
        self.num_hiddens = self.rnn.hidden_size
        if not self.rnn.bidirectional:
            self.num_directions = 1
            self.linear = nn.Linear(self.num_hiddens, self.vocab_size)
        else:
            self.num_directions = 2
            self.linear = nn.Linear(self.num_hiddens * 2, self.vocab_size)

    def forward(self, inputs, state):
        X = F.one_hot(inputs.T.long(), self.vocab_size)
        X = X.to(torch.float32)
        Y, state = self.rnn(X, state)
        output = self.linear(Y.reshape((-1, Y.shape[-1])))
        return output, state

model = d2l.RNNModel(gru_layer, len(vocab))
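A quick sanity check, sketched under the assumption that the X, state, and model objects defined above are used as-is; the only point here is the tensor shapes:

# Y has shape (num_steps, batch_size, num_hiddens)
Y, state_new = gru_layer(X, state)
print(Y.shape, state_new.shape)

# the wrapper one-hots integer token indices internally;
# output has shape (num_steps * batch_size, len(vocab))
tokens = torch.randint(0, len(vocab), (batch_size, num_steps))
output, new_state = model(tokens, state)
print(output.shape, new_state.shape)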