import tensorflow as tf


class Text_BiLSTM(object):
    def __init__(self, Config):
        self.config = Config
        # Placeholders
        self.input_x = tf.placeholder(tf.int32, [None, self.config.seq_length], name="input_x")
        # [None, n_classes]
        self.input_y = tf.placeholder(tf.float32, [None, self.config.num_classes], name="input_y")
        self.keep_prob = tf.placeholder(tf.float32, name="keep_prob")
        # Global step counter
        self.global_step = tf.Variable(0, trainable=False, name="global_step")
        # L2 loss accumulator
        self.l2_loss = tf.constant(0.0)
        self.bi_lstm()

    def bi_lstm(self):
        with tf.name_scope("embedding"):
            # Shape: [vocab_size, embedding_size]
            self.embedding_table = tf.Variable(self.config.pre_training, dtype=tf.float32, name="embedding_table")
            # Shape: [batch_size, seq_length, embedding_size]
            # In RNN terms: [-1, max_time, n_inputs]
            self.embeddings = tf.nn.embedding_lookup(self.embedding_table, self.input_x)

        with tf.name_scope("Bi_LSTM"):
            # Bidirectional RNN: build the basic LSTM cells
            self.lstm_fw_cell = tf.contrib.rnn.BasicLSTMCell(self.config.state_size)
            self.lstm_be_cell = tf.contrib.rnn.BasicLSTMCell(self.config.state_size)
            # Initialize the hidden states
            self.init_fw = self.lstm_fw_cell.zero_state(self.config.batch_size, dtype=tf.float32)
            self.init_be = self.lstm_be_cell.zero_state(self.config.batch_size, dtype=tf.float32)
            # Dynamic unrolling: the loop over time steps is handled automatically;
            # the number of steps is determined by the time dimension of the input.
            # outputs:
            #   the outputs of every time step, returned as a pair (output_fw, output_bw),
            #   each of shape (batch_size, time_steps, cell.output_size).
            #   Note that TensorFlow returns the raw hidden output h here, not a softmax result.
            # final_states:
            #   the state of the last step, also a (fw, bw) pair,
            #   each of shape (batch_size, cell.state_size).
            #   Since this is an LSTM, each state is an LSTMStateTuple of (c, h).
            _, self.final_states = tf.nn.bidirectional_dynamic_rnn(self.lstm_fw_cell,
                                                                   self.lstm_be_cell,
                                                                   self.embeddings,
                                                                   initial_state_fw=self.init_fw,
                                                                   initial_state_bw=self.init_be)

        with tf.name_scope("concat"):
            # Take the final cell state c from the forward and backward states
            # Shape: [batch_size, state_size * 2]
            self.a_final_state = tf.concat([self.final_states[0].c, self.final_states[1].c], axis=1)
            # Dropout
            self.final_output = tf.nn.dropout(self.a_final_state, self.keep_prob)
            # Fully connected layer; units is the output dimension (changes the last axis of inputs).
            # A softmax could also be applied directly here.
            self.logits = tf.layers.dense(self.final_output, units=self.config.num_classes,
                                          kernel_regularizer=tf.contrib.layers.l2_regularizer(self.config.l2_reg_lambda))
            self.probs = tf.nn.softmax(self.logits, -1)
            # Index of the largest value; axis=1 searches along each row
            self.pred_ids = tf.argmax(input=self.probs, axis=1)

        with tf.name_scope("loss"):
            # Cross-entropy loss, one value per example in the batch
            self.cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=self.input_y)
            self.loss = tf.reduce_mean(self.cross_entropy)

        with tf.name_scope('optimizer'):
            # Adam combines Momentum and RMSProp: it keeps a per-parameter learning rate
            # and an exponentially decaying average of past gradients.
            optimizer = tf.train.AdamOptimizer(self.config.lr)
            # Gradient clipping to prevent exploding gradients;
            # compute_gradients returns (gradient, variable) pairs.
            gradients, variables = zip(*optimizer.compute_gradients(self.loss))
            gradients, _ = tf.clip_by_global_norm(gradients, self.config.clip)
            self.optim = optimizer.apply_gradients(zip(gradients, variables), global_step=self.global_step)

        with tf.name_scope('accuracy'):
            # Element-wise comparison per row: True where the prediction equals the label,
            # False otherwise; the result has the same shape as the first argument.
            correct_pred = tf.equal(tf.argmax(self.input_y, 1), self.pred_ids)
            # Cast the booleans to float before taking the mean
            self.acc = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
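For context, here is a minimal sketch (my own addition, not from the original post) of how this graph could be driven in a session. The Config stand-in and the dummy batch are assumptions; only the hyperparameter values that also appear in the configuration further down are reused.

import numpy as np

# Hypothetical Config stand-in; field names follow the class above, while the
# vocabulary size, sequence length and embedding dimension are made up.
class Config(object):
    seq_length = 100
    num_classes = 2
    state_size = 128
    batch_size = 32
    l2_reg_lambda = 0.5
    lr = 0.0005
    clip = 6.0
    pre_training = np.random.randn(5000, 300).astype(np.float32)  # stand-in embedding table

config = Config()
model = Text_BiLSTM(config)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # One dummy batch, just to show how the placeholders are fed
    x_batch = np.random.randint(0, 5000, size=(config.batch_size, config.seq_length))
    y_batch = np.eye(config.num_classes)[np.random.randint(0, config.num_classes, config.batch_size)]
    _, step, loss, acc = sess.run([model.optim, model.global_step, model.loss, model.acc],
                                  feed_dict={model.input_x: x_batch,
                                             model.input_y: y_batch,
                                             model.keep_prob: 0.5})
    print("step {}: loss {:.4f}, acc {:.4f}".format(step, loss, acc))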
# Stacking layers of different sizes with MultiRNNCell
from tensorflow.contrib.rnn import BasicLSTMCell, MultiRNNCell

num_units = [128, 64]
cells = [BasicLSTMCell(num_units=n) for n in num_units]
stacked_rnn_cell = MultiRNNCell(cells)
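A small sketch of how such a stacked cell is typically consumed (my own addition; the input placeholder shape is an assumption): it plugs into dynamic_rnn exactly like a single cell.

inputs = tf.placeholder(tf.float32, [None, 20, 50])  # assumed [batch, time_steps, feature_dim]
outputs, final_state = tf.nn.dynamic_rnn(stacked_rnn_cell, inputs, dtype=tf.float32)
# outputs:     [batch, time_steps, 64] -- the output size of the last layer
# final_state: a tuple with one LSTMStateTuple per layer (state sizes 128 and 64)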
# Incorrect: every layer reuses the very same cell object
one_cell = tf.nn.rnn_cell.LSTMCell(num_units=rnn_size)
decoder_cell = tf.nn.rnn_cell.MultiRNNCell([one_cell for _ in range(dec_num_layers)])
# decoder_cell = tf.nn.rnn_cell.MultiRNNCell([one_cell] * dec_num_layers) is also wrong
Quoting the original answer:
LSTM cell objects and one object is the copy of other (since the pointers of the two objects are same)
The correct approach:
cell_list = [tf.nn.rnn_cell.LSTMCell(num_units=rnn_size) for _ in range(dec_num_layers)]
decoder_cell = tf.nn.rnn_cell.MultiRNNCell(cell_list)
I verified this myself; look closely: the highlighted match is what my in-page search returned, which shows that each cell object appears only once, i.e. the cells really are distinct objects.
Now that we know how to use it correctly, I also want to see what it is actually doing.
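The same check can also be done programmatically (a small sketch of my own, reusing the variables defined above): compare the object identities.

# Every element produced by the list comprehension is a distinct object:
print(len(set(id(c) for c in cell_list)))                    # -> dec_num_layers
# Whereas [one_cell] * dec_num_layers repeats one and the same object:
print(len(set(id(c) for c in [one_cell] * dec_num_layers)))  # -> 1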
First, note its parameter: a list of RNNCells that will be composed in this order.
Its purpose: "Create a RNN cell composed sequentially of a number of RNNCells", i.e. create a single RNN cell made up of several RNN cells stacked in order.
That is rather abstract, and the source code is even stranger; after reading it I don't think it is worth digging too deep. For the unidirectional case, my impression is that it is simply a container: it wraps n RNN cells so that, from the outside, they can be treated as a single cell (see the sketch below). For the bidirectional case, the code I referenced builds a separate MultiRNNCell for the forward and for the backward direction. My intuition says this makes the forward and backward stacks completely independent of each other.
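Roughly, what that container does at each time step can be pictured like this (a simplified sketch of the idea, not the actual TensorFlow source):

# Simplified picture of one MultiRNNCell step: the output of layer i becomes the
# input of layer i + 1, and the per-layer states are collected into a tuple.
def multi_cell_step(cells, inputs, states):
    new_states = []
    cur_inp = inputs
    for cell, state in zip(cells, states):
        cur_inp, new_state = cell(cur_inp, state)
        new_states.append(new_state)
    return cur_inp, tuple(new_states)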
The single-layer case is easy to picture.
But I do have a question:
What exactly happens in a multi-layer bidirectional LSTM, and how are the layers connected? (What I mean is: are the forward and backward LSTMs concatenated at every layer, or only once at the very end?)
I could not find a single diagram for this! Fine, so I analyzed the dimensions through code. First, the observations:
(Here lstm_fw_cell and lstm_be_cell are both multi-layer cells.)
_, self.states = tf.nn.bidirectional_dynamic_rnn(self.lstm_fw_cell,
                                                 self.lstm_be_cell,
                                                 self.embeddings,
                                                 dtype=tf.float32)
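To see what comes back, the nested structure of self.states can simply be printed (an inspection sketch of my own; the names follow the code above):

# self.states is a (forward, backward) pair; with MultiRNNCell each element is a
# tuple holding one LSTMStateTuple per layer, and each c / h is [batch_size, state_size].
states_fw, states_bw = self.states
print(len(states_fw))         # num_layers
print(states_fw[-1].c.shape)  # (batch_size, state_size) -- top-layer cell state (batch may show as ?)
print(states_fw[-1].h.shape)  # (batch_size, state_size) -- top-layer hidden state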
with tf.name_scope("Bi_LSTM"): # 双向rnn: 构建基本LSTM单元,为了不出错,写的很干脆..... self.lstm_fw_cell = tf.nn.rnn_cell.MultiRNNCell([tf.nn.rnn_cell.DropoutWrapper(tf.contrib.rnn.BasicLSTMCell(self.config.state_size),output_keep_prob=(1 - self.config.bi_prob)) for _ in range(self.config.num_layers)]) self.lstm_be_cell = tf.nn.rnn_cell.MultiRNNCell([tf.nn.rnn_cell.DropoutWrapper(tf.contrib.rnn.BasicLSTMCell(self.config.state_size),output_keep_prob=(1 - self.config.bi_prob)) for _ in range(self.config.num_layers)]) _, self.states = tf.nn.bidirectional_dynamic_rnn( self.lstm_fw_cell, self.lstm_be_cell, self.embeddings, dtype=tf.float32) with tf.name_scope("concat"): self.output_fw = self.states[0] self.output_bw = self.states[1] # 原形状为[batch_size,max_len,hidden_num] # 提取前向和后向内的h单元 # [batch_size, state_size * 2 ] self.a_final_state = tf.concat([self.output_fw[-1].c,self.output_bw[-1].c],axis=-1) # drop_out self.final_output = tf.nn.dropout(self.a_final_state, self.keep_prob)
state_size = 128       # hidden size; can be thought of as the dimensionality of the memory cell
# Regularization parameters
keep_prob = 0.5        # dropout keep probability on the concatenated state
bi_prob = 0.5          # dropout rate inside the Bi-LSTM cells (output_keep_prob = 1 - bi_prob)
num_layers = 3
l2_reg_lambda = 0.5    # L2 regularization lambda
lr = 0.0005            # learning rate
lr_decay = 0.9         # learning rate decay
clip = 6.0             # gradient clipping threshold
num_epochs = 20        # epochs
batch_size = 32        # batch size
Embarrassingly, the model still overfits… I give up.
With the code above (both dropouts applied), trained on word vectors I trained myself, the results were poor:
No optimization over 1000 steps, stop training
Train acc is 0.8958333333333334
Value acc is 0.5242659443315797
MAX train acc is 1.0
MAX value acc is 0.6905766526019691
After switching to the official pre-trained word vectors and removing the first dropout, the results were as follows:
Train acc is 0.9421875
Value acc is 0.6133028598218473
MAX train acc is 1.0
MAX value acc is 0.6690107829348335
Note: this was the best result across all of my comparison experiments. With more careful tuning of the other parameters it would probably improve further, but I will leave it at that here.
[1] Building a bidirectional LSTM in TensorFlow
https://www.jianshu.com/p/9c96354fc767
[2] Weight initialization of GRU and LSTM in TensorFlow
http://cairohy.github.io/2017/05/05/ml-coding-summarize/Tensorflow%E4%B8%ADGRU%E5%92%8CLSTM%E7%9A%84%E6%9D%83%E9%87%8D%E5%88%9D%E5%A7%8B%E5%8C%96/
[3] Understanding LSTM in TensorFlow
https://jasdeep06.github.io/posts/Understanding-LSTM-in-Tensorflow-MNIST/
[4] Multi-layer Bi-LSTM code (I think it has some mistakes, but is decent overall)
https://github.com/chilynn/sequence-labeling/blob/master/code/bilstm_crf/BILSTM_CRF.py
[5] Fix for the "No module named 'tensorflow.models'" error
https://blog.csdn.net/RineZ/article/details/81671382
[6] Download link for the TensorFlow models repository
https://github.com/tensorflow/models
[7] Cannot stack LSTM with MultiRNNCell and dynamic_rnn
https://stackoverflow.com/questions/47371608/cannot-stack-lstm-with-multirnncell-and-dynamic-rnn
[8] Using dynamic_rnn with multiRNN gives error
https://stackoverflow.com/questions/48865554/using-dynamic-rnn-with-multirnn-gives-error/53277463#53277463