当前位置:   article > 正文

NLP对抗训练:PyTorch、TensorFlow(NLP 学 TensorFlow 还是 PyTorch)

NLP 学 TensorFlow 还是 PyTorch?

一、定义

对抗样本:对输入增加微小扰动得到的样本。旨在增加模型损失

对抗训练:训练模型去区分样例是真实样例还是对抗样本的过程。对抗训练不仅可以提升模型对对抗样本的防御能力,还能提升对原始样本的泛化能力

在这里插入图片描述

二、PyTorch版的NLP对抗训练

三、Tensorflow版的NLP对抗训练

#! -*- coding: utf-8 -*-
import keras
import keras.backend as K
import numpy as np
  4. def search_layer(inputs, name, exclude=None):
  5. """根据inputs和name来搜索层
  6. 说明:inputs为某个层或某个层的输出;name为目标层的名字。
  7. 实现:根据inputs一直往上递归搜索,直到发现名字为name的层为止;
  8. 如果找不到,那就返回None。
  9. """
  10. if exclude is None:
  11. exclude = set()
  12. if isinstance(inputs, keras.layers.Layer):
  13. layer = inputs
  14. else:
  15. layer = inputs._keras_history[0]
  16. if layer.name == name:
  17. return layer
  18. elif layer in exclude:
  19. return None
  20. else:
  21. exclude.add(layer)
  22. inbound_layers = layer._inbound_nodes[0].inbound_layers
  23. if not isinstance(inbound_layers, list):
  24. inbound_layers = [inbound_layers]
  25. if len(inbound_layers) > 0:
  26. for layer in inbound_layers:
  27. layer = search_layer(layer, name, exclude)
  28. if layer is not None:
  29. return layer
  30. def adversarial_training(model, embedding_name, epsilon=1):
  31. """给模型添加对抗训练
  32. 其中model是需要添加对抗训练的keras模型,embedding_name
  33. 则是model里边Embedding层的名字。要在模型compile之后使用。
  34. """
  35. if model.train_function is None: # 如果还没有训练函数
  36. model._make_train_function() # 手动make
  37. old_train_function = model.train_function # 备份旧的训练函数
  38. # 查找Embedding层
  39. for output in model.outputs:
  40. embedding_layer = search_layer(output, embedding_name)
  41. if embedding_layer is not None:
  42. break
  43. if embedding_layer is None:
  44. raise Exception('Embedding layer not found')
  45. # 求Embedding梯度
  46. embeddings = embedding_layer.embeddings # Embedding矩阵
  47. gradients = K.gradients(model.total_loss, [embeddings]) # Embedding梯度
  48. gradients = K.zeros_like(embeddings) + gradients[0] # 转为dense tensor
  49. # 封装为函数
  50. inputs = (model._feed_inputs +
  51. model._feed_targets +
  52. model._feed_sample_weights) # 所有输入层
  53. embedding_gradients = K.function(
  54. inputs=inputs,
  55. outputs=[gradients],
  56. name='embedding_gradients',
  57. ) # 封装为函数
  58. def train_function(inputs): # 重新定义训练函数
  59. grads = embedding_gradients(inputs)[0] # Embedding梯度
  60. delta = epsilon * grads / (np.sqrt((grads**2).sum()) + 1e-8) # 计算扰动
  61. K.set_value(embeddings, K.eval(embeddings) + delta) # 注入扰动
  62. outputs = old_train_function(inputs) # 梯度下降
  63. K.set_value(embeddings, K.eval(embeddings) - delta) # 删除扰动
  64. return outputs
  65. model.train_function = train_function # 覆盖原训练函数

案例:

https://github.com/bojone/bert4keras/blob/master/examples/task_iflytek_adversarial_training.py

  1. #! -*- coding:utf-8 -*-
  2. # 通过对抗训练增强模型的泛化性能
  3. # 比CLUE榜单公开的同数据集上的BERT base的成绩高2%
  4. # 数据集:IFLYTEK' 长文本分类 (https://github.com/CLUEbenchmark/CLUE)
  5. # 博客:https://kexue.fm/archives/7234
  6. # 适用于Keras 2.3.1
  7. import json
  8. import numpy as np
  9. from bert4keras.backend import keras, search_layer, K
  10. from bert4keras.tokenizers import Tokenizer
  11. from bert4keras.models import build_transformer_model
  12. from bert4keras.optimizers import Adam
  13. from bert4keras.snippets import sequence_padding, DataGenerator
  14. from keras.layers import Lambda, Dense
  15. from tqdm import tqdm
  16. num_classes = 119
  17. maxlen = 128
  18. batch_size = 32
  19. # BERT base
  20. config_path = '/root/kg/bert/chinese_L-12_H-768_A-12/bert_config.json'
  21. checkpoint_path = '/root/kg/bert/chinese_L-12_H-768_A-12/bert_model.ckpt'
  22. dict_path = '/root/kg/bert/chinese_L-12_H-768_A-12/vocab.txt'
  23. def load_data(filename):
  24. """加载数据
  25. 单条格式:(文本, 标签id)
  26. """
  27. D = []
  28. with open(filename) as f:
  29. for i, l in enumerate(f):
  30. l = json.loads(l)
  31. text, label = l['sentence'], l['label']
  32. D.append((text, int(label)))
  33. return D
  34. # 加载数据集
  35. train_data = load_data(
  36. '/root/CLUE-master/baselines/CLUEdataset/iflytek/train.json'
  37. )
  38. valid_data = load_data(
  39. '/root/CLUE-master/baselines/CLUEdataset/iflytek/dev.json'
  40. )
  41. # 建立分词器
  42. tokenizer = Tokenizer(dict_path, do_lower_case=True)
  43. class data_generator(DataGenerator):
  44. """数据生成器
  45. """
  46. def __iter__(self, random=False):
  47. batch_token_ids, batch_segment_ids, batch_labels = [], [], []
  48. for is_end, (text, label) in self.sample(random):
  49. token_ids, segment_ids = tokenizer.encode(text, maxlen=maxlen)
  50. batch_token_ids.append(token_ids)
  51. batch_segment_ids.append(segment_ids)
  52. batch_labels.append([label])
  53. if len(batch_token_ids) == self.batch_size or is_end:
  54. batch_token_ids = sequence_padding(batch_token_ids)
  55. batch_segment_ids = sequence_padding(batch_segment_ids)
  56. batch_labels = sequence_padding(batch_labels)
  57. yield [batch_token_ids, batch_segment_ids], batch_labels
  58. batch_token_ids, batch_segment_ids, batch_labels = [], [], []
  59. # 转换数据集
  60. train_generator = data_generator(train_data, batch_size)
  61. valid_generator = data_generator(valid_data, batch_size)
  62. # 加载预训练模型
  63. bert = build_transformer_model(
  64. config_path=config_path,
  65. checkpoint_path=checkpoint_path,
  66. return_keras_model=False,
  67. )
  68. output = Lambda(lambda x: x[:, 0])(bert.model.output)
  69. output = Dense(
  70. units=num_classes,
  71. activation='softmax',
  72. kernel_initializer=bert.initializer
  73. )(output)
  74. model = keras.models.Model(bert.model.input, output)
  75. model.summary()
  76. model.compile(
  77. loss='sparse_categorical_crossentropy',
  78. optimizer=Adam(2e-5),
  79. metrics=['sparse_categorical_accuracy'],
  80. )
  81. def adversarial_training(model, embedding_name, epsilon=1):
  82. """给模型添加对抗训练
  83. 其中model是需要添加对抗训练的keras模型,embedding_name
  84. 则是model里边Embedding层的名字。要在模型compile之后使用。
  85. """
  86. if model.train_function is None: # 如果还没有训练函数
  87. model._make_train_function() # 手动make
  88. old_train_function = model.train_function # 备份旧的训练函数
  89. # 查找Embedding层
  90. for output in model.outputs:
  91. embedding_layer = search_layer(output, embedding_name)
  92. if embedding_layer is not None:
  93. break
  94. if embedding_layer is None:
  95. raise Exception('Embedding layer not found')
  96. # 求Embedding梯度
  97. embeddings = embedding_layer.embeddings # Embedding矩阵
  98. gradients = K.gradients(model.total_loss, [embeddings]) # Embedding梯度
  99. gradients = K.zeros_like(embeddings) + gradients[0] # 转为dense tensor
  100. # 封装为函数
  101. inputs = (
  102. model._feed_inputs + model._feed_targets + model._feed_sample_weights
  103. ) # 所有输入层
  104. embedding_gradients = K.function(
  105. inputs=inputs,
  106. outputs=[gradients],
  107. name='embedding_gradients',
  108. ) # 封装为函数
  109. def train_function(inputs): # 重新定义训练函数
  110. grads = embedding_gradients(inputs)[0] # Embedding梯度
  111. delta = epsilon * grads / (np.sqrt((grads**2).sum()) + 1e-8) # 计算扰动
  112. K.set_value(embeddings, K.eval(embeddings) + delta) # 注入扰动
  113. outputs = old_train_function(inputs) # 梯度下降
  114. K.set_value(embeddings, K.eval(embeddings) - delta) # 删除扰动
  115. return outputs
  116. model.train_function = train_function # 覆盖原训练函数
  117. # 写好函数后,启用对抗训练只需要一行代码
  118. adversarial_training(model, 'Embedding-Token', 0.5)
  119. def evaluate(data):
  120. total, right = 0., 0.
  121. for x_true, y_true in data:
  122. y_pred = model.predict(x_true).argmax(axis=1)
  123. y_true = y_true[:, 0]
  124. total += len(y_true)
  125. right += (y_true == y_pred).sum()
  126. return right / total
  127. class Evaluator(keras.callbacks.Callback):
  128. """评估与保存
  129. """
  130. def __init__(self):
  131. self.best_val_acc = 0.
  132. def on_epoch_end(self, epoch, logs=None):
  133. val_acc = evaluate(valid_generator)
  134. if val_acc > self.best_val_acc:
  135. self.best_val_acc = val_acc
  136. model.save_weights('best_model.weights')
  137. print(
  138. u'val_acc: %.5f, best_val_acc: %.5f\n' %
  139. (val_acc, self.best_val_acc)
  140. )
  141. def predict_to_file(in_file, out_file):
  142. """输出预测结果到文件
  143. 结果文件可以提交到 https://www.cluebenchmarks.com 评测。
  144. """
  145. fw = open(out_file, 'w')
  146. with open(in_file) as fr:
  147. for l in tqdm(fr):
  148. l = json.loads(l)
  149. text = l['sentence']
  150. token_ids, segment_ids = tokenizer.encode(text, maxlen=maxlen)
  151. label = model.predict([[token_ids], [segment_ids]])[0].argmax()
  152. l = json.dumps({'id': str(l['id']), 'label': str(label)})
  153. fw.write(l + '\n')
  154. fw.close()
  155. if __name__ == '__main__':
  156. evaluator = Evaluator()
  157. model.fit(
  158. train_generator.forfit(),
  159. steps_per_epoch=len(train_generator),
  160. epochs=50,
  161. callbacks=[evaluator]
  162. )
  163. else:
  164. model.load_weights('best_model.weights')
  165. # predict_to_file('/root/CLUE-master/baselines/CLUEdataset/iflytek/test.json', 'iflytek_predict.json')

 

GitHub - bojone/keras_adversarial_training: Adversarial Training for NLP in Keras

训练技巧 | 功守道:NLP中的对抗训练 + PyTorch实现 - 灰信网(软件开发博客聚合)

【炼丹技巧】功守道:NLP中的对抗训练 + PyTorch实现 - 知乎

GitHub - bojone/bert4keras: keras implement of transformers for humans

对抗训练_Fang Suk的博客-CSDN博客_对抗训练

对抗训练浅谈:意义、方法和思考(附Keras实现) - 科学空间|Scientific Spaces 

bert4keras 文档中心 

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/运维做开发/article/detail/965936
推荐阅读
相关标签
  

闽ICP备14008679号