
NLP - Pre-trained Models - 2018 - BERT - Explained: BertForMaskedLM

Example: computing a sentence's pseudo-perplexity with BertForMaskedLM (bert-base-chinese).
import numpy as np
import torch
import torch.nn as nn
from transformers import BertTokenizer, BertForMaskedLM

# Run the whole evaluation without gradient tracking; only forward passes are needed
with torch.no_grad():
    # Load pre-trained tokenizer (vocabulary) and model (weights) from a local directory
    tokenizer = BertTokenizer.from_pretrained(r'D:\Pretrained_Model\bert-base-chinese')
    model = BertForMaskedLM.from_pretrained(r'D:\Pretrained_Model\bert-base-chinese')
    model.eval()

    sentence = "我不会忘记和你一起奋斗的时光。"
    tokenize_input = tokenizer.tokenize(sentence)
    print('tokenize_input = ', tokenize_input)

    tensor_input = torch.tensor([tokenizer.convert_tokens_to_ids(tokenize_input)])
    sen_len = len(tokenize_input)
    sentence_loss = 0.

    for idx, word in enumerate(tokenize_input):
        print('\n\n idx = {0}'.format(idx))
        # replace the idx-th character of the sentence with [MASK]
        tokenize_input[idx] = '[MASK]'
        mask_input = torch.tensor([tokenizer.convert_tokens_to_ids(tokenize_input)])
        print('\t mask_input = {0}'.format(mask_input))

        output = model(mask_input)
        print('\n\t output = {0}'.format(output))

        prediction_scores = output[0]
        print('\n\t prediction_scores = output[0] = {0}'.format(prediction_scores))

        # turn the logits at the masked position into log-probabilities over the vocabulary
        softmax = nn.Softmax(dim=0)
        ps = softmax(prediction_scores[0, idx]).log()
        print('\n\t ps = {0}'.format(ps))

        # log-probability assigned to the original character at the masked position
        word_loss = ps[tensor_input[0, idx]]
        print('\n\t word_loss = {0}'.format(word_loss))

        sentence_loss += word_loss.item()
        tokenize_input[idx] = word  # restore the original character before masking the next one

    # pseudo-perplexity: exp of the negative mean masked-token log-likelihood
    ppl = np.exp(-sentence_loss / sen_len)

    print("sentence_loss = {0};ppl = {1}".format(sentence_loss, ppl))

Output:

tokenize_input =  ['我', '不', '会', '忘', '记', '和', '你', '一', '起', '奋', '斗', '的', '时', '光', '。']


 idx = 0
	 mask_input = tensor([[ 103,  679,  833, 2563, 6381, 1469,  872,  671, 6629, 1939, 3159, 4638,
         3198, 1045,  511]])

	 output = MaskedLMOutput(loss=None, logits=tensor([[[-10.0067,  -9.9702, -10.3403,  ...,  -7.0367,  -7.9918,  -7.8884],
         [ -8.9250,  -8.6627,  -8.8329,  ...,  -5.6988,  -5.0543,  -7.3196],
         [-17.5815, -16.8282, -17.5551,  ..., -11.2575,  -8.4464, -15.8063],
         ...,
         [-17.7271, -17.4097, -18.3814,  ..., -12.5380, -14.9620, -13.0537],
         [-14.8090, -15.5407, -14.8516,  ...,  -9.6344,  -8.9355, -11.3215],
         [-10.2498, -10.0447, -10.2479,  ...,  -5.7584,  -4.9482,  -5.1695]]]), hidden_states=None, attentions=None)

	 prediction_scores = output[0] = tensor([[[-10.0067,  -9.9702, -10.3403,  ...,  -7.0367,  -7.9918,  -7.8884],
         [ -8.9250,  -8.6627,  -8.8329,  ...,  -5.6988,  -5.0543,  -7.3196],
         [-17.5815, -16.8282, -17.5551,  ..., -11.2575,  -8.4464, -15.8063],
         ...,
         [-17.7271, -17.4097, -18.3814,  ..., -12.5380, -14.9620, -13.0537],
         [-14.8090, -15.5407, -14.8516,  ...,  -9.6344,  -8.9355, -11.3215],
         [-10.2498, -10.0447, -10.2479,  ...,  -5.7584,  -4.9482,  -5.1695]]])

	 ps = tensor([-20.2204, -20.1840, -20.5541,  ..., -17.2505, -18.2055, -18.1022])

	 word_loss = -4.207489013671875


 idx = 1
	 mask_input = tensor([[2769,  103,  833, 2563, 6381, 1469,  872,  671, 6629, 1939, 3159, 4638,
         3198, 1045,  511]])

	 output = MaskedLMOutput(loss=None, logits=tensor([[[-14.2572, -13.5664, -13.7818,  ..., -12.4880, -11.4941, -11.0043],
         [-13.3572, -12.7593, -13.1295,  ...,  -8.9165,  -6.9501,  -8.0928],
         [-18.3267, -17.2391, -16.6626,  ...,  -9.1351,  -8.5136, -10.8610],
         ...,
         [-16.8631, -15.9635, -16.3637,  ..., -11.8876, -12.6025, -10.4363],
         [-14.1836, -14.0044, -13.6275,  ...,  -9.0348, -10.7950,  -9.2346],
         [-16.2714, -15.7472, -15.5543,  ...,  -9.3256,  -9.7824,  -7.4806]]]), hidden_states=None, attentions=None)

	 prediction_scores = output[0] = tensor([[[-14.2572, -13.5664, -13.7818,  ..., -12.4880, -11.4941, -11.0043],
         [-13.3572, -12.7593, -13.1295,  ...,  -8.9165,  -6.9501,  -8.0928],
         [-18.3267, -17.2391, -16.6626,  ...,  -9.1351,  -8.5136, -10.8610],
         ...,
         [-16.8631, -15.9635, -16.3637,  ..., -11.8876, -12.6025, -10.4363],
         [-14.1836, -14.0044, -13.6275,  ...,  -9.0348, -10.7950,  -9.2346],
         [-16.2714, -15.7472, -15.5543,  ...,  -9.3256,  -9.7824,  -7.4806]]])

	 ps = tensor([-27.0073, -26.4094, -26.7796,  ..., -22.5666, -20.6002, -21.7429])

	 word_loss = -3.4179904460906982


 idx = 2
	 mask_input = tensor([[2769,  679,  103, 2563, 6381, 1469,  872,  671, 6629, 1939, 3159, 4638,
         3198, 1045,  511]])

	 output = MaskedLMOutput(loss=None, logits=tensor([[[-10.4260,  -9.7421, -10.0949,  ...,  -9.1981,  -9.3232,  -9.0737],
         [-11.1497, -10.3329, -10.3952,  ...,  -6.6423,  -5.8855,  -7.4425],
         [-10.2441,  -9.8596, -10.0538,  ...,  -6.8899,  -6.3872,  -7.1557],
         ...,
         [-14.8344, -13.9255, -14.6416,  ..., -11.8463, -11.3034,  -9.4505],
         [-13.0585, -12.7334, -12.5315,  ...,  -9.1430,  -9.0249,  -8.6625],
         [-10.8999, -10.1885, -10.4381,  ...,  -6.9490,  -6.5864,  -5.2088]]]), hidden_states=None, attentions=None)

	 prediction_scores = output[0] = tensor([[[-10.4260,  -9.7421, -10.0949,  ...,  -9.1981,  -9.3232,  -9.0737],
         [-11.1497, -10.3329, -10.3952,  ...,  -6.6423,  -5.8855,  -7.4425],
         [-10.2441,  -9.8596, -10.0538,  ...,  -6.8899,  -6.3872,  -7.1557],
         ...,
         [-14.8344, -13.9255, -14.6416,  ..., -11.8463, -11.3034,  -9.4505],
         [-13.0585, -12.7334, -12.5315,  ...,  -9.1430,  -9.0249,  -8.6625],
         [-10.8999, -10.1885, -10.4381,  ...,  -6.9490,  -6.5864,  -5.2088]]])

	 ps = tensor([-23.9556, -23.5712, -23.7654,  ..., -20.6015, -20.0987, -20.8673])

	 word_loss = -3.0778353214263916


 idx = 3
	 mask_input = tensor([[2769,  679,  833,  103, 6381, 1469,  872,  671, 6629, 1939, 3159, 4638,
         3198, 1045,  511]])

	 output = MaskedLMOutput(loss=None, logits=tensor([[[-11.1854, -10.8186, -10.8980,  ..., -10.0304,  -6.8312, -10.1228],
         [-18.3292, -17.1635, -18.1168,  ..., -12.8976,  -6.5055, -10.3133],
         [-18.9977, -17.6461, -18.6712,  ..., -12.0834,  -9.4692, -13.3222],
         ...,
         [-15.9868, -15.1038, -15.7956,  ..., -11.8385,  -8.8921, -11.2440],
         [-13.2753, -13.0012, -12.8868,  ...,  -8.5294,  -7.7151,  -9.6861],
         [-14.0791, -13.6179, -13.8650,  ...,  -9.9380,  -8.0259,  -6.6505]]]), hidden_states=None, attentions=None)

	 prediction_scores = output[0] = tensor([[[-11.1854, -10.8186, -10.8980,  ..., -10.0304,  -6.8312, -10.1228],
         [-18.3292, -17.1635, -18.1168,  ..., -12.8976,  -6.5055, -10.3133],
         [-18.9977, -17.6461, -18.6712,  ..., -12.0834,  -9.4692, -13.3222],
         ...,
         [-15.9868, -15.1038, -15.7956,  ..., -11.8385,  -8.8921, -11.2440],
         [-13.2753, -13.0012, -12.8868,  ...,  -8.5294,  -7.7151,  -9.6861],
         [-14.0791, -13.6179, -13.8650,  ...,  -9.9380,  -8.0259,  -6.6505]]])

	 ps = tensor([-28.6803, -28.3364, -28.7086,  ..., -26.4609, -23.3448, -25.8600])

	 word_loss = -0.024608036503195763


 idx = 4
	 mask_input = tensor([[2769,  679,  833, 2563,  103, 1469,  872,  671, 6629, 1939, 3159, 4638,
         3198, 1045,  511]])

	 output = MaskedLMOutput(loss=None, logits=tensor([[[-11.6949, -11.0920, -11.4218,  ..., -10.2302,  -9.3920, -10.9836],
         [-18.6331, -17.9585, -18.3607,  ..., -12.7316, -10.2360, -14.0741],
         [-19.6247, -18.4559, -19.2653,  ..., -12.6368, -11.0657, -15.6243],
         ...,
         [-15.9810, -15.1353, -15.9852,  ..., -12.4308, -12.2341, -10.9428],
         [-13.4082, -13.1908, -13.3454,  ..., -10.0117, -10.6251, -10.7604],
         [-13.8807, -13.1495, -13.6315,  ...,  -9.3678,  -9.9106,  -7.1275]]]), hidden_states=None, attentions=None)

	 prediction_scores = output[0] = tensor([[[-11.6949, -11.0920, -11.4218,  ..., -10.2302,  -9.3920, -10.9836],
         [-18.6331, -17.9585, -18.3607,  ..., -12.7316, -10.2360, -14.0741],
         [-19.6247, -18.4559, -19.2653,  ..., -12.6368, -11.0657, -15.6243],
         ...,
         [-15.9810, -15.1353, -15.9852,  ..., -12.4308, -12.2341, -10.9428],
         [-13.4082, -13.1908, -13.3454,  ..., -10.0117, -10.6251, -10.7604],
         [-13.8807, -13.1495, -13.6315,  ...,  -9.3678,  -9.9106,  -7.1275]]])

	 ps = tensor([-30.6680, -30.0711, -30.5083,  ..., -28.1964, -25.7133, -29.4577])

	 word_loss = -0.021782301366329193


 idx = 5
	 mask_input = tensor([[2769,  679,  833, 2563, 6381,  103,  872,  671, 6629, 1939, 3159, 4638,
         3198, 1045,  511]])

	 output = MaskedLMOutput(loss=None, logits=tensor([[[-10.8215, -10.1308, -10.5400,  ...,  -9.4374,  -9.1841,  -9.7690],
         [-16.6464, -15.7021, -16.0986,  ...,  -9.1416,  -7.5447,  -8.9926],
         [-18.4551, -17.0224, -17.3103,  ...,  -8.7594,  -8.8654, -10.6732],
         ...,
         [-14.8322, -13.5759, -14.5636,  ..., -10.8961, -10.6665,  -8.9241],
         [-12.3797, -11.8117, -11.9058,  ...,  -8.7238,  -9.1733,  -9.1059],
         [-12.6140, -11.4767, -11.6919,  ...,  -8.0748,  -9.4955,  -5.7950]]]), hidden_states=None, attentions=None)

	 prediction_scores = output[0] = tensor([[[-10.8215, -10.1308, -10.5400,  ...,  -9.4374,  -9.1841,  -9.7690],
         [-16.6464, -15.7021, -16.0986,  ...,  -9.1416,  -7.5447,  -8.9926],
         [-18.4551, -17.0224, -17.3103,  ...,  -8.7594,  -8.8654, -10.6732],
         ...,
         [-14.8322, -13.5759, -14.5636,  ..., -10.8961, -10.6665,  -8.9241],
         [-12.3797, -11.8117, -11.9058,  ...,  -8.7238,  -9.1733,  -9.1059],
         [-12.6140, -11.4767, -11.6919,  ...,  -8.0748,  -9.4955,  -5.7950]]])

	 ps = tensor([-20.0339, -19.5133, -20.0343,  ..., -17.0866, -17.4351, -15.4161])

	 word_loss = -2.464529037475586


 idx = 6
	 mask_input = tensor([[2769,  679,  833, 2563, 6381, 1469,  103,  671, 6629, 1939, 3159, 4638,
         3198, 1045,  511]])

	 output = MaskedLMOutput(loss=None, logits=tensor([[[-11.6927, -10.7244, -10.8993,  ...,  -8.0539,  -8.4719,  -9.0431],
         [-14.6502, -14.0066, -14.4193,  ...,  -7.7190,  -5.6522,  -8.8189],
         [-17.8192, -16.0978, -17.0802,  ...,  -8.5008,  -7.9125, -11.5379],
         ...,
         [-15.0797, -14.0576, -14.8092,  ..., -10.5593, -11.1677,  -9.6744],
         [-12.6444, -12.2899, -12.1446,  ...,  -8.7772,  -9.4889,  -9.6838],
         [-11.8326, -11.0902, -11.1104,  ...,  -7.6406,  -8.1461,  -6.2924]]]), hidden_states=None, attentions=None)

	 prediction_scores = output[0] = tensor([[[-11.6927, -10.7244, -10.8993,  ...,  -8.0539,  -8.4719,  -9.0431],
         [-14.6502, -14.0066, -14.4193,  ...,  -7.7190,  -5.6522,  -8.8189],
         [-17.8192, -16.0978, -17.0802,  ...,  -8.5008,  -7.9125, -11.5379],
         ...,
         [-15.0797, -14.0576, -14.8092,  ..., -10.5593, -11.1677,  -9.6744],
         [-12.6444, -12.2899, -12.1446,  ...,  -8.7772,  -9.4889,  -9.6838],
         [-11.8326, -11.0902, -11.1104,  ...,  -7.6406,  -8.1461,  -6.2924]]])

	 ps = tensor([-17.8420, -17.7343, -17.7814,  ..., -15.6324, -16.8942, -15.6699])

	 word_loss = -3.217534065246582


 idx = 7
	 mask_input = tensor([[2769,  679,  833, 2563, 6381, 1469,  872,  103, 6629, 1939, 3159, 4638,
         3198, 1045,  511]])

	 output = MaskedLMOutput(loss=None, logits=tensor([[[-12.0652, -11.1372, -11.7658,  ..., -10.3255,  -9.5978, -10.2930],
         [-17.4623, -16.3227, -17.0211,  ..., -10.0448,  -8.8320, -11.6701],
         [-19.7825, -18.2467, -18.9617,  ..., -10.4417, -10.0575, -13.2705],
         ...,
         [-16.7194, -15.7009, -16.5568,  ..., -11.9396, -12.9538,  -9.1279],
         [-14.1858, -13.9772, -14.0763,  ...,  -9.9030, -10.4625,  -8.7678],
         [-14.0998, -13.0324, -13.3418,  ...,  -8.7676, -10.0443,  -6.4476]]]), hidden_states=None, attentions=None)

	 prediction_scores = output[0] = tensor([[[-12.0652, -11.1372, -11.7658,  ..., -10.3255,  -9.5978, -10.2930],
         [-17.4623, -16.3227, -17.0211,  ..., -10.0448,  -8.8320, -11.6701],
         [-19.7825, -18.2467, -18.9617,  ..., -10.4417, -10.0575, -13.2705],
         ...,
         [-16.7194, -15.7009, -16.5568,  ..., -11.9396, -12.9538,  -9.1279],
         [-14.1858, -13.9772, -14.0763,  ...,  -9.9030, -10.4625,  -8.7678],
         [-14.0998, -13.0324, -13.3418,  ...,  -8.7676, -10.0443,  -6.4476]]])

	 ps = tensor([-29.0154, -28.9152, -28.5686,  ..., -23.7333, -25.4041, -24.8862])

	 word_loss = -0.006231430917978287


 idx = 8
	 mask_input = tensor([[2769,  679,  833, 2563, 6381, 1469,  872,  671,  103, 1939, 3159, 4638,
         3198, 1045,  511]])

	 output = MaskedLMOutput(loss=None, logits=tensor([[[-12.3327, -11.7290, -12.1774,  ..., -10.6400,  -9.2812, -10.8762],
         [-17.4025, -16.3325, -17.3093,  ...,  -9.6641,  -8.0054, -10.9477],
         [-19.8157, -18.1812, -19.2325,  ..., -10.3199,  -9.6911, -13.2068],
         ...,
         [-15.4990, -14.1986, -15.4210,  ..., -10.8605, -11.1951,  -9.2175],
         [-13.5214, -13.1154, -13.2580,  ...,  -9.1551,  -8.5442,  -8.5556],
         [-13.9661, -12.7296, -13.4830,  ...,  -7.9905,  -9.4974,  -5.5795]]]), hidden_states=None, attentions=None)

	 prediction_scores = output[0] = tensor([[[-12.3327, -11.7290, -12.1774,  ..., -10.6400,  -9.2812, -10.8762],
         [-17.4025, -16.3325, -17.3093,  ...,  -9.6641,  -8.0054, -10.9477],
         [-19.8157, -18.1812, -19.2325,  ..., -10.3199,  -9.6911, -13.2068],
         ...,
         [-15.4990, -14.1986, -15.4210,  ..., -10.8605, -11.1951,  -9.2175],
         [-13.5214, -13.1154, -13.2580,  ...,  -9.1551,  -8.5442,  -8.5556],
         [-13.9661, -12.7296, -13.4830,  ...,  -7.9905,  -9.4974,  -5.5795]]])

	 ps = tensor([-26.1031, -25.4673, -25.6910,  ..., -23.7415, -24.6235, -23.6001])

	 word_loss = -0.4470815658569336


 idx = 9
	 mask_input = tensor([[2769,  679,  833, 2563, 6381, 1469,  872,  671, 6629,  103, 3159, 4638,
         3198, 1045,  511]])

	 output = MaskedLMOutput(loss=None, logits=tensor([[[-12.9872, -12.3978, -12.9848,  ..., -11.8125, -12.0875, -12.0079],
         [-17.5210, -16.8555, -17.3870,  ..., -10.8851,  -9.9333, -12.9947],
         [-19.9390, -18.8892, -19.5466,  ..., -12.1456, -11.2809, -13.7224],
         ...,
         [-14.5711, -13.7166, -14.6204,  ..., -10.1978, -11.9384,  -9.0040],
         [-13.0610, -12.8815, -12.9802,  ...,  -9.4830,  -9.4141, -10.5692],
         [-14.2910, -13.4047, -14.0815,  ...,  -8.9679, -11.1636,  -6.8003]]]), hidden_states=None, attentions=None)

	 prediction_scores = output[0] = tensor([[[-12.9872, -12.3978, -12.9848,  ..., -11.8125, -12.0875, -12.0079],
         [-17.5210, -16.8555, -17.3870,  ..., -10.8851,  -9.9333, -12.9947],
         [-19.9390, -18.8892, -19.5466,  ..., -12.1456, -11.2809, -13.7224],
         ...,
         [-14.5711, -13.7166, -14.6204,  ..., -10.1978, -11.9384,  -9.0040],
         [-13.0610, -12.8815, -12.9802,  ...,  -9.4830,  -9.4141, -10.5692],
         [-14.2910, -13.4047, -14.0815,  ...,  -8.9679, -11.1636,  -6.8003]]])

	 ps = tensor([-23.7871, -23.3177, -23.7469,  ..., -19.6898, -21.1636, -19.3445])

	 word_loss = -1.905866265296936


 idx = 10
	 mask_input = tensor([[2769,  679,  833, 2563, 6381, 1469,  872,  671, 6629, 1939,  103, 4638,
         3198, 1045,  511]])

	 output = MaskedLMOutput(loss=None, logits=tensor([[[-10.2435,  -9.4682,  -9.9029,  ...,  -8.6173,  -7.9944,  -9.5463],
         [-14.7155, -14.1531, -14.7035,  ...,  -7.7060,  -7.0066,  -8.7167],
         [-17.8262, -16.8357, -17.2724,  ...,  -9.3416,  -9.6015, -11.3678],
         ...,
         [-13.5025, -12.6059, -13.4680,  ...,  -9.6887, -10.2040,  -7.5718],
         [-11.8572, -11.8200, -11.6956,  ...,  -8.0838,  -8.2098,  -7.9838],
         [-11.4906, -10.7753, -11.1489,  ...,  -6.4764,  -8.7700,  -4.7994]]]), hidden_states=None, attentions=None)

	 prediction_scores = output[0] = tensor([[[-10.2435,  -9.4682,  -9.9029,  ...,  -8.6173,  -7.9944,  -9.5463],
         [-14.7155, -14.1531, -14.7035,  ...,  -7.7060,  -7.0066,  -8.7167],
         [-17.8262, -16.8357, -17.2724,  ...,  -9.3416,  -9.6015, -11.3678],
         ...,
         [-13.5025, -12.6059, -13.4680,  ...,  -9.6887, -10.2040,  -7.5718],
         [-11.8572, -11.8200, -11.6956,  ...,  -8.0838,  -8.2098,  -7.9838],
         [-11.4906, -10.7753, -11.1489,  ...,  -6.4764,  -8.7700,  -4.7994]]])

	 ps = tensor([-23.3028, -23.2676, -24.0384,  ..., -20.8967, -21.3373, -20.7125])

	 word_loss = -0.3310864269733429


 idx = 11
	 mask_input = tensor([[2769,  679,  833, 2563, 6381, 1469,  872,  671, 6629, 1939, 3159,  103,
         3198, 1045,  511]])

	 output = MaskedLMOutput(loss=None, logits=tensor([[[-11.1587, -10.6746, -11.6326,  ...,  -9.9938,  -8.8795, -10.4635],
         [-14.6800, -14.1649, -14.6931,  ...,  -7.1853,  -6.1263, -11.4231],
         [-17.5996, -16.4610, -17.1693,  ...,  -7.9229,  -7.0681, -13.4018],
         ...,
         [-13.6107, -12.4848, -13.5183,  ...,  -9.4305,  -9.1442,  -7.4951],
         [-11.5701, -11.2959, -11.3109,  ...,  -7.2745,  -7.0823,  -8.4521],
         [-13.5606, -12.9446, -13.3137,  ...,  -8.5220,  -9.7932,  -7.7482]]]), hidden_states=None, attentions=None)

	 prediction_scores = output[0] = tensor([[[-11.1587, -10.6746, -11.6326,  ...,  -9.9938,  -8.8795, -10.4635],
         [-14.6800, -14.1649, -14.6931,  ...,  -7.1853,  -6.1263, -11.4231],
         [-17.5996, -16.4610, -17.1693,  ...,  -7.9229,  -7.0681, -13.4018],
         ...,
         [-13.6107, -12.4848, -13.5183,  ...,  -9.4305,  -9.1442,  -7.4951],
         [-11.5701, -11.2959, -11.3109,  ...,  -7.2745,  -7.0823,  -8.4521],
         [-13.5606, -12.9446, -13.3137,  ...,  -8.5220,  -9.7932,  -7.7482]]])

	 ps = tensor([-24.5581, -24.6442, -24.8213,  ..., -21.0443, -21.8916, -20.5020])

	 word_loss = -0.0409548319876194


 idx = 12
	 mask_input = tensor([[2769,  679,  833, 2563, 6381, 1469,  872,  671, 6629, 1939, 3159, 4638,
          103, 1045,  511]])

	 output = MaskedLMOutput(loss=None, logits=tensor([[[-11.2262, -11.1173, -11.6287,  ..., -10.3565,  -9.4391, -11.4225],
         [-15.2089, -14.6585, -15.3925,  ...,  -7.9105,  -6.7598, -10.2716],
         [-17.7514, -16.8604, -17.4242,  ...,  -8.0904,  -8.6169, -12.3799],
         ...,
         [-11.9148, -11.5928, -12.1447,  ...,  -7.0739,  -9.0568,  -7.8991],
         [-10.9299, -10.9160, -10.9438,  ...,  -5.0096,  -7.1774,  -7.3603],
         [-14.6292, -14.3548, -14.3348,  ...,  -6.8946, -10.3034,  -8.7604]]]), hidden_states=None, attentions=None)

	 prediction_scores = output[0] = tensor([[[-11.2262, -11.1173, -11.6287,  ..., -10.3565,  -9.4391, -11.4225],
         [-15.2089, -14.6585, -15.3925,  ...,  -7.9105,  -6.7598, -10.2716],
         [-17.7514, -16.8604, -17.4242,  ...,  -8.0904,  -8.6169, -12.3799],
         ...,
         [-11.9148, -11.5928, -12.1447,  ...,  -7.0739,  -9.0568,  -7.8991],
         [-10.9299, -10.9160, -10.9438,  ...,  -5.0096,  -7.1774,  -7.3603],
         [-14.6292, -14.3548, -14.3348,  ...,  -6.8946, -10.3034,  -8.7604]]])

	 ps = tensor([-26.6420, -26.3200, -26.8719,  ..., -21.8011, -23.7840, -22.6264])

	 word_loss = -0.2741313576698303


 idx = 13
	 mask_input = tensor([[2769,  679,  833, 2563, 6381, 1469,  872,  671, 6629, 1939, 3159, 4638,
         3198,  103,  511]])

	 output = MaskedLMOutput(loss=None, logits=tensor([[[-12.9708, -12.1014, -12.6502,  ..., -10.6858, -10.9495, -11.6393],
         [-17.4693, -16.4352, -17.2923,  ..., -10.1345,  -9.2979, -12.3043],
         [-19.2976, -17.8839, -18.8252,  ..., -11.4233, -10.9146, -13.9556],
         ...,
         [-14.2439, -13.8837, -14.3827,  ..., -10.8131,  -9.7626, -10.4449],
         [-11.0731, -11.4156, -11.2104,  ...,  -8.5579,  -9.0104,  -8.7935],
         [-13.5802, -13.1632, -13.3280,  ...,  -9.2640, -10.9600,  -8.3216]]]), hidden_states=None, attentions=None)

	 prediction_scores = output[0] = tensor([[[-12.9708, -12.1014, -12.6502,  ..., -10.6858, -10.9495, -11.6393],
         [-17.4693, -16.4352, -17.2923,  ..., -10.1345,  -9.2979, -12.3043],
         [-19.2976, -17.8839, -18.8252,  ..., -11.4233, -10.9146, -13.9556],
         ...,
         [-14.2439, -13.8837, -14.3827,  ..., -10.8131,  -9.7626, -10.4449],
         [-11.0731, -11.4156, -11.2104,  ...,  -8.5579,  -9.0104,  -8.7935],
         [-13.5802, -13.1632, -13.3280,  ...,  -9.2640, -10.9600,  -8.3216]]])

	 ps = tensor([-26.7180, -27.0605, -26.8553,  ..., -24.2028, -24.6553, -24.4384])

	 word_loss = -2.3570048809051514


 idx = 14
	 mask_input = tensor([[2769,  679,  833, 2563, 6381, 1469,  872,  671, 6629, 1939, 3159, 4638,
         3198, 1045,  103]])

	 output = MaskedLMOutput(loss=None, logits=tensor([[[-11.3928, -10.5590, -11.3276,  ...,  -8.2870,  -7.0281,  -9.3417],
         [-15.1979, -14.2848, -14.9167,  ...,  -8.0477,  -3.3199,  -9.9085],
         [-16.9765, -15.7591, -16.4064,  ...,  -7.3844,  -3.6073, -10.5002],
         ...,
         [-14.3350, -13.5203, -14.7181,  ...,  -9.1939,  -8.4368,  -6.3008],
         [-11.5855, -11.6669, -11.5224,  ...,  -6.1303,  -7.0456,  -5.4713],
         [ -9.3767,  -9.1142,  -9.3964,  ...,  -5.2297,  -5.3290,  -3.2478]]]), hidden_states=None, attentions=None)

	 prediction_scores = output[0] = tensor([[[-11.3928, -10.5590, -11.3276,  ...,  -8.2870,  -7.0281,  -9.3417],
         [-15.1979, -14.2848, -14.9167,  ...,  -8.0477,  -3.3199,  -9.9085],
         [-16.9765, -15.7591, -16.4064,  ...,  -7.3844,  -3.6073, -10.5002],
         ...,
         [-14.3350, -13.5203, -14.7181,  ...,  -9.1939,  -8.4368,  -6.3008],
         [-11.5855, -11.6669, -11.5224,  ...,  -6.1303,  -7.0456,  -5.4713],
         [ -9.3767,  -9.1142,  -9.3964,  ...,  -5.2297,  -5.3290,  -3.2478]]])

	 ps = tensor([-20.6789, -20.4164, -20.6986,  ..., -16.5319, -16.6312, -14.5500])

	 word_loss = -1.3718788623809814
sentence_loss = -23.16600384376943;ppl = 4.685160888290345

Process finished with exit code 0
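
For reference, the same computation can be written a bit more compactly with torch.nn.functional.log_softmax, and with [CLS]/[SEP] added so the input matches BERT's pre-training format. This is a minimal sketch under those assumptions, not the original author's code: the function name pseudo_perplexity is illustrative, the local model path is simply reused from the listing above, and adding the special tokens will change the numbers slightly.

import numpy as np
import torch
import torch.nn.functional as F
from transformers import BertTokenizer, BertForMaskedLM


def pseudo_perplexity(sentence, model_dir=r'D:\Pretrained_Model\bert-base-chinese'):
    """Mask each character in turn and average the log-probability of the original one."""
    tokenizer = BertTokenizer.from_pretrained(model_dir)
    model = BertForMaskedLM.from_pretrained(model_dir)
    model.eval()

    tokens = tokenizer.tokenize(sentence)
    # wrap with [CLS]/[SEP] so the input matches BERT's pre-training format
    ids = tokenizer.convert_tokens_to_ids(['[CLS]'] + tokens + ['[SEP]'])
    mask_id = tokenizer.convert_tokens_to_ids('[MASK]')

    total_log_prob = 0.0
    with torch.no_grad():
        for i in range(1, len(ids) - 1):                      # skip [CLS] and [SEP]
            masked_ids = list(ids)
            original_id = masked_ids[i]
            masked_ids[i] = mask_id
            logits = model(torch.tensor([masked_ids])).logits  # [1, seq_len, vocab_size]
            log_probs = F.log_softmax(logits[0, i], dim=-1)
            total_log_prob += log_probs[original_id].item()

    # pseudo-perplexity = exp of the negative mean masked-token log-likelihood
    return np.exp(-total_log_prob / len(tokens))


print(pseudo_perplexity("我不会忘记和你一起奋斗的时光。"))

Compared with the listing above, F.log_softmax is numerically more stable than Softmax(...).log(), and looping only over the positions between [CLS] and [SEP] keeps the special tokens out of the average, so the perplexity is still defined over the original characters.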



