赞
踩
"""Compute the masked-LM pseudo-perplexity of a sentence with BERT.

Each token of the sentence is replaced by the mask token in turn, the model
predicts the masked position, and the log-probability assigned to the
original token is accumulated.  The perplexity is
``exp(-sum(log_probs) / number_of_tokens)``.
"""
import numpy as np
import torch
import torch.nn as nn

# Local directory holding the pre-trained checkpoint (vocabulary + weights).
MODEL_PATH = r'D:\Pretrained_Model\bert-base-chinese'
# Sentence scored when the file is run as a script.
SENTENCE = "我不会忘记和你一起奋斗的时光。"


def pseudo_perplexity(token_ids, model, mask_token_id, *, verbose=False):
    """Return ``(sentence_loss, ppl)`` for a sequence of token ids.

    Args:
        token_ids: Sequence of integer vocabulary ids for the sentence.
        model: Callable that takes an id tensor of shape ``(1, seq_len)`` and
            returns an object whose element ``[0]`` holds the prediction
            scores, indexed as ``[batch, position, vocab]``.
        mask_token_id: Vocabulary id substituted at the position being scored.
        verbose: When true, print the per-position intermediate values.

    Returns:
        A tuple ``(sentence_loss, ppl)``.  ``sentence_loss`` is the sum of the
        log-probabilities of the original tokens (a non-positive float), and
        ``ppl`` is ``exp(-sentence_loss / len(token_ids))``.

    Raises:
        ValueError: If ``token_ids`` is empty (the average would divide by 0).
    """
    sen_len = len(token_ids)
    if sen_len == 0:
        raise ValueError("cannot compute perplexity of an empty token sequence")

    tensor_input = torch.tensor([list(token_ids)])
    sentence_loss = 0.

    # Inference only: no autograd bookkeeping is needed.
    with torch.no_grad():
        for idx, token_id in enumerate(token_ids):
            # Mask the idx-th position on a copy so the reference ids stay
            # intact and nothing has to be restored afterwards.
            mask_input = tensor_input.clone()
            mask_input[0, idx] = mask_token_id

            output = model(mask_input)
            prediction_scores = output[0]

            # log_softmax is computed in a numerically stable way; taking
            # softmax(...).log() underflows to -inf for very unlikely tokens.
            ps = torch.log_softmax(prediction_scores[0, idx], dim=0)
            word_loss = ps[token_id]

            if verbose:
                print('\n\n idx = {0}'.format(idx))
                print('\t mask_input = {0}'.format(mask_input))
                print('\n\t output = {0}'.format(output))
                print('\n\t prediction_scores = output[0] = {0}'.format(prediction_scores))
                print('\n\t ps = {0}'.format(ps))
                print('\n\t word_loss = {0}'.format(word_loss))

            sentence_loss += word_loss.item()

    ppl = np.exp(-sentence_loss / sen_len)
    return sentence_loss, ppl


def main():
    """Score SENTENCE with the checkpoint at MODEL_PATH and print the result."""
    # Imported here so that pseudo_perplexity can be imported and exercised
    # with only torch/numpy available.
    from transformers import BertTokenizer, BertForMaskedLM

    # Load pre-trained model tokenizer (vocabulary)
    tokenizer = BertTokenizer.from_pretrained(MODEL_PATH)
    # Load pre-trained model (weights)
    model = BertForMaskedLM.from_pretrained(MODEL_PATH)
    model.eval()

    tokenize_input = tokenizer.tokenize(SENTENCE)
    print('tokenize_input = ', tokenize_input)

    # NOTE(review): the sentence is scored without [CLS]/[SEP] around it.
    # BERT inputs are normally wrapped in these special tokens, so adding them
    # (and skipping them when scoring) may give more faithful numbers -- it
    # would change the published output, so confirm before changing.
    token_ids = tokenizer.convert_tokens_to_ids(tokenize_input)

    sentence_loss, ppl = pseudo_perplexity(
        token_ids, model, tokenizer.mask_token_id, verbose=True
    )
    print("sentence_loss = {0};ppl = {1}".format(sentence_loss, ppl))


if __name__ == "__main__":
    main()
打印结果:
tokenize_input = ['我', '不', '会', '忘', '记', '和', '你', '一', '起', '奋', '斗', '的', '时', '光', '。'] idx = 0 mask_input = tensor([[ 103, 679, 833, 2563, 6381, 1469, 872, 671, 6629, 1939, 3159, 4638, 3198, 1045, 511]]) output = MaskedLMOutput(loss=None, logits=tensor([[[-10.0067, -9.9702, -10.3403, ..., -7.0367, -7.9918, -7.8884], [ -8.9250, -8.6627, -8.8329, ..., -5.6988, -5.0543, -7.3196], [-17.5815, -16.8282, -17.5551, ..., -11.2575, -8.4464, -15.8063], ..., [-17.7271, -17.4097, -18.3814, ..., -12.5380, -14.9620, -13.0537], [-14.8090, -15.5407, -14.8516, ..., -9.6344, -8.9355, -11.3215], [-10.2498, -10.0447, -10.2479, ..., -5.7584, -4.9482, -5.1695]]]), hidden_states=None, attentions=None) prediction_scores = output[0] = tensor([[[-10.0067, -9.9702, -10.3403, ..., -7.0367, -7.9918, -7.8884], [ -8.9250, -8.6627, -8.8329, ..., -5.6988, -5.0543, -7.3196], [-17.5815, -16.8282, -17.5551, ..., -11.2575, -8.4464, -15.8063], ..., [-17.7271, -17.4097, -18.3814, ..., -12.5380, -14.9620, -13.0537], [-14.8090, -15.5407, -14.8516, ..., -9.6344, -8.9355, -11.3215], [-10.2498, -10.0447, -10.2479, ..., -5.7584, -4.9482, -5.1695]]]) ps = tensor([-20.2204, -20.1840, -20.5541, ..., -17.2505, -18.2055, -18.1022]) word_loss = -4.207489013671875 idx = 1 mask_input = tensor([[2769, 103, 833, 2563, 6381, 1469, 872, 671, 6629, 1939, 3159, 4638, 3198, 1045, 511]]) output = MaskedLMOutput(loss=None, logits=tensor([[[-14.2572, -13.5664, -13.7818, ..., -12.4880, -11.4941, -11.0043], [-13.3572, -12.7593, -13.1295, ..., -8.9165, -6.9501, -8.0928], [-18.3267, -17.2391, -16.6626, ..., -9.1351, -8.5136, -10.8610], ..., [-16.8631, -15.9635, -16.3637, ..., -11.8876, -12.6025, -10.4363], [-14.1836, -14.0044, -13.6275, ..., -9.0348, -10.7950, -9.2346], [-16.2714, -15.7472, -15.5543, ..., -9.3256, -9.7824, -7.4806]]]), hidden_states=None, attentions=None) prediction_scores = output[0] = tensor([[[-14.2572, -13.5664, -13.7818, ..., -12.4880, -11.4941, -11.0043], [-13.3572, -12.7593, -13.1295, ..., -8.9165, 
-6.9501, -8.0928], [-18.3267, -17.2391, -16.6626, ..., -9.1351, -8.5136, -10.8610], ..., [-16.8631, -15.9635, -16.3637, ..., -11.8876, -12.6025, -10.4363], [-14.1836, -14.0044, -13.6275, ..., -9.0348, -10.7950, -9.2346], [-16.2714, -15.7472, -15.5543, ..., -9.3256, -9.7824, -7.4806]]]) ps = tensor([-27.0073, -26.4094, -26.7796, ..., -22.5666, -20.6002, -21.7429]) word_loss = -3.4179904460906982 idx = 2 mask_input = tensor([[2769, 679, 103, 2563, 6381, 1469, 872, 671, 6629, 1939, 3159, 4638, 3198, 1045, 511]]) output = MaskedLMOutput(loss=None, logits=tensor([[[-10.4260, -9.7421, -10.0949, ..., -9.1981, -9.3232, -9.0737], [-11.1497, -10.3329, -10.3952, ..., -6.6423, -5.8855, -7.4425], [-10.2441, -9.8596, -10.0538, ..., -6.8899, -6.3872, -7.1557], ..., [-14.8344, -13.9255, -14.6416, ..., -11.8463, -11.3034, -9.4505], [-13.0585, -12.7334, -12.5315, ..., -9.1430, -9.0249, -8.6625], [-10.8999, -10.1885, -10.4381, ..., -6.9490, -6.5864, -5.2088]]]), hidden_states=None, attentions=None) prediction_scores = output[0] = tensor([[[-10.4260, -9.7421, -10.0949, ..., -9.1981, -9.3232, -9.0737], [-11.1497, -10.3329, -10.3952, ..., -6.6423, -5.8855, -7.4425], [-10.2441, -9.8596, -10.0538, ..., -6.8899, -6.3872, -7.1557], ..., [-14.8344, -13.9255, -14.6416, ..., -11.8463, -11.3034, -9.4505], [-13.0585, -12.7334, -12.5315, ..., -9.1430, -9.0249, -8.6625], [-10.8999, -10.1885, -10.4381, ..., -6.9490, -6.5864, -5.2088]]]) ps = tensor([-23.9556, -23.5712, -23.7654, ..., -20.6015, -20.0987, -20.8673]) word_loss = -3.0778353214263916 idx = 3 mask_input = tensor([[2769, 679, 833, 103, 6381, 1469, 872, 671, 6629, 1939, 3159, 4638, 3198, 1045, 511]]) output = MaskedLMOutput(loss=None, logits=tensor([[[-11.1854, -10.8186, -10.8980, ..., -10.0304, -6.8312, -10.1228], [-18.3292, -17.1635, -18.1168, ..., -12.8976, -6.5055, -10.3133], [-18.9977, -17.6461, -18.6712, ..., -12.0834, -9.4692, -13.3222], ..., [-15.9868, -15.1038, -15.7956, ..., -11.8385, -8.8921, -11.2440], [-13.2753, -13.0012, 
-12.8868, ..., -8.5294, -7.7151, -9.6861], [-14.0791, -13.6179, -13.8650, ..., -9.9380, -8.0259, -6.6505]]]), hidden_states=None, attentions=None) prediction_scores = output[0] = tensor([[[-11.1854, -10.8186, -10.8980, ..., -10.0304, -6.8312, -10.1228], [-18.3292, -17.1635, -18.1168, ..., -12.8976, -6.5055, -10.3133], [-18.9977, -17.6461, -18.6712, ..., -12.0834, -9.4692, -13.3222], ..., [-15.9868, -15.1038, -15.7956, ..., -11.8385, -8.8921, -11.2440], [-13.2753, -13.0012, -12.8868, ..., -8.5294, -7.7151, -9.6861], [-14.0791, -13.6179, -13.8650, ..., -9.9380, -8.0259, -6.6505]]]) ps = tensor([-28.6803, -28.3364, -28.7086, ..., -26.4609, -23.3448, -25.8600]) word_loss = -0.024608036503195763 idx = 4 mask_input = tensor([[2769, 679, 833, 2563, 103, 1469, 872, 671, 6629, 1939, 3159, 4638, 3198, 1045, 511]]) output = MaskedLMOutput(loss=None, logits=tensor([[[-11.6949, -11.0920, -11.4218, ..., -10.2302, -9.3920, -10.9836], [-18.6331, -17.9585, -18.3607, ..., -12.7316, -10.2360, -14.0741], [-19.6247, -18.4559, -19.2653, ..., -12.6368, -11.0657, -15.6243], ..., [-15.9810, -15.1353, -15.9852, ..., -12.4308, -12.2341, -10.9428], [-13.4082, -13.1908, -13.3454, ..., -10.0117, -10.6251, -10.7604], [-13.8807, -13.1495, -13.6315, ..., -9.3678, -9.9106, -7.1275]]]), hidden_states=None, attentions=None) prediction_scores = output[0] = tensor([[[-11.6949, -11.0920, -11.4218, ..., -10.2302, -9.3920, -10.9836], [-18.6331, -17.9585, -18.3607, ..., -12.7316, -10.2360, -14.0741], [-19.6247, -18.4559, -19.2653, ..., -12.6368, -11.0657, -15.6243], ..., [-15.9810, -15.1353, -15.9852, ..., -12.4308, -12.2341, -10.9428], [-13.4082, -13.1908, -13.3454, ..., -10.0117, -10.6251, -10.7604], [-13.8807, -13.1495, -13.6315, ..., -9.3678, -9.9106, -7.1275]]]) ps = tensor([-30.6680, -30.0711, -30.5083, ..., -28.1964, -25.7133, -29.4577]) word_loss = -0.021782301366329193 idx = 5 mask_input = tensor([[2769, 679, 833, 2563, 6381, 103, 872, 671, 6629, 1939, 3159, 4638, 3198, 1045, 511]]) output = 
MaskedLMOutput(loss=None, logits=tensor([[[-10.8215, -10.1308, -10.5400, ..., -9.4374, -9.1841, -9.7690], [-16.6464, -15.7021, -16.0986, ..., -9.1416, -7.5447, -8.9926], [-18.4551, -17.0224, -17.3103, ..., -8.7594, -8.8654, -10.6732], ..., [-14.8322, -13.5759, -14.5636, ..., -10.8961, -10.6665, -8.9241], [-12.3797, -11.8117, -11.9058, ..., -8.7238, -9.1733, -9.1059], [-12.6140, -11.4767, -11.6919, ..., -8.0748, -9.4955, -5.7950]]]), hidden_states=None, attentions=None) prediction_scores = output[0] = tensor([[[-10.8215, -10.1308, -10.5400, ..., -9.4374, -9.1841, -9.7690], [-16.6464, -15.7021, -16.0986, ..., -9.1416, -7.5447, -8.9926], [-18.4551, -17.0224, -17.3103, ..., -8.7594, -8.8654, -10.6732], ..., [-14.8322, -13.5759, -14.5636, ..., -10.8961, -10.6665, -8.9241], [-12.3797, -11.8117, -11.9058, ..., -8.7238, -9.1733, -9.1059], [-12.6140, -11.4767, -11.6919, ..., -8.0748, -9.4955, -5.7950]]]) ps = tensor([-20.0339, -19.5133, -20.0343, ..., -17.0866, -17.4351, -15.4161]) word_loss = -2.464529037475586 idx = 6 mask_input = tensor([[2769, 679, 833, 2563, 6381, 1469, 103, 671, 6629, 1939, 3159, 4638, 3198, 1045, 511]]) output = MaskedLMOutput(loss=None, logits=tensor([[[-11.6927, -10.7244, -10.8993, ..., -8.0539, -8.4719, -9.0431], [-14.6502, -14.0066, -14.4193, ..., -7.7190, -5.6522, -8.8189], [-17.8192, -16.0978, -17.0802, ..., -8.5008, -7.9125, -11.5379], ..., [-15.0797, -14.0576, -14.8092, ..., -10.5593, -11.1677, -9.6744], [-12.6444, -12.2899, -12.1446, ..., -8.7772, -9.4889, -9.6838], [-11.8326, -11.0902, -11.1104, ..., -7.6406, -8.1461, -6.2924]]]), hidden_states=None, attentions=None) prediction_scores = output[0] = tensor([[[-11.6927, -10.7244, -10.8993, ..., -8.0539, -8.4719, -9.0431], [-14.6502, -14.0066, -14.4193, ..., -7.7190, -5.6522, -8.8189], [-17.8192, -16.0978, -17.0802, ..., -8.5008, -7.9125, -11.5379], ..., [-15.0797, -14.0576, -14.8092, ..., -10.5593, -11.1677, -9.6744], [-12.6444, -12.2899, -12.1446, ..., -8.7772, -9.4889, -9.6838], [-11.8326, 
-11.0902, -11.1104, ..., -7.6406, -8.1461, -6.2924]]]) ps = tensor([-17.8420, -17.7343, -17.7814, ..., -15.6324, -16.8942, -15.6699]) word_loss = -3.217534065246582 idx = 7 mask_input = tensor([[2769, 679, 833, 2563, 6381, 1469, 872, 103, 6629, 1939, 3159, 4638, 3198, 1045, 511]]) output = MaskedLMOutput(loss=None, logits=tensor([[[-12.0652, -11.1372, -11.7658, ..., -10.3255, -9.5978, -10.2930], [-17.4623, -16.3227, -17.0211, ..., -10.0448, -8.8320, -11.6701], [-19.7825, -18.2467, -18.9617, ..., -10.4417, -10.0575, -13.2705], ..., [-16.7194, -15.7009, -16.5568, ..., -11.9396, -12.9538, -9.1279], [-14.1858, -13.9772, -14.0763, ..., -9.9030, -10.4625, -8.7678], [-14.0998, -13.0324, -13.3418, ..., -8.7676, -10.0443, -6.4476]]]), hidden_states=None, attentions=None) prediction_scores = output[0] = tensor([[[-12.0652, -11.1372, -11.7658, ..., -10.3255, -9.5978, -10.2930], [-17.4623, -16.3227, -17.0211, ..., -10.0448, -8.8320, -11.6701], [-19.7825, -18.2467, -18.9617, ..., -10.4417, -10.0575, -13.2705], ..., [-16.7194, -15.7009, -16.5568, ..., -11.9396, -12.9538, -9.1279], [-14.1858, -13.9772, -14.0763, ..., -9.9030, -10.4625, -8.7678], [-14.0998, -13.0324, -13.3418, ..., -8.7676, -10.0443, -6.4476]]]) ps = tensor([-29.0154, -28.9152, -28.5686, ..., -23.7333, -25.4041, -24.8862]) word_loss = -0.006231430917978287 idx = 8 mask_input = tensor([[2769, 679, 833, 2563, 6381, 1469, 872, 671, 103, 1939, 3159, 4638, 3198, 1045, 511]]) output = MaskedLMOutput(loss=None, logits=tensor([[[-12.3327, -11.7290, -12.1774, ..., -10.6400, -9.2812, -10.8762], [-17.4025, -16.3325, -17.3093, ..., -9.6641, -8.0054, -10.9477], [-19.8157, -18.1812, -19.2325, ..., -10.3199, -9.6911, -13.2068], ..., [-15.4990, -14.1986, -15.4210, ..., -10.8605, -11.1951, -9.2175], [-13.5214, -13.1154, -13.2580, ..., -9.1551, -8.5442, -8.5556], [-13.9661, -12.7296, -13.4830, ..., -7.9905, -9.4974, -5.5795]]]), hidden_states=None, attentions=None) prediction_scores = output[0] = tensor([[[-12.3327, -11.7290, 
-12.1774, ..., -10.6400, -9.2812, -10.8762], [-17.4025, -16.3325, -17.3093, ..., -9.6641, -8.0054, -10.9477], [-19.8157, -18.1812, -19.2325, ..., -10.3199, -9.6911, -13.2068], ..., [-15.4990, -14.1986, -15.4210, ..., -10.8605, -11.1951, -9.2175], [-13.5214, -13.1154, -13.2580, ..., -9.1551, -8.5442, -8.5556], [-13.9661, -12.7296, -13.4830, ..., -7.9905, -9.4974, -5.5795]]]) ps = tensor([-26.1031, -25.4673, -25.6910, ..., -23.7415, -24.6235, -23.6001]) word_loss = -0.4470815658569336 idx = 9 mask_input = tensor([[2769, 679, 833, 2563, 6381, 1469, 872, 671, 6629, 103, 3159, 4638, 3198, 1045, 511]]) output = MaskedLMOutput(loss=None, logits=tensor([[[-12.9872, -12.3978, -12.9848, ..., -11.8125, -12.0875, -12.0079], [-17.5210, -16.8555, -17.3870, ..., -10.8851, -9.9333, -12.9947], [-19.9390, -18.8892, -19.5466, ..., -12.1456, -11.2809, -13.7224], ..., [-14.5711, -13.7166, -14.6204, ..., -10.1978, -11.9384, -9.0040], [-13.0610, -12.8815, -12.9802, ..., -9.4830, -9.4141, -10.5692], [-14.2910, -13.4047, -14.0815, ..., -8.9679, -11.1636, -6.8003]]]), hidden_states=None, attentions=None) prediction_scores = output[0] = tensor([[[-12.9872, -12.3978, -12.9848, ..., -11.8125, -12.0875, -12.0079], [-17.5210, -16.8555, -17.3870, ..., -10.8851, -9.9333, -12.9947], [-19.9390, -18.8892, -19.5466, ..., -12.1456, -11.2809, -13.7224], ..., [-14.5711, -13.7166, -14.6204, ..., -10.1978, -11.9384, -9.0040], [-13.0610, -12.8815, -12.9802, ..., -9.4830, -9.4141, -10.5692], [-14.2910, -13.4047, -14.0815, ..., -8.9679, -11.1636, -6.8003]]]) ps = tensor([-23.7871, -23.3177, -23.7469, ..., -19.6898, -21.1636, -19.3445]) word_loss = -1.905866265296936 idx = 10 mask_input = tensor([[2769, 679, 833, 2563, 6381, 1469, 872, 671, 6629, 1939, 103, 4638, 3198, 1045, 511]]) output = MaskedLMOutput(loss=None, logits=tensor([[[-10.2435, -9.4682, -9.9029, ..., -8.6173, -7.9944, -9.5463], [-14.7155, -14.1531, -14.7035, ..., -7.7060, -7.0066, -8.7167], [-17.8262, -16.8357, -17.2724, ..., -9.3416, -9.6015, 
-11.3678], ..., [-13.5025, -12.6059, -13.4680, ..., -9.6887, -10.2040, -7.5718], [-11.8572, -11.8200, -11.6956, ..., -8.0838, -8.2098, -7.9838], [-11.4906, -10.7753, -11.1489, ..., -6.4764, -8.7700, -4.7994]]]), hidden_states=None, attentions=None) prediction_scores = output[0] = tensor([[[-10.2435, -9.4682, -9.9029, ..., -8.6173, -7.9944, -9.5463], [-14.7155, -14.1531, -14.7035, ..., -7.7060, -7.0066, -8.7167], [-17.8262, -16.8357, -17.2724, ..., -9.3416, -9.6015, -11.3678], ..., [-13.5025, -12.6059, -13.4680, ..., -9.6887, -10.2040, -7.5718], [-11.8572, -11.8200, -11.6956, ..., -8.0838, -8.2098, -7.9838], [-11.4906, -10.7753, -11.1489, ..., -6.4764, -8.7700, -4.7994]]]) ps = tensor([-23.3028, -23.2676, -24.0384, ..., -20.8967, -21.3373, -20.7125]) word_loss = -0.3310864269733429 idx = 11 mask_input = tensor([[2769, 679, 833, 2563, 6381, 1469, 872, 671, 6629, 1939, 3159, 103, 3198, 1045, 511]]) output = MaskedLMOutput(loss=None, logits=tensor([[[-11.1587, -10.6746, -11.6326, ..., -9.9938, -8.8795, -10.4635], [-14.6800, -14.1649, -14.6931, ..., -7.1853, -6.1263, -11.4231], [-17.5996, -16.4610, -17.1693, ..., -7.9229, -7.0681, -13.4018], ..., [-13.6107, -12.4848, -13.5183, ..., -9.4305, -9.1442, -7.4951], [-11.5701, -11.2959, -11.3109, ..., -7.2745, -7.0823, -8.4521], [-13.5606, -12.9446, -13.3137, ..., -8.5220, -9.7932, -7.7482]]]), hidden_states=None, attentions=None) prediction_scores = output[0] = tensor([[[-11.1587, -10.6746, -11.6326, ..., -9.9938, -8.8795, -10.4635], [-14.6800, -14.1649, -14.6931, ..., -7.1853, -6.1263, -11.4231], [-17.5996, -16.4610, -17.1693, ..., -7.9229, -7.0681, -13.4018], ..., [-13.6107, -12.4848, -13.5183, ..., -9.4305, -9.1442, -7.4951], [-11.5701, -11.2959, -11.3109, ..., -7.2745, -7.0823, -8.4521], [-13.5606, -12.9446, -13.3137, ..., -8.5220, -9.7932, -7.7482]]]) ps = tensor([-24.5581, -24.6442, -24.8213, ..., -21.0443, -21.8916, -20.5020]) word_loss = -0.0409548319876194 idx = 12 mask_input = tensor([[2769, 679, 833, 2563, 6381, 
1469, 872, 671, 6629, 1939, 3159, 4638, 103, 1045, 511]]) output = MaskedLMOutput(loss=None, logits=tensor([[[-11.2262, -11.1173, -11.6287, ..., -10.3565, -9.4391, -11.4225], [-15.2089, -14.6585, -15.3925, ..., -7.9105, -6.7598, -10.2716], [-17.7514, -16.8604, -17.4242, ..., -8.0904, -8.6169, -12.3799], ..., [-11.9148, -11.5928, -12.1447, ..., -7.0739, -9.0568, -7.8991], [-10.9299, -10.9160, -10.9438, ..., -5.0096, -7.1774, -7.3603], [-14.6292, -14.3548, -14.3348, ..., -6.8946, -10.3034, -8.7604]]]), hidden_states=None, attentions=None) prediction_scores = output[0] = tensor([[[-11.2262, -11.1173, -11.6287, ..., -10.3565, -9.4391, -11.4225], [-15.2089, -14.6585, -15.3925, ..., -7.9105, -6.7598, -10.2716], [-17.7514, -16.8604, -17.4242, ..., -8.0904, -8.6169, -12.3799], ..., [-11.9148, -11.5928, -12.1447, ..., -7.0739, -9.0568, -7.8991], [-10.9299, -10.9160, -10.9438, ..., -5.0096, -7.1774, -7.3603], [-14.6292, -14.3548, -14.3348, ..., -6.8946, -10.3034, -8.7604]]]) ps = tensor([-26.6420, -26.3200, -26.8719, ..., -21.8011, -23.7840, -22.6264]) word_loss = -0.2741313576698303 idx = 13 mask_input = tensor([[2769, 679, 833, 2563, 6381, 1469, 872, 671, 6629, 1939, 3159, 4638, 3198, 103, 511]]) output = MaskedLMOutput(loss=None, logits=tensor([[[-12.9708, -12.1014, -12.6502, ..., -10.6858, -10.9495, -11.6393], [-17.4693, -16.4352, -17.2923, ..., -10.1345, -9.2979, -12.3043], [-19.2976, -17.8839, -18.8252, ..., -11.4233, -10.9146, -13.9556], ..., [-14.2439, -13.8837, -14.3827, ..., -10.8131, -9.7626, -10.4449], [-11.0731, -11.4156, -11.2104, ..., -8.5579, -9.0104, -8.7935], [-13.5802, -13.1632, -13.3280, ..., -9.2640, -10.9600, -8.3216]]]), hidden_states=None, attentions=None) prediction_scores = output[0] = tensor([[[-12.9708, -12.1014, -12.6502, ..., -10.6858, -10.9495, -11.6393], [-17.4693, -16.4352, -17.2923, ..., -10.1345, -9.2979, -12.3043], [-19.2976, -17.8839, -18.8252, ..., -11.4233, -10.9146, -13.9556], ..., [-14.2439, -13.8837, -14.3827, ..., -10.8131, -9.7626, 
-10.4449], [-11.0731, -11.4156, -11.2104, ..., -8.5579, -9.0104, -8.7935], [-13.5802, -13.1632, -13.3280, ..., -9.2640, -10.9600, -8.3216]]]) ps = tensor([-26.7180, -27.0605, -26.8553, ..., -24.2028, -24.6553, -24.4384]) word_loss = -2.3570048809051514 idx = 14 mask_input = tensor([[2769, 679, 833, 2563, 6381, 1469, 872, 671, 6629, 1939, 3159, 4638, 3198, 1045, 103]]) output = MaskedLMOutput(loss=None, logits=tensor([[[-11.3928, -10.5590, -11.3276, ..., -8.2870, -7.0281, -9.3417], [-15.1979, -14.2848, -14.9167, ..., -8.0477, -3.3199, -9.9085], [-16.9765, -15.7591, -16.4064, ..., -7.3844, -3.6073, -10.5002], ..., [-14.3350, -13.5203, -14.7181, ..., -9.1939, -8.4368, -6.3008], [-11.5855, -11.6669, -11.5224, ..., -6.1303, -7.0456, -5.4713], [ -9.3767, -9.1142, -9.3964, ..., -5.2297, -5.3290, -3.2478]]]), hidden_states=None, attentions=None) prediction_scores = output[0] = tensor([[[-11.3928, -10.5590, -11.3276, ..., -8.2870, -7.0281, -9.3417], [-15.1979, -14.2848, -14.9167, ..., -8.0477, -3.3199, -9.9085], [-16.9765, -15.7591, -16.4064, ..., -7.3844, -3.6073, -10.5002], ..., [-14.3350, -13.5203, -14.7181, ..., -9.1939, -8.4368, -6.3008], [-11.5855, -11.6669, -11.5224, ..., -6.1303, -7.0456, -5.4713], [ -9.3767, -9.1142, -9.3964, ..., -5.2297, -5.3290, -3.2478]]]) ps = tensor([-20.6789, -20.4164, -20.6986, ..., -16.5319, -16.6312, -14.5500]) word_loss = -1.3718788623809814 sentence_loss = -23.16600384376943;ppl = 4.685160888290345 Process finished with exit code 0
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。