
BCELoss, CrossEntropyLoss and NLLLoss in PyTorch

For binary classification in PyTorch there are three common combinations of fully-connected layer, activation function, and loss function: torch.nn.Linear + torch.sigmoid + torch.nn.BCELoss; torch.nn.Linear + BCEWithLogitsLoss; and torch.nn.Linear (with output dimension 2) + torch.nn.CrossEntropyLoss. BCEWithLogitsLoss has the Sigmoid built in, while CrossEntropyLoss has the Softmax built in.
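To make the three setups concrete, here is a minimal sketch; the batch size, feature size, and random data below are made up purely for illustration, and all three variants produce a scalar loss for the same binary-classification batch:

import torch
import torch.nn as nn

torch.manual_seed(0)
x = torch.randn(4, 8)                      # 4 samples, 8 features (arbitrary sizes)
y = torch.tensor([0., 1., 1., 0.])         # binary labels as floats for the BCE family

# 1) Linear(out=1) + sigmoid + BCELoss: sigmoid applied explicitly
fc1 = nn.Linear(8, 1)
p = torch.sigmoid(fc1(x)).squeeze(1)
loss1 = nn.BCELoss()(p, y)

# 2) Linear(out=1) + BCEWithLogitsLoss: sigmoid is applied inside the loss
fc2 = nn.Linear(8, 1)
loss2 = nn.BCEWithLogitsLoss()(fc2(x).squeeze(1), y)

# 3) Linear(out=2) + CrossEntropyLoss: LogSoftmax + NLLLoss inside the loss,
#    and the target becomes class indices instead of 0/1 floats
fc3 = nn.Linear(8, 2)
loss3 = nn.CrossEntropyLoss()(fc3(x), y.long())

print(loss1, loss2, loss3)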

The key differences:

  • For CrossEntropyLoss, an input of logits=(3, 2) plus target=(3) is enough, whereas BCELoss and BCEWithLogitsLoss need logits=(3, 2) and target=(3, 2). In other words, the BCE family is designed so that the output and the target match element-wise, while CrossEntropyLoss takes a multi-column input in which each column holds the logit of one class.
  • The target of CrossEntropyLoss is a LongTensor holding the class index; the BCE family instead expects a FloatTensor with values between 0 and 1.
  • Numerically, CrossEntropyLoss and the BCE family agree only in the 0.5 case and differ everywhere else. The BCE family computes -(target*log(pred) + (1-target)*log(1-pred)); CrossEntropyLoss is really Softmax + NLLLoss, so per sample it computes -logits[sample_index][true_class] + log(sum(exp(logits[sample_index][i]) for i in all_classes)). For the derivation of CE, see the article on entropy, conditional entropy, cross entropy, joint entropy, relative entropy, KL divergence, SCE, MAE, and mutual information (information gain). A quick numerical check follows this list.
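As a quick sanity check of the shapes, dtypes, and formulas above, here is a small sketch (the probabilities and labels are arbitrary made-up numbers):

import torch
import torch.nn as nn

p = torch.tensor([[0.9, 0.2],
                  [0.4, 0.7],
                  [0.1, 0.6]])             # already probabilities in (0, 1), shape (3, 2)
t = torch.tensor([[1., 0.],
                  [0., 1.],
                  [0., 1.]])               # BCE target: FloatTensor with the same (3, 2) shape

bce = nn.BCELoss(reduction='none')(p, t)
manual = -(t * torch.log(p) + (1 - t) * torch.log(1 - p))
print(bce)                                  # element-wise BCE
print(manual)                               # identical to the line above

t_ce = torch.LongTensor([0, 1, 1])          # CrossEntropyLoss target: class indices, shape (3,)
# CrossEntropyLoss treats its input as logits and applies softmax internally,
# so feeding it the same numbers gives different values than BCE.
print(nn.CrossEntropyLoss(reduction='none')(p, t_ce))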

Some code:

import torch
from torch import nn
import math

loss_f = nn.CrossEntropyLoss(reduction='none')
output = torch.randn(2, 3)  # 2 samples, 3 classes
# target = torch.from_numpy(np.array([1, 0])).type(torch.LongTensor)
target = torch.LongTensor([0, 2])  # class indices: label 0 and label 2
loss = loss_f(output, target)
print('CrossEntropy loss: ', loss)
print(f'reduction=none, so the per-sample losses are visible: [{loss}]')

nll = nn.NLLLoss(reduction='none')
logsoftmax = nn.LogSoftmax(dim=-1)
print('logsoftmax(output) result: {}'.format(logsoftmax(output)))
# For multi-class classification, all NLLLoss does is negate the log-softmax value
# picked out at the target index of each sample.
print('nll(logsoftmax(output), target) :{}'.format(nll(logsoftmax(output), target)))

def manual_cal(sample_index, target, output):
    # sample_index is the row index of the sample
    sample_output = output[sample_index]
    sample_target = target[sample_index]
    x_class = sample_output[sample_target]
    sample_output_len = len(sample_output)
    log_sigma_exp_x = math.log(sum(math.exp(sample_output[i]) for i in range(sample_output_len)))
    sample_loss = -x_class + log_sigma_exp_x
    print(f'manually computed cross-entropy loss{sample_index}: {sample_loss}')
    return sample_loss

for i in range(2):
    manual_cal(i, target, output)

# With nn.CrossEntropyLoss(reduction='mean'), the result is exactly the mean of the
# manually computed per-sample losses, i.e. a single scalar.
# With nn.CrossEntropyLoss(reduction='none'), both loss0 and loss1 are listed.
'''
Sample output:
CrossEntropy loss:  tensor([2.7362, 0.9749])
reduction=none, so the per-sample losses are visible: [tensor([2.7362, 0.9749])]
logsoftmax(output) result: tensor([[-2.7362, -1.4015, -0.3726],
        [-0.8505, -1.6319, -0.9749]])
nll(logsoftmax(output), target) :tensor([2.7362, 0.9749])
manually computed cross-entropy loss0: 2.736179828643799
manually computed cross-entropy loss1: 0.9749272465705872
'''

Implemented in PyTorch, the script below also covers RCE (reverse cross entropy, with the roles of logits and predictions swapped) and SCE (symmetric cross entropy, which combines CE and RCE):

import torch.nn.functional as F
import torch
import torch.nn as nn

# relationship between nn.CrossEntropyLoss() and KLDivLoss
# Symmetric Cross Entropy: loss = a * CE + b * RCE
class SCELoss(nn.Module):
    def __init__(self, num_classes=10, a=1, b=1, eps=1e-18):
        super(SCELoss, self).__init__()
        self.num_classes = num_classes
        self.a = a  # the two weighting hyper-parameters
        self.b = b
        self.cross_entropy = nn.CrossEntropyLoss()
        self.cross_entropy_none = nn.CrossEntropyLoss(reduction="none")
        self.eps = eps

    def forward(self, raw_pred, labels):
        # CE part: the usual cross-entropy loss
        ce = self.cross_entropy(raw_pred, labels)
        # RCE part: swap the roles of prediction and label
        pred = F.softmax(raw_pred, dim=1)
        pred = torch.clamp(pred, min=self.eps, max=1.0)
        label_one_hot = F.one_hot(labels, self.num_classes).float().to(pred.device)
        label_one_hot = torch.clamp(label_one_hot, min=self.eps, max=1.0)  # the SCE paper clamps to 1e-4, i.e. A = -4; here the minimum is the configurable eps
        my_ce = -1 * torch.sum(label_one_hot * torch.log(pred), dim=1)
        print('pred={} label_one_hot={} my_ce={}'.format(pred, label_one_hot, my_ce))
        print('raw_pred={} labels={} official_ce={}'.format(raw_pred, labels, self.cross_entropy_none(raw_pred, labels)))
        rce = -1 * torch.sum(pred * torch.log(label_one_hot), dim=1)
        print('pred={} label_one_hot={} rce={}'.format(pred, label_one_hot, rce))
        loss = self.a * ce + self.b * rce.mean()
        return loss

y_pred = torch.tensor([[10.0, 5.0, -6.0], [8.0, 8.0, 8.0]])
y_true = torch.tensor([0, 2])
ce1 = SCELoss(num_classes=3)(y_pred, y_true)

Finally, a complete implementation of the different CE variants:

import torch
import torch.nn as nn
import torch.nn.functional as F

class MyCE1(nn.Module):
    # CrossEntropyLoss re-implemented as LogSoftmax + NLLLoss
    def __init__(self):
        super(MyCE1, self).__init__()
        self.nll = nn.NLLLoss(reduction='none')
        self.logsoftmax = nn.LogSoftmax(dim=-1)

    def forward(self, logits, targets):
        return self.nll(self.logsoftmax(logits), targets)

class MyCE2(nn.Module):
    # CrossEntropyLoss re-implemented with a one-hot target and log(softmax)
    def __init__(self):
        super(MyCE2, self).__init__()

    def forward(self, logits, targets):
        # use the number of logit columns as num_classes (max(targets)+1 breaks
        # when the largest class index is absent from the batch)
        label_one_hot = F.one_hot(targets, num_classes=logits.size(-1))
        logits_softmax_log = torch.log(logits.softmax(dim=-1))
        res = -1 * torch.sum(label_one_hot * logits_softmax_log, dim=-1)
        return res

if __name__ == '__main__':
    logits = torch.rand(4, 3)
    targets = torch.LongTensor([1, 2, 1, 0])
    myce1 = MyCE1()
    myce2 = MyCE2()
    ce = nn.CrossEntropyLoss(reduction='none')
    print(myce1(logits, targets))
    print(myce2(logits, targets))
    print(ce(logits, targets))
    '''
    Sample output (values depend on the random logits); all three agree:
    tensor([0.8806, 0.9890, 1.1915, 1.2485])
    tensor([0.8806, 0.9890, 1.1915, 1.2485])
    tensor([0.8806, 0.9890, 1.1915, 1.2485])
    '''

----------------------------------------

Reprinted from: 二分类问题,应该选择sigmoid还是softmax? - 知乎

pytorch验证CrossEntropyLoss ,BCELoss 和 BCEWithLogitsLoss - CodeAntenna

PyTorch二分类时BCELoss,CrossEntropyLoss,Sigmoid等的选择和使用 - 知乎
