当前位置:   article > 正文

yolov5加入CBAM,SE,CA,ECA注意力机制,纯代码(22.3.1还更新)_cbam注意力机制代码

cbam注意力机制代码

 本文所涉及到的yolov5网络为5.0版本,后续有需求会更新6.0版本。

CBAM注意力

  1. # class ChannelAttention(nn.Module):
  2. # def __init__(self, in_planes, ratio=16):
  3. # super(ChannelAttention, self).__init__()
  4. # self.avg_pool = nn.AdaptiveAvgPool2d(1)
  5. # self.max_pool = nn.AdaptiveMaxPool2d(1)
  6. #
  7. # self.f1 = nn.Conv2d(in_planes, in_planes // ratio, 1, bias=False)
  8. # self.relu = nn.ReLU()
  9. # self.f2 = nn.Conv2d(in_planes // ratio, in_planes, 1, bias=False)
  10. # # 写法二,亦可使用顺序容器
  11. # # self.sharedMLP = nn.Sequential(
  12. # # nn.Conv2d(in_planes, in_planes // ratio, 1, bias=False), nn.ReLU(),
  13. # # nn.Conv2d(in_planes // ratio, in_planes, 1, bias=False))
  14. #
  15. # self.sigmoid = nn.Sigmoid()
  16. #
  17. # def forward(self, x):
  18. # avg_out = self.f2(self.relu(self.f1(self.avg_pool(x))))
  19. # max_out = self.f2(self.relu(self.f1(self.max_pool(x))))
  20. # out = self.sigmoid(avg_out + max_out)
  21. # return out
  22. #
  23. #
  24. # class SpatialAttention(nn.Module):
  25. # def __init__(self, kernel_size=7):
  26. # super(SpatialAttention, self).__init__()
  27. #
  28. # assert kernel_size in (3, 7), 'kernel size must be 3 or 7'
  29. # padding = 3 if kernel_size == 7 else 1
  30. #
  31. # self.conv = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False)
  32. # self.sigmoid = nn.Sigmoid()
  33. #
  34. # def forward(self, x):
  35. # avg_out = torch.mean(x, dim=1, keepdim=True)
  36. # max_out, _ = torch.max(x, dim=1, keepdim=True)
  37. # x = torch.cat([avg_out, max_out], dim=1)
  38. # x = self.conv(x)
  39. # return self.sigmoid(x)
  40. #
  41. #
  42. # class CBAMC3(nn.Module):
  43. # # CSP Bottleneck with 3 convolutions
  44. # def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
  45. # super(CBAMC3, self).__init__()
  46. # c_ = int(c2 * e) # hidden channels
  47. # self.cv1 = Conv(c1, c_, 1, 1)
  48. # self.cv2 = Conv(c1, c_, 1, 1)
  49. # self.cv3 = Conv(2 * c_, c2, 1)
  50. # self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
  51. # self.channel_attention = ChannelAttention(c2, 16)
  52. # self.spatial_attention = SpatialAttention(7)
  53. #
  54. # # self.m = nn.Sequential(*[CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)])
  55. #
  56. # def forward(self, x):
  57. # out = self.channel_attention(x) * x
  58. # print('outchannels:{}'.format(out.shape))
  59. # out = self.spatial_attention(out) * out
  60. # return out

CBAM代码 2022.1.26更新

受大佬指点,指出上述cbam模块不匹配yolov5工程代码,yolov5加入cbam注意力的代码以下述代码为准:(如果用这段代码,yolo.py和yaml文件中相应的CBAMC3也要换成CBAM,下面的SE同理)

  class ChannelAttention(nn.Module):
      """Channel-attention branch of CBAM.

      The input is squeezed spatially with both global average pooling and
      global max pooling. Each descriptor goes through the same two-layer
      1x1-convolution bottleneck (``f1`` -> ReLU -> ``f2``); the two results
      are summed and passed through a sigmoid.

      Args:
          in_planes: Number of channels of the incoming feature map.
          ratio: Channel reduction factor of the bottleneck.

      ``forward`` returns the gate only (one value per channel, spatial size
      1x1); the caller is expected to multiply it with the feature map.
      Assumes a 4-D ``(N, C, H, W)`` input -- TODO confirm against callers.
      """

      def __init__(self, in_planes, ratio=16):
          super(ChannelAttention, self).__init__()
          self.avg_pool = nn.AdaptiveAvgPool2d(1)
          self.max_pool = nn.AdaptiveMaxPool2d(1)
          # Keep at least one hidden channel: ``in_planes // ratio`` is 0 for
          # narrow layers (in_planes < ratio), which would create an unusable
          # zero-channel convolution.
          hidden = max(1, in_planes // ratio)
          # Shared MLP written as two 1x1 convolutions; attribute names are
          # kept so existing checkpoints still load.
          self.f1 = nn.Conv2d(in_planes, hidden, 1, bias=False)
          self.relu = nn.ReLU()
          self.f2 = nn.Conv2d(hidden, in_planes, 1, bias=False)
          self.sigmoid = nn.Sigmoid()

      def forward(self, x):
          avg_out = self.f2(self.relu(self.f1(self.avg_pool(x))))
          max_out = self.f2(self.relu(self.f1(self.max_pool(x))))
          return self.sigmoid(avg_out + max_out)
  class SpatialAttention(nn.Module):
      """Spatial-attention branch of CBAM.

      The channel-wise mean and channel-wise max of the input are stacked
      into a 2-channel map, convolved down to a single channel and passed
      through a sigmoid.

      Args:
          kernel_size: Size of the convolution kernel; must be 3 or 7.

      Raises:
          AssertionError: If ``kernel_size`` is neither 3 nor 7.

      ``forward`` returns the gate only (a single-channel map); the caller
      multiplies it with the feature map.
      """

      def __init__(self, kernel_size=7):
          super(SpatialAttention, self).__init__()
          assert kernel_size in (3, 7), 'kernel size must be 3 or 7'
          # "Same" padding for an odd kernel: 1 for k=3, 3 for k=7.
          padding = kernel_size // 2
          self.conv = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False)
          self.sigmoid = nn.Sigmoid()

      def forward(self, x):
          avg_out = torch.mean(x, dim=1, keepdim=True)
          max_out, _ = torch.max(x, dim=1, keepdim=True)
          pooled = torch.cat([avg_out, max_out], dim=1)
          return self.sigmoid(self.conv(pooled))
  class CBAM(nn.Module):
      """Convolutional Block Attention Module: channel gate, then spatial gate.

      Args:
          c1: Input channels; the channel attention is built for this width.
          c2: Output channels. Accepted so the constructor matches the
              ``[c1, c2, *args]`` argument list that ``parse_model`` builds;
              not used here.
          ratio: Reduction factor forwarded to ``ChannelAttention``.
          kernel_size: Kernel size forwarded to ``SpatialAttention``.

      ``forward`` multiplies the input by the channel gate and then by the
      spatial gate. The module has no repeat-count parameter, so it must not
      receive the ``n`` that ``parse_model`` inserts for C3-style blocks.
      """

      def __init__(self, c1, c2, ratio=16, kernel_size=7):  # ch_in, ch_out, reduction, spatial kernel
          super(CBAM, self).__init__()
          self.channel_attention = ChannelAttention(c1, ratio)
          self.spatial_attention = SpatialAttention(kernel_size)

      def forward(self, x):
          out = self.channel_attention(x) * x
          return self.spatial_attention(out) * out

 1.这里是卷积注意力的代码,我一般喜欢加在common.py的C3模块后面,不需要做改动,傻瓜ctrl+c+v就可以了。

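2.在yolo.py里做改动。在parse_model函数里将对应代码用以下代码替换,还是傻瓜ctrl+c+v。注意:下面的代码对应旧版的CBAMC3;如果使用上面2022.1.26更新的CBAM类,需要把第一个列表里的CBAMC3换成CBAM,并且不要把CBAM放进第二个列表(BottleneckCSP, C3那一行),因为CBAM的构造函数是CBAM(c1, c2, ratio, kernel_size),不接收重复次数n。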

  1. if m in [Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP,
  2. C3, C3TR,CBAMC3]:
  3. c1, c2 = ch[f], args[0]
  4. if c2 != no: # if not output
  5. c2 = make_divisible(c2 * gw, 8)
  6. args = [c1, c2, *args[1:]]
  7. if m in [BottleneckCSP, C3,CBAMC3]:
  8. args.insert(2, n) # number of repeats
  9. n = 1

3.在yaml文件里改动。比如你要用s网络,我是这样改的:将骨干网络中的C3模块全部替换为CBAMC3模块(这里需要注意的是,这样改动只能加载少部分预训练权重)。如果不想改动这么大,那么接着往下看。

pytorch中加入注意力机制(CBAM),以yolov5为例_YY_172的博客-CSDN博客_yolov5加注意力

这是首发将CBAM注意力添加到yolov5网络中的博主,我也是看了他的方法,侵删。

  1. backbone:
  2. # [from, number, module, args]
  3. [[-1, 1, Focus, [64, 3]], # 0-P1/2
  4. [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
  5. [-1, 3,CBAMC3, [128]],
  6. [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
  7. [-1, 9, CBAMC3, [256]],
  8. [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
  9. [-1, 9, CBAMC3, [512]],
  10. [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
  11. [-1, 1, SPP, [1024, [5, 9, 13]]],
  12. [-1, 3, CBAMC3, [1024, False]], # 9
  13. ]

 SE注意力

  class SELayer(nn.Module):
      """Squeeze-and-Excitation channel attention.

      Global average pooling produces one descriptor per channel; two
      bias-free linear layers (``l1`` -> ReLU -> ``l2``) and a sigmoid turn it
      into a per-channel gate that rescales the input.

      Args:
          c1: Number of channels of the incoming feature map.
          r: Channel reduction factor of the bottleneck.

      ``forward`` expects a 4-D tensor (it unpacks ``x.size()`` into four
      values) and returns a tensor of the same shape as its input.
      """

      def __init__(self, c1, r=16):
          super(SELayer, self).__init__()
          self.avgpool = nn.AdaptiveAvgPool2d(1)
          # ``c1 // r`` is 0 when c1 < r; keep at least one hidden unit so the
          # gate stays learnable instead of degenerating to a constant.
          hidden = max(1, c1 // r)
          self.l1 = nn.Linear(c1, hidden, bias=False)
          self.relu = nn.ReLU(inplace=True)
          self.l2 = nn.Linear(hidden, c1, bias=False)
          self.sig = nn.Sigmoid()

      def forward(self, x):
          b, c, _, _ = x.size()
          y = self.avgpool(x).view(b, c)
          y = self.sig(self.l2(self.relu(self.l1(y))))
          # Broadcasting over H and W is equivalent to expand_as(x).
          return x * y.view(b, c, 1, 1)

2022.1.26SE代码更新 

受同一位大佬指正,上述部分的se代码同样没有匹配yolov5工程代码,将修改后的se代码贴出,se注意力的代码以下述为准:

  class SE(nn.Module):
      """Squeeze-and-Excitation attention with a ``(c1, c2, ...)`` constructor.

      Same computation as a plain SE layer: global average pooling, two
      bias-free linear layers (``l1`` -> ReLU -> ``l2``), a sigmoid, and a
      per-channel rescaling of the input.

      Args:
          c1: Number of channels of the incoming feature map.
          c2: Output channels. Accepted so the constructor matches the
              ``[c1, c2, *args]`` argument list that ``parse_model`` builds;
              not used here.
          r: Channel reduction factor of the bottleneck.

      ``forward`` expects a 4-D tensor (it unpacks ``x.size()`` into four
      values) and returns a tensor of the same shape as its input.
      """

      def __init__(self, c1, c2, r=16):
          super(SE, self).__init__()
          self.avgpool = nn.AdaptiveAvgPool2d(1)
          # ``c1 // r`` is 0 when c1 < r; keep at least one hidden unit so the
          # gate stays learnable instead of degenerating to a constant.
          hidden = max(1, c1 // r)
          self.l1 = nn.Linear(c1, hidden, bias=False)
          self.relu = nn.ReLU(inplace=True)
          self.l2 = nn.Linear(hidden, c1, bias=False)
          self.sig = nn.Sigmoid()

      def forward(self, x):
          # The debug print of x.size() was removed: it fired on every
          # forward pass during training and inference.
          b, c, _, _ = x.size()
          y = self.avgpool(x).view(b, c)
          y = self.sig(self.l2(self.relu(self.l1(y))))
          # Broadcasting over H and W is equivalent to expand_as(x).
          return x * y.view(b, c, 1, 1)

1.这里是SE注意力的代码段,同上一个注意力的加法一样,我喜欢加在C3后面。

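2.在yolo.py中做改动。注意:下面列表里写的是旧类名SELayer,而parse_model会按[c1, c2, *args[1:]]传参,旧的SELayer(c1, r)接收不了c2;如果使用上面2022.1.26更新的SE类(构造函数为SE(c1, c2, r)),请把列表里的SELayer改成SE。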

  1. def parse_model(d, ch): # model_dict, input_channels(3)
  2. logger.info('\n%3s%18s%3s%10s %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments'))
  3. anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple']
  4. na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors
  5. no = na * (nc + 5) # number of outputs = anchors * (classes + 5)
  6. layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out
  7. for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args
  8. m = eval(m) if isinstance(m, str) else m # eval strings
  9. for j, a in enumerate(args):
  10. try:
  11. args[j] = eval(a) if isinstance(a, str) else a # eval strings
  12. except:
  13. pass
  14. n = max(round(n * gd), 1) if n > 1 else n # depth gain
  15. if m in [Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP,
  16. C3, C3TR, CoordAtt, SELayer, eca_layer, CBAM]:
  17. c1, c2 = ch[f], args[0]
  18. if c2 != no: # if not output
  19. c2 = make_divisible(c2 * gw, 8)
  20. args = [c1, c2, *args[1:]]
  21. if m in [BottleneckCSP, C3, C3TR]:
  22. args.insert(2, n) # number of repeats
  23. n = 1
  24. elif m is nn.BatchNorm2d:
  25. args = [ch[f]]
  26. elif m is Concat:
  27. c2 = sum([ch[x] for x in f])
  28. elif m is Detect:
  29. args.append([ch[x] for x in f])
  30. if isinstance(args[1], int): # number of anchors
  31. args[1] = [list(range(args[1] * 2))] * len(f)
  32. elif m is Contract:
  33. c2 = ch[f] * args[0] ** 2
  34. elif m is Expand:
  35. c2 = ch[f] // args[0] ** 2
  36. else:
  37. c2 = ch[f]

3.在你要用的yaml文件中做改动。(如果使用的是更新后的SE类,下面最后一行的SELayer也要相应改成SE。)

  1. backbone:
  2. # [from, number, module, args]
  3. [[-1, 1, Focus, [64, 3]], # 0-P1/2
  4. [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
  5. [-1, 3,C3, [128]],
  6. [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
  7. [-1, 9, C3, [256]],
  8. [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
  9. [-1, 9, C3, [512]],
  10. [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
  11. [-1, 1, SPP, [1024, [5, 9, 13]]],
  12. [-1, 3, C3, [1024, False]], # 9
  13. [-1, 1, SELayer, [1024, 4]]
  14. ]

 运行成功后是这样的

 应该能看到那个注意力加在哪里了吧,这就是用上了。

这是我用的另一种添加注意力的方法,这种方法会加载预训练权重,推荐大家使用这种方法。既然推荐大家使用这种方法,那我推荐添加CBAM注意力那种方法目的是啥呢?哈哈哈哈再往下看。

天池竞赛-布匹缺陷检测baseline提升过程-给yolov5模型添加注意力机制_pprp的博客-CSDN博客_yolov5注意力机制

这是我看的将SE注意力添加到 yolov5模型里的博客,我同样也是引用了这位博主的方法,感谢分享,侵删。

 ECA注意力

  1. # class eca_layer(nn.Module):
  2. # """Constructs a ECA module.
  3. # Args:
  4. # channel: Number of channels of the input feature map
  5. # k_size: Adaptive selection of kernel size
  6. # """
  7. # def __init__(self, channel, k_size=3):
  8. # super(eca_layer, self).__init__()
  9. # self.avg_pool = nn.AdaptiveAvgPool2d(1)
  10. # self.conv = nn.Conv1d(1, 1, kernel_size=k_size, padding=(k_size - 1) // 2, bias=False)
  11. # self.sigmoid = nn.Sigmoid()
  12. #
  13. # def forward(self, x):
  14. # # feature descriptor on the global spatial information
  15. # y = self.avg_pool(x)
  16. #
  17. # # Two different branches of ECA module
  18. # y = self.conv(y.squeeze(-1).transpose(-1, -2)).transpose(-1, -2).unsqueeze(-1)
  19. #
  20. # # Multi-scale information fusion
  21. # y = self.sigmoid(y)
  22. # x=x*y.expand_as(x)
  23. #
  24. # return x

1.这里是注意力代码片段,放到自己的脚本里把注释取消掉就可以了,添加的位置同上,这里就不说了。 

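2.改动yolo.py。看以下代码段。注意:第一个if的模块列表里没有eca_layer,所以嵌套的if m in [BottleneckCSP, C3,eca_layer]对eca_layer不会生效,真正处理eca_layer的是后面的elif m is eca_layer分支。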

  1. if m in [Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP,
  2. C3, C3TR]:
  3. c1, c2 = ch[f], args[0]
  4. if c2 != no: # if not output
  5. c2 = make_divisible(c2 * gw, 8)
  6. args = [c1, c2, *args[1:]]
  7. if m in [BottleneckCSP, C3,eca_layer]:
  8. args.insert(2, n) # number of repeats
  9. n = 1
  10. elif m is nn.BatchNorm2d:
  11. args = [ch[f]]
  12. elif m is Concat:
  13. c2 = sum([ch[x] for x in f])
  14. elif m is Detect:
  15. args.append([ch[x] for x in f])
  16. if isinstance(args[1], int): # number of anchors
  17. args[1] = [list(range(args[1] * 2))] * len(f)
  18. elif m is Contract:
  19. c2 = ch[f] * args[0] ** 2
  20. elif m is Expand:
  21. c2 = ch[f] // args[0] ** 2
  22. elif m is eca_layer:
  23. channel=args[0]
  24. channel=make_divisible(channel*gw,8)if channel != no else channel
  25. args=[channel]
  26. else:
  27. c2 = ch[f]

 3.改动你要用的yaml文件。这里我要解释一下为什么交代了两种添加注意力的方法(第一种:将骨干里的C3全部替换掉;第二种:在骨干最后一层加注意力,做一个输出层)。第二种方法的模型目前还在跑,还没出结果,不过模型的结果也能猜个大概:有稳定的微小提升,detect效果不会提升太多。我在用第一种方法、用ECA注意力替换掉骨干里的全部C3时,模型的P、R、mAP均出现了下降的情况,大概就是一两个点;但是令人意外的是,它的检测效果很好,能够检测到未作改动前的模型很多检测不到的目标,当然也会比原模型出现更多的误检和漏检情况,手动改阈值后好了很多。因为数据集涉及到公司机密,所以这里就不放出来了。我做的是安全帽的检测,有兴趣的同学可以尝试一下这种添加注意力的方法。

看下其中一张的检测结果。

如果只是求提高模型准确率,推荐第二种方法。

 接下来就是发表在CVPR 2021上的注意力机制Coordinate Attention(CA)了。

Coordinate Attention(CoordAtt)

  1. # class h_sigmoid(nn.Module):
  2. # def __init__(self, inplace=True):
  3. # super(h_sigmoid, self).__init__()
  4. # self.relu = nn.ReLU6(inplace=inplace)
  5. #
  6. # def forward(self, x):
  7. # return self.relu(x + 3) / 6
  8. #
  9. #
  10. # class h_swish(nn.Module):
  11. # def __init__(self, inplace=True):
  12. # super(h_swish, self).__init__()
  13. # self.sigmoid = h_sigmoid(inplace=inplace)
  14. #
  15. # def forward(self, x):
  16. # return x * self.sigmoid(x)
  17. # class CoordAtt(nn.Module):
  18. # def __init__(self, inp, oup, reduction=32):
  19. # super(CoordAtt, self).__init__()
  20. # self.pool_h = nn.AdaptiveAvgPool2d((None, 1))
  21. # self.pool_w = nn.AdaptiveAvgPool2d((1, None))
  22. #
  23. # mip = max(8, inp // reduction)
  24. #
  25. # self.conv1 = nn.Conv2d(inp, mip, kernel_size=1, stride=1, padding=0)
  26. # self.bn1 = nn.BatchNorm2d(mip)
  27. # self.act = h_swish()
  28. #
  29. # self.conv_h = nn.Conv2d(mip, oup, kernel_size=1, stride=1, padding=0)
  30. # self.conv_w = nn.Conv2d(mip, oup, kernel_size=1, stride=1, padding=0)
  31. #
  32. # def forward(self, x):
  33. # identity = x
  34. #
  35. # n, c, h, w = x.size()
  36. # x_h = self.pool_h(x)
  37. # x_w = self.pool_w(x).permute(0, 1, 3, 2)
  38. #
  39. # y = torch.cat([x_h, x_w], dim=2)
  40. # y = self.conv1(y)
  41. # y = self.bn1(y)
  42. # y = self.act(y)
  43. #
  44. # x_h, x_w = torch.split(y, [h, w], dim=2)
  45. # x_w = x_w.permute(0, 1, 3, 2)
  46. #
  47. # a_h = self.conv_h(x_h).sigmoid()
  48. # a_w = self.conv_w(x_w).sigmoid()
  49. #
  50. # out = identity * a_w * a_h
  51. #
  52. # return out

 这是代码段,加在common.py的C3模块后面

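 这里是改动yolo.py的部分,最后在yaml文件里的改动这里就不说了,前面提供了两种方法供大家使用,大家可以自行选择。注意:下面的代码把CoordAtt放进了第一个if的模块列表,因此后面的elif m is CoordAtt分支实际上不会被执行,CoordAtt的参数是由第一个分支按[c1, c2, *args[1:]]生成的。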

  1. def parse_model(d, ch): # model_dict, input_channels(3)
  2. logger.info('\n%3s%18s%3s%10s %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments'))
  3. anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple']
  4. na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors
  5. no = na * (nc + 5) # number of outputs = anchors * (classes + 5)
  6. layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out
  7. for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args
  8. m = eval(m) if isinstance(m, str) else m # eval strings
  9. for j, a in enumerate(args):
  10. try:
  11. args[j] = eval(a) if isinstance(a, str) else a # eval strings
  12. except:
  13. pass
  14. n = max(round(n * gd), 1) if n > 1 else n # depth gain
  15. if m in [Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP,
  16. C3, C3TR,CBAMC3,CoordAtt]:#
  17. c1, c2 = ch[f], args[0]
  18. if c2 != no: # if not output
  19. c2 = make_divisible(c2 * gw, 8)
  20. args = [c1, c2, *args[1:]]
  21. if m in [BottleneckCSP, C3, C3TR]:
  22. args.insert(2, n) # number of repeats
  23. n = 1
  24. elif m is nn.BatchNorm2d:
  25. args = [ch[f]]
  26. elif m is Concat:
  27. c2 = sum([ch[x] for x in f])
  28. elif m is Detect:
  29. args.append([ch[x] for x in f])
  30. if isinstance(args[1], int): # number of anchors
  31. args[1] = [list(range(args[1] * 2))] * len(f)
  32. elif m is Contract:
  33. c2 = ch[f] * args[0] ** 2
  34. elif m is Expand:
  35. c2 = ch[f] // args[0] ** 2
  36. # elif m is eca_layer:
  37. # channel=args[0]
  38. # channel=make_divisible(channel*gw,8)if channel != no else channel
  39. # args=[channel]
  40. elif m is CoordAtt:
  41. inp,oup,re = args[0],args[1],args[2]
  42. oup = make_divisible(oup * gw, 8) if oup != no else oup
  43. args = [inp,oup,re]
  44. else:
  45. c2 = ch[f]

后面的ECA和CA注意力添加方法是我对着前两位博主照葫芦画瓢,在我的本地运行多次,就俩字,好用,以后的注意力也可以按照这种方法去添加。

yolov5-6.0版本的注意力添加方法请移步这里

各种注意力的添加方法以及如何work,我都懂一些,如果有需要的朋友可以联系我,赚点生活费。


2022.2.14更:本人已实现使用densenet替换focus、neck中fpn结构改为bi-fpn代码,有需要的小伙伴请私聊,赚点生活费。可用于毕业以及硕士小论文发表的trick。 

 不胜感激,最后祝大家年薪百万。

扯完了。

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/菜鸟追梦旅行/article/detail/346260
推荐阅读
相关标签
  

闽ICP备14008679号