当前位置:   article > 正文

yolo增加RFEM_c3rfem

c3rfem

论文地址:https://arxiv.org/pdf/2208.02019.pdf

代码地址:https://github.com/Krasjet-Yu/YOLO-FaceV2 (YOLO-FaceV2: A Scale and Occlusion Aware Face Detector)

总的来说就是RFEM利用了感受野在特征图中的优势,通过使用不同膨胀卷积率的分支来捕捉多尺度信息和不同范围的依赖关系。这种设计有助于减少参数数量,降低过拟合风险,并充分利用每个样本。

1、yolov7-tiny

 创建配置文件yolov7-tiny-RFEM.yaml

  1. # parameters
  2. nc: 80 # number of classes
  3. depth_multiple: 1.0 # model depth multiple
  4. width_multiple: 1.0 # layer channel multiple
  5. activation: nn.LeakyReLU(0.1)
  6. # anchors
  7. anchors:
  8. - [10,13, 16,30, 33,23] # P3/8
  9. - [30,61, 62,45, 59,119] # P4/16
  10. - [116,90, 156,198, 373,326] # P5/32
  11. # yolov7-tiny backbone
  12. backbone:
  13. # [from, number, module, args] c2, k=1, s=1, p=None, g=1, act=True
  14. [[-1, 1, Conv, [32, 3, 2, None, 1]], # 0-P1/2
  15. [-1, 1, Conv, [64, 3, 2, None, 1]], # 1-P2/4
  16. [-1, 1, Conv, [32, 1, 1, None, 1]],
  17. [-2, 1, Conv, [32, 1, 1, None, 1]],
  18. [-1, 1, Conv, [32, 3, 1, None, 1]],
  19. [-1, 1, Conv, [32, 3, 1, None, 1]],
  20. [[-1, -2, -3, -4], 1, Concat, [1]],
  21. [-1, 1, Conv, [64, 1, 1, None, 1]], # 7
  22. [-1, 1, MP, []], # 8-P3/8
  23. [-1, 1, Conv, [64, 1, 1, None, 1]],
  24. [-2, 1, Conv, [64, 1, 1, None, 1]],
  25. [-1, 1, Conv, [64, 3, 1, None, 1]],
  26. [-1, 1, Conv, [64, 3, 1, None, 1]],
  27. [[-1, -2, -3, -4], 1, Concat, [1]],
  28. [-1, 1, Conv, [128, 1, 1, None, 1]], # 14
  29. [-1, 1, MP, []], # 15-P4/16
  30. [-1, 1, Conv, [128, 1, 1, None, 1]],
  31. [-2, 1, Conv, [128, 1, 1, None, 1]],
  32. [-1, 1, Conv, [128, 3, 1, None, 1]],
  33. [-1, 1, Conv, [128, 3, 1, None, 1]],
  34. [[-1, -2, -3, -4], 1, Concat, [1]],
  35. [-1, 1, Conv, [256, 1, 1, None, 1]], # 21
  36. [-1, 1, MP, []], # 22-P5/32
  37. [-1, 1, Conv, [256, 1, 1, None, 1]],
  38. [-2, 1, Conv, [256, 1, 1, None, 1]],
  39. [-1, 1, Conv, [256, 3, 1, None, 1]],
  40. [-1, 1, Conv, [256, 3, 1, None, 1]],
  41. [[-1, -2, -3, -4], 1, Concat, [1]],
  42. [-1, 1, Conv, [512, 1, 1, None, 1]], # 28
  43. ]
  44. # yolov7-tiny head
  45. head:
  46. [[-1, 1, Conv, [256, 1, 1, None, 1]],
  47. [-2, 1, Conv, [256, 1, 1, None, 1]],
  48. [-1, 1, SP, [5]],
  49. [-2, 1, SP, [9]],
  50. [-3, 1, SP, [13]],
  51. [[-1, -2, -3, -4], 1, Concat, [1]],
  52. [-1, 1, Conv, [256, 1, 1, None, 1]],
  53. [[-1, -7], 1, Concat, [1]],
  54. [-1, 1, Conv, [256, 1, 1, None, 1]], # 37
  55. [-1, 1, RFEM, [256]],
  56. [-1, 1, Conv, [128, 1, 1, None, 1]],
  57. [-1, 1, nn.Upsample, [None, 2, 'nearest']],
  58. [21, 1, Conv, [128, 1, 1, None, 1]], # route backbone P4
  59. [[-1, -2], 1, Concat, [1]],
  60. [-1, 1, Conv, [64, 1, 1, None, 1]],
  61. [-2, 1, Conv, [64, 1, 1, None, 1]],
  62. [-1, 1, Conv, [64, 3, 1, None, 1]],
  63. [-1, 1, Conv, [64, 3, 1, None, 1]],
  64. [[-1, -2, -3, -4], 1, Concat, [1]],
  65. [-1, 1, Conv, [128, 1, 1, None, 1]], # 48
  66. [-1, 1, Conv, [64, 1, 1, None, 1]],
  67. [-1, 1, nn.Upsample, [None, 2, 'nearest']],
  68. [14, 1, Conv, [64, 1, 1, None, 1]], # route backbone P3
  69. [[-1, -2], 1, Concat, [1]],
  70. [-1, 1, Conv, [32, 1, 1, None, 1]],
  71. [-2, 1, Conv, [32, 1, 1, None, 1]],
  72. [-1, 1, Conv, [32, 3, 1, None, 1]],
  73. [-1, 1, Conv, [32, 3, 1, None, 1]],
  74. [[-1, -2, -3, -4], 1, Concat, [1]],
  75. [-1, 1, Conv, [64, 1, 1, None, 1]], # 58
  76. [-1, 1, Conv, [128, 3, 2, None, 1]],
  77. [[-1, 48], 1, Concat, [1]],
  78. [-1, 1, Conv, [64, 1, 1, None, 1]],
  79. [-2, 1, Conv, [64, 1, 1, None, 1]],
  80. [-1, 1, Conv, [64, 3, 1, None, 1]],
  81. [-1, 1, Conv, [64, 3, 1, None, 1]],
  82. [[-1, -2, -3, -4], 1, Concat, [1]],
  83. [-1, 1, Conv, [128, 1, 1, None, 1]], # 66
  84. [-1, 1, Conv, [256, 3, 2, None, 1]],
  85. [[-1, 37], 1, Concat, [1]],
  86. [-1, 1, Conv, [128, 1, 1, None, 1]],
  87. [-2, 1, Conv, [128, 1, 1, None, 1]],
  88. [-1, 1, Conv, [128, 3, 1, None, 1]],
  89. [-1, 1, Conv, [128, 3, 1, None, 1]],
  90. [[-1, -2, -3, -4], 1, Concat, [1]],
  91. [-1, 1, Conv, [256, 1, 1, None, 1]], # 74
  92. [58, 1, Conv, [128, 3, 1, None, 1]],
  93. [66, 1, Conv, [256, 3, 1, None, 1]],
  94. [74, 1, Conv, [512, 3, 1, None, 1]],
  95. [[75,76,77], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
  96. ]

在common.py中增加

  1. # RFEM
  2. class TridentBlock(nn.Module):
  3. def __init__(self, c1, c2, stride=1, c=False, e=0.5, padding=[1, 2, 3], dilate=[1, 2, 3], bias=False):
  4. super(TridentBlock, self).__init__()
  5. self.stride = stride
  6. self.c = c
  7. c_ = int(c2 * e)
  8. self.padding = padding
  9. self.dilate = dilate
  10. self.share_weightconv1 = nn.Parameter(torch.Tensor(c_, c1, 1, 1))
  11. self.share_weightconv2 = nn.Parameter(torch.Tensor(c2, c_, 3, 3))
  12. self.bn1 = nn.BatchNorm2d(c_)
  13. self.bn2 = nn.BatchNorm2d(c2)
  14. # self.act = nn.SiLU()
  15. self.act = Conv.default_act
  16. nn.init.kaiming_uniform_(self.share_weightconv1, nonlinearity="relu")
  17. nn.init.kaiming_uniform_(self.share_weightconv2, nonlinearity="relu")
  18. if bias:
  19. self.bias = nn.Parameter(torch.Tensor(c2))
  20. else:
  21. self.bias = None
  22. if self.bias is not None:
  23. nn.init.constant_(self.bias, 0)
  24. def forward_for_small(self, x):
  25. residual = x
  26. out = nn.functional.conv2d(x, self.share_weightconv1, bias=self.bias)
  27. out = self.bn1(out)
  28. out = self.act(out)
  29. out = nn.functional.conv2d(out, self.share_weightconv2, bias=self.bias, stride=self.stride,
  30. padding=self.padding[0],
  31. dilation=self.dilate[0])
  32. out = self.bn2(out)
  33. out += residual
  34. out = self.act(out)
  35. return out
  36. def forward_for_middle(self, x):
  37. residual = x
  38. out = nn.functional.conv2d(x, self.share_weightconv1, bias=self.bias)
  39. out = self.bn1(out)
  40. out = self.act(out)
  41. out = nn.functional.conv2d(out, self.share_weightconv2, bias=self.bias, stride=self.stride,
  42. padding=self.padding[1],
  43. dilation=self.dilate[1])
  44. out = self.bn2(out)
  45. out += residual
  46. out = self.act(out)
  47. return out
  48. def forward_for_big(self, x):
  49. residual = x
  50. out = nn.functional.conv2d(x, self.share_weightconv1, bias=self.bias)
  51. out = self.bn1(out)
  52. out = self.act(out)
  53. out = nn.functional.conv2d(out, self.share_weightconv2, bias=self.bias, stride=self.stride,
  54. padding=self.padding[2],
  55. dilation=self.dilate[2])
  56. out = self.bn2(out)
  57. out += residual
  58. out = self.act(out)
  59. return out
  60. def forward(self, x):
  61. xm = x
  62. base_feat = []
  63. if self.c is not False:
  64. x1 = self.forward_for_small(x)
  65. x2 = self.forward_for_middle(x)
  66. x3 = self.forward_for_big(x)
  67. else:
  68. x1 = self.forward_for_small(xm[0])
  69. x2 = self.forward_for_middle(xm[1])
  70. x3 = self.forward_for_big(xm[2])
  71. base_feat.append(x1)
  72. base_feat.append(x2)
  73. base_feat.append(x3)
  74. return base_feat
  75. class RFEM(nn.Module):
  76. def __init__(self, c1, c2, n=1, e=0.5, stride=1):
  77. super(RFEM, self).__init__()
  78. c = True
  79. layers = []
  80. layers.append(TridentBlock(c1, c2, stride=stride, c=c, e=e))
  81. c1 = c2
  82. for i in range(1, n):
  83. layers.append(TridentBlock(c1, c2))
  84. self.layer = nn.Sequential(*layers)
  85. # self.cv = Conv(c2, c2)
  86. self.bn = nn.BatchNorm2d(c2)
  87. # self.act = nn.SiLU()
  88. self.act = Conv.default_act
  89. def forward(self, x):
  90. out = self.layer(x)
  91. out = out[0] + out[1] + out[2] + x
  92. out = self.act(self.bn(out))
  93. return out
  94. class C3RFEM(nn.Module):
  95. def __init__(self, c1, c2, n=1, shortcut=True, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
  96. super().__init__()
  97. c_ = int(c2 * e) # hidden channels
  98. self.cv1 = Conv(c1, c_, 1, 1)
  99. self.cv2 = Conv(c1, c_, 1, 1)
  100. self.cv3 = Conv(2 * c_, c2, 1) # act=FReLU(c2)
  101. # self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
  102. # self.rfem = RFEM(c_, c_, n)
  103. self.m = nn.Sequential(*[RFEM(c_, c_, n=1, e=e) for _ in range(n)])
  104. # self.m = nn.Sequential(*[CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)])
  105. def forward(self, x):
  106. return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1))

其中C3RFEM对应原作者的实现

在yolo.py中修改:

  1. n = n_ = max(round(n * gd), 1) if n > 1 else n # depth gain
  2. if m in {
  3. Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv,
  4. BottleneckCSP, C3, C3TR, C3SPP, C3Ghost, nn.ConvTranspose2d, DWConvTranspose2d, C3x, StemBlock,
  5. BlazeBlock, DoubleBlazeBlock, ShuffleV2Block, MobileBottleneck, InvertedResidual, ConvBNReLU,
  6. RepVGGBlock, SEBlock, RepBlock, SimCSPSPPF, C3_P, SPPCSPC, RepConv, RFEM, C3RFEM}:
  7. c1, c2 = ch[f], args[0]
  8. if c2 != no: # if not output
  9. c2 = make_divisible(c2 * gw, 8)
  10. if m == InvertedResidual:
  11. c2 = make_divisible(c2 * gw, 4 if gw == 0.1 else 8)
  12. args = [c1, c2, *args[1:]]
  13. if m in {BottleneckCSP, C3, C3TR, C3Ghost, C3x, C3_P, C3RFEM}:
  14. args.insert(2, n) # number of repeats
  15. n = 1

运行yolo.py

2、yolov5

yolov5s-RFEM.yaml

  1. # YOLOv5
    声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/菜鸟追梦旅行/article/detail/93958
    推荐阅读
    相关标签