当前位置:   article > 正文

YOLOv7的yolov7.yaml详解_yolov7.yaml的作用

yolov7.yaml的作用

将YOLOv7流程图与yolov7.yaml文件进行一一对应,相互匹配,便于理解整个网络过程。

具体注释如下

  1. # parameters
  2. nc: 80 # number of classes
  3. depth_multiple: 1.0 # model depth multiple
  4. width_multiple: 1.0 # layer channel multiple
  5. # anchors
  6. anchors:
  7. - [12,16, 19,36, 40,28] # P3/8
  8. - [36,75, 76,55, 72,146] # P4/16
  9. - [142,110, 192,243, 459,401] # P5/32
  10. # yolov7 backbone
  11. backbone:
  12. # [from, number, module, args] 输入为 640*640*3
  13. [[-1, 1, Conv, [32, 3, 1]], # 0 第零层
  14. [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 320*320*64
  15. [-1, 1, Conv, [64, 3, 1]],
  16. [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 160*160*128
  17. # 该部分整体为ELAN,H和W不发生变化;倒数第二行的Concat把4个分支拼接,channel由输入的C(128)变为2C(256);最终输出channel由最后一行的1*1 Conv决定,该部分输出为2C
  18. [-1, 1, Conv, [64, 1, 1]],
  19. [-2, 1, Conv, [64, 1, 1]],
  20. [-1, 1, Conv, [64, 3, 1]],
  21. [-1, 1, Conv, [64, 3, 1]],
  22. [-1, 1, Conv, [64, 3, 1]],
  23. [-1, 1, Conv, [64, 3, 1]],
  24. [[-1, -3, -5, -6], 1, Concat, [1]], # 160*160*256 Concat是在channel维度上进行拼接
  25. [-1, 1, Conv, [256, 1, 1]], # 11 160*160*256
  26. # 该部分为MP1,H和W变为原来的0.5倍,C不变
  27. [-1, 1, MP, []], # 80*80*256 MP为最大池化MaxPooling,H和W变为原来的0.5倍,channel不变
  28. [-1, 1, Conv, [128, 1, 1]],
  29. [-3, 1, Conv, [128, 1, 1]],
  30. [-1, 1, Conv, [128, 3, 2]],
  31. [[-1, -3], 1, Concat, [1]], # 16-P3/8 80*80*256
  32. # ELAN,H和W不发生变化;倒数第二行的Concat把4个分支拼接,channel由输入的C(256)变为2C(512);最终输出channel由最后一行的1*1 Conv决定,该部分输出为2C
  33. [-1, 1, Conv, [128, 1, 1]],
  34. [-2, 1, Conv, [128, 1, 1]],
  35. [-1, 1, Conv, [128, 3, 1]],
  36. [-1, 1, Conv, [128, 3, 1]],
  37. [-1, 1, Conv, [128, 3, 1]],
  38. [-1, 1, Conv, [128, 3, 1]],
  39. [[-1, -3, -5, -6], 1, Concat, [1]], # 80*80*512
  40. [-1, 1, Conv, [512, 1, 1]], # 24 80*80*512
  41. # MP1 + ELAN
  42. [-1, 1, MP, []],
  43. [-1, 1, Conv, [256, 1, 1]],
  44. [-3, 1, Conv, [256, 1, 1]],
  45. [-1, 1, Conv, [256, 3, 2]],
  46. [[-1, -3], 1, Concat, [1]], # 29-P4/16 40*40*512
  47. [-1, 1, Conv, [256, 1, 1]],
  48. [-2, 1, Conv, [256, 1, 1]],
  49. [-1, 1, Conv, [256, 3, 1]],
  50. [-1, 1, Conv, [256, 3, 1]],
  51. [-1, 1, Conv, [256, 3, 1]],
  52. [-1, 1, Conv, [256, 3, 1]],
  53. [[-1, -3, -5, -6], 1, Concat, [1]], # 40*40*1024
  54. [-1, 1, Conv, [1024, 1, 1]], # 37 40*40*1024
  55. # MP1 + ELAN 注意该部分ELAN与前面的不同:输出channel与输入channel相同(输入1024,输出仍为1024),没有翻倍
  56. [-1, 1, MP, []],
  57. [-1, 1, Conv, [512, 1, 1]],
  58. [-3, 1, Conv, [512, 1, 1]],
  59. [-1, 1, Conv, [512, 3, 2]],
  60. [[-1, -3], 1, Concat, [1]], # 42-P5/32 20*20*1024
  61. [-1, 1, Conv, [256, 1, 1]],
  62. [-2, 1, Conv, [256, 1, 1]],
  63. [-1, 1, Conv, [256, 3, 1]],
  64. [-1, 1, Conv, [256, 3, 1]],
  65. [-1, 1, Conv, [256, 3, 1]],
  66. [-1, 1, Conv, [256, 3, 1]],
  67. [[-1, -3, -5, -6], 1, Concat, [1]], # 20*20*1024
  68. [-1, 1, Conv, [1024, 1, 1]], # 50 20*20*1024
  69. ]
  70. # yolov7 head
  71. head:
  72. [[-1, 1, SPPCSPC, [512]], # 51 20*20*512 最终输出只有channel发生变化
  73. # UP+CONCAT 前两行构成UP模块,第三行对第二组MP1+ELAN后的输出(# 37)进行卷积,最后统一concat
  74. [-1, 1, Conv, [256, 1, 1]],
  75. [-1, 1, nn.Upsample, [None, 2, 'nearest']], # 40*40*256 上采样,H和W变为原来的2倍,channel不变
  76. [37, 1, Conv, [256, 1, 1]], # route backbone P4
  77. [[-1, -2], 1, Concat, [1]], # 40*40*512
  78. # ELAN-H
  79. [-1, 1, Conv, [256, 1, 1]],
  80. [-2, 1, Conv, [256, 1, 1]],
  81. [-1, 1, Conv, [128, 3, 1]],
  82. [-1, 1, Conv, [128, 3, 1]],
  83. [-1, 1, Conv, [128, 3, 1]],
  84. [-1, 1, Conv, [128, 3, 1]],
  85. [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], # 40*40*1024
  86. [-1, 1, Conv, [256, 1, 1]], # 63 40*40*256
  87. # UP+CONCAT
  88. [-1, 1, Conv, [128, 1, 1]],
  89. [-1, 1, nn.Upsample, [None, 2, 'nearest']], # 80*80*128
  90. [24, 1, Conv, [128, 1, 1]], # route backbone P3
  91. [[-1, -2], 1, Concat, [1]], # 80*80*256
  92. # ELAN-H
  93. [-1, 1, Conv, [128, 1, 1]],
  94. [-2, 1, Conv, [128, 1, 1]],
  95. [-1, 1, Conv, [64, 3, 1]],
  96. [-1, 1, Conv, [64, 3, 1]],
  97. [-1, 1, Conv, [64, 3, 1]],
  98. [-1, 1, Conv, [64, 3, 1]],
  99. [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], # 80*80*512
  100. [-1, 1, Conv, [128, 1, 1]], # 75 80*80*128
  101. # MP2
  102. [-1, 1, MP, []], # 40*40*128
  103. [-1, 1, Conv, [128, 1, 1]],
  104. [-3, 1, Conv, [128, 1, 1]],
  105. [-1, 1, Conv, [128, 3, 2]], # 40*40*128
  106. [[-1, -3, 63], 1, Concat, [1]], # 40*40*512
  107. # ELAN-H
  108. [-1, 1, Conv, [256, 1, 1]],
  109. [-2, 1, Conv, [256, 1, 1]],
  110. [-1, 1, Conv, [128, 3, 1]],
  111. [-1, 1, Conv, [128, 3, 1]],
  112. [-1, 1, Conv, [128, 3, 1]],
  113. [-1, 1, Conv, [128, 3, 1]],
  114. [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], # 40*40*1024
  115. [-1, 1, Conv, [256, 1, 1]], # 88 40*40*256
  116. # MP2
  117. [-1, 1, MP, []], # 20*20*256
  118. [-1, 1, Conv, [256, 1, 1]],
  119. [-3, 1, Conv, [256, 1, 1]],
  120. [-1, 1, Conv, [256, 3, 2]], # 20*20*256
  121. [[-1, -3, 51], 1, Concat, [1]], # 20*20*1024
  122. # ELAN-H
  123. [-1, 1, Conv, [512, 1, 1]],
  124. [-2, 1, Conv, [512, 1, 1]],
  125. [-1, 1, Conv, [256, 3, 1]],
  126. [-1, 1, Conv, [256, 3, 1]],
  127. [-1, 1, Conv, [256, 3, 1]],
  128. [-1, 1, Conv, [256, 3, 1]],
  129. [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], # 20*20*2048
  130. [-1, 1, Conv, [512, 1, 1]], # 101 20*20*512
  131. # RepConv是重参数化卷积,用3*3卷积重参数化,加速推理,不改变H和W,具体内容参考论文
  132. [75, 1, RepConv, [256, 3, 1]],
  133. [88, 1, RepConv, [512, 3, 1]],
  134. [101, 1, RepConv, [1024, 3, 1]],
  135. [[102,103,104], 1, IDetect, [nc, anchors]], # Detect(P3, P4, P5)
  136. ]

 参考链接:深入浅出 Yolo 系列之 Yolov7 基础网络结构详解_yolov7网络结构_计算机视觉linke的博客-CSDN博客

上图为参照的V7结构图,其中,右下方的MP2的输出标注为80*80*256,而个人按yaml计算的结果是40*40*256:第75层输出为80*80*128,MP分支(最大池化+1*1卷积)与stride为2的3*3卷积分支各得到40*40*128,二者拼接后即为40*40*256。

MP1、MP2、UP模块只看结构图不易理解,也容易产生误解,建议自己对照yaml文件逐层梳理一遍,思路会清晰很多。

图里的ELAN模块比论文里的好理解很多。

SPPCSPC在yaml文件里没有解释,具体代码在common.py里。附上代码如下,可结合流程图梳理,结构很清晰:

  1. class SPPCSPC(nn.Module):
  2. # CSP https://github.com/WongKinYiu/CrossStagePartialNetworks
  3. def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5, k=(5, 9, 13)):
  4. super(SPPCSPC, self).__init__()
  5. c_ = int(2 * c2 * e) # hidden channels
  6. self.cv1 = Conv(c1, c_, 1, 1)
  7. self.cv2 = Conv(c1, c_, 1, 1)
  8. self.cv3 = Conv(c_, c_, 3, 1)
  9. self.cv4 = Conv(c_, c_, 1, 1)
  10. self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])
  11. self.cv5 = Conv(4 * c_, c_, 1, 1)
  12. self.cv6 = Conv(c_, c_, 3, 1)
  13. self.cv7 = Conv(2 * c_, c2, 1, 1)
  14. def forward(self, x):
  15. x1 = self.cv4(self.cv3(self.cv1(x)))
  16. y1 = self.cv6(self.cv5(torch.cat([x1] + [m(x1) for m in self.m], 1)))
  17. y2 = self.cv2(x)
  18. return self.cv7(torch.cat((y1, y2), dim=1))

最右面的是检测头部分,自下到上依次为P3、P4、P5。

RepConv为重参数化卷积,简单来说就是把训练时的多分支结构等效转换为推理时的单路径结构,精度提升一点点,推理速度提升很多。个人看完RepVGG论文后感觉很有意义,数学推导很有意思也很有道理。附上论文中的示意图以及参考博主的流程图。

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/盐析白兔/article/detail/124192
推荐阅读
相关标签
  

闽ICP备14008679号