YOLOv6s overall structure:
yolov6/models/yolo.py
Model:
class Model(nn.Module):
    def __init__():
        self.backbone, self.neck, self.detect = build_network(config, channels, num_classes, anchors, num_layers)

    def forward(self, x):
        x = self.backbone(x)
        x = self.neck(x)
        x = self.detect(x)
        return x
build_network creates the backbone, the neck, and the detect head (the backbone is EfficientRep).
EfficientRep is defined in yolov6/models/efficientrep.py.
def build_network(config, channels, num_classes, anchors, num_layers):
    backbone = EfficientRep(
        in_channels=channels,
        channels_list=channels_list,
        num_repeats=num_repeat,
        block=block
    )
    neck = NECK(
        channels_list=channels_list,
        num_repeats=num_repeat,
        block=block
    )
    head_layers = build_effidehead_layer(channels_list, num_anchors, num_classes, reg_max)
    head = Detect(num_classes, anchors, num_layers, head_layers=head_layers, use_dfl=use_dfl)
    return backbone, neck, head
block=RepVGGBlock
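The channels_list and num_repeat passed into build_network come from the model config, scaled by the width and depth multipliers. A minimal sketch of that arithmetic for yolov6s (the 0.50 width / 0.33 depth multipliers and the round-up-to-multiple-of-8 rule are assumptions based on the standard yolov6s config, not code copied from the repo):

import math

depth_mul, width_mul = 0.33, 0.50
base_channels = [64, 128, 256, 512, 1024,           # backbone out_channels
                 256, 128, 128, 256, 256, 512]      # neck out_channels
base_repeats  = [1, 6, 12, 18, 6, 12, 12, 12, 12]   # backbone + neck num_repeats

def make_divisible(x, divisor=8):
    # round channel counts up to a multiple of 8
    return math.ceil(x / divisor) * divisor

channels_list = [make_divisible(c * width_mul) for c in base_channels]
num_repeat = [max(round(r * depth_mul), 1) if r > 1 else r for r in base_repeats]
print(channels_list)  # [32, 64, 128, 256, 512, 128, 64, 64, 128, 128, 256]
print(num_repeat)     # [1, 2, 4, 6, 2, 4, 4, 4, 4]

These numbers are the ones used in the shape walkthroughs below.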
class EfficientRep(nn.Module):
    def __init__(
        self,
        in_channels=3,
        block=RepVGGBlock
    ):
        super().__init__()

        self.stem = RepVGGBlock()
        self.ERBlock_2 = nn.Sequential(
            RepVGGBlock(),
            RepBlock(
                in_channels=channels_list[1],
                out_channels=channels_list[1],
                n=num_repeats[1],
                block=RepVGGBlock,
            )
        )
        self.ERBlock_3 = nn.Sequential(
            RepVGGBlock(),
            RepBlock(
                in_channels=channels_list[2],
                out_channels=channels_list[2],
                n=num_repeats[2],
                block=RepVGGBlock,
            )
        )
        self.ERBlock_4 = nn.Sequential(
            RepVGGBlock(),
            RepBlock(
                in_channels=channels_list[3],
                out_channels=channels_list[3],
                n=num_repeats[3],
                block=RepVGGBlock,
            )
        )
        self.ERBlock_5 = nn.Sequential(
            RepVGGBlock(),
            RepBlock(
                in_channels=channels_list[4],
                out_channels=channels_list[4],
                n=num_repeats[4],
                block=RepVGGBlock,
            ),
            SimSPPF(
                in_channels=channels_list[4],
                out_channels=channels_list[4],
                kernel_size=5
            )
        )

    def forward(self, x):
        outputs = []
        x = self.stem(x)
        x = self.ERBlock_2(x)
        x = self.ERBlock_3(x)
        outputs.append(x)
        x = self.ERBlock_4(x)
        outputs.append(x)
        x = self.ERBlock_5(x)
        outputs.append(x)
        return tuple(outputs)
The stem is a single RepVGGBlock (abbreviated RVB below). Its printed module structure:
RepVGGBlock(
  (nonlinearity): ReLU(inplace=True)
  (se): Identity()
  (rbr_dense): Sequential(
    (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
  )
  (rbr_1x1): Sequential(
    (conv): Conv2d(3, 32, kernel_size=(1, 1), stride=(2, 2), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
  )
)
RepVGGBlock
RepVGGBlock is defined in yolov6/layers/common.py; the deploy flag indicates inference (deployment) mode.
class RepVGGBlock(nn.Module):
    '''RepVGGBlock is a basic rep-style block, including training and deploy status
    This code is based on https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py
    '''
    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=1,
                 dilation=1, groups=1, padding_mode='zeros', deploy=False, use_se=False):
        super().__init__()
        self.deploy = deploy
        self.nonlinearity = nn.ReLU()
        self.se = nn.Identity()                   # no SE block in yolov6s
        padding_11 = padding - kernel_size // 2   # padding for the 1x1 branch

        if deploy:
            # deploy mode: a single re-parameterized conv replaces the three branches
            self.rbr_reparam = nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                                         kernel_size=kernel_size, stride=stride, padding=padding,
                                         dilation=dilation, groups=groups, bias=True,
                                         padding_mode=padding_mode)
        else:
            # training mode: identity (BN only), 3x3 conv+BN, and 1x1 conv+BN branches
            self.rbr_identity = nn.BatchNorm2d(num_features=in_channels) \
                if out_channels == in_channels and stride == 1 else None
            self.rbr_dense = conv_bn(in_channels=in_channels, out_channels=out_channels,
                                     kernel_size=kernel_size, stride=stride, padding=padding, groups=groups)
            self.rbr_1x1 = conv_bn(in_channels=in_channels, out_channels=out_channels,
                                   kernel_size=1, stride=stride, padding=padding_11, groups=groups)

    def forward(self, inputs):
        if hasattr(self, 'rbr_reparam'):          # deploy mode
            return self.nonlinearity(self.se(self.rbr_reparam(inputs)))
        id_out = 0 if self.rbr_identity is None else self.rbr_identity(inputs)
        return self.nonlinearity(self.se(self.rbr_dense(inputs) + self.rbr_1x1(inputs) + id_out))
During training the block has three branches: a 3x3 conv, a 1x1 conv, and a BN (identity) branch, whose outputs are summed. For deployment, the three branches are re-parameterized into a single equivalent 3x3 convolution (rbr_reparam), which is much friendlier for inference.
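A minimal numerical sketch of this fusion idea (my own illustration, not the actual get_equivalent_kernel_bias / switch_to_deploy code from the repo; BN layers are kept in eval mode so the branch math is deterministic):

import torch
import torch.nn as nn
import torch.nn.functional as F

def fuse_conv_bn(conv_w, bn):
    # fold a BatchNorm into the preceding conv: return an equivalent kernel and bias
    std = (bn.running_var + bn.eps).sqrt()
    w = conv_w * (bn.weight / std).reshape(-1, 1, 1, 1)
    b = bn.bias - bn.running_mean * bn.weight / std
    return w, b

c = 8
dense = nn.Sequential(nn.Conv2d(c, c, 3, 1, 1, bias=False), nn.BatchNorm2d(c)).eval()  # 3x3 branch
one   = nn.Sequential(nn.Conv2d(c, c, 1, 1, 0, bias=False), nn.BatchNorm2d(c)).eval()  # 1x1 branch
idbn  = nn.BatchNorm2d(c).eval()                                                       # identity branch

w3, b3 = fuse_conv_bn(dense[0].weight, dense[1])
w1, b1 = fuse_conv_bn(F.pad(one[0].weight, [1, 1, 1, 1]), one[1])   # pad the 1x1 kernel to 3x3
wid = torch.zeros(c, c, 3, 3)
wid[torch.arange(c), torch.arange(c), 1, 1] = 1.0                   # identity written as a 3x3 kernel
wid, bid = fuse_conv_bn(wid, idbn)

rep = nn.Conv2d(c, c, 3, 1, 1)                                      # the single deploy-time conv
with torch.no_grad():
    rep.weight.copy_(w3 + w1 + wid)
    rep.bias.copy_(b3 + b1 + bid)

x = torch.randn(1, c, 16, 16)
y_train  = dense(x) + one(x) + idbn(x)   # three-branch (training-structure) output
y_deploy = rep(x)                        # single fused 3x3 conv
print(torch.allclose(y_train, y_deploy, atol=1e-4))  # True (up to float error)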
RepBlock (RB) is N RepVGGBlocks (RVBs) in series. Each ERBlock is therefore a chain of RVBs: the first, standalone RVB performs the size change (stride=2 downsampling), and the following N RVBs, grouped into a RepBlock, fuse features while keeping the spatial size unchanged (see the sketch after the ERBlock breakdown below).
ERBlock_2: one RVB + RB (1 RVB + 1 RVB)
ERBlock_3: one RVB + RB (1 RVB + 3 RVBs)
ERBlock_4: one RVB + RB (1 RVB + 5 RVBs)
ERBlock_5: one RVB + RB (1 RVB + 1 RVB) + SimSPPF
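A hedged sketch of this composition (the RepVGGBlock stand-in here is just conv3x3 + BN + ReLU without the multi-branch structure, and the 32 -> 64 channels / n=2 repeats are the yolov6s ERBlock_2 numbers assumed above):

import torch
import torch.nn as nn

class RVB(nn.Module):
    # simplified stand-in for RepVGGBlock: conv3x3 + BN + ReLU
    def __init__(self, c1, c2, stride=1):
        super().__init__()
        self.conv = nn.Conv2d(c1, c2, 3, stride, 1, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        self.act = nn.ReLU(inplace=True)
    def forward(self, x):
        return self.act(self.bn(self.conv(x)))

class RepBlock(nn.Module):
    # n RVBs in series, all stride 1: the first may change channels, the rest keep them
    def __init__(self, c1, c2, n=1):
        super().__init__()
        self.conv1 = RVB(c1, c2)
        self.block = nn.Sequential(*(RVB(c2, c2) for _ in range(n - 1))) if n > 1 else None
    def forward(self, x):
        x = self.conv1(x)
        return self.block(x) if self.block is not None else x

erblock2 = nn.Sequential(RVB(32, 64, stride=2),   # the standalone RVB does the 2x downsample
                         RepBlock(64, 64, n=2))   # 1 RVB + 1 RVB, spatial size unchanged
x = torch.randn(1, 32, 320, 320)
print(erblock2(x).shape)  # torch.Size([1, 64, 160, 160])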
class SimSPPF(nn.Module):
    '''Simplified SPPF with ReLU activation'''
    def __init__(self, in_channels, out_channels, kernel_size=5):
        super().__init__()
        c_ = in_channels // 2  # hidden channels
        self.cv1 = SimConv(in_channels, c_, 1, 1)
        self.cv2 = SimConv(c_ * 4, out_channels, 1, 1)
        self.m = nn.MaxPool2d(kernel_size=kernel_size, stride=1, padding=kernel_size // 2)

    def forward(self, x):
        x = self.cv1(x)
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            y1 = self.m(x)
            y2 = self.m(y1)
            return self.cv2(torch.cat([x, y1, y2, self.m(y2)], 1))
SimConv (SConv) is conv + BN + ReLU. The input first passes through a SimConv layer: the feature map's h and w stay the same, while the output channels become half of the input channels.
class SimConv(nn.Module):
    '''Normal Conv with ReLU activation'''
    def __init__(self, in_channels, out_channels, kernel_size, stride):
        super().__init__()
        padding = kernel_size // 2
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size,
                              stride=stride, padding=padding, bias=False)
        self.bn = nn.BatchNorm2d(out_channels)
        self.act = nn.ReLU()

    def forward(self, x):
        return self.act(self.bn(self.conv(x)))
The SimConv output is kept as one branch and then fed through three max-pooling layers (each with kernel=5, stride=1, padding=kernel//2), so the feature-map size is unchanged after each pooling; each pooling output is also kept as a branch. The four branches are then concatenated along the channel dimension, giving a tensor with the same h and w as the SimSPPF input but 2x its channels. Finally, another SimConv halves the channels again, so the SimSPPF input and output feature maps have the same size.
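A quick shape check of this flow (plain 1x1 convs stand in for SimConv here; the 20x20x512 input is the yolov6s ERBlock_5 feature map assumed above):

import torch
import torch.nn as nn

x = torch.randn(1, 512, 20, 20)          # ERBlock_5 feature map before SimSPPF
cv1 = nn.Conv2d(512, 256, 1)             # stands in for cv1 (SimConv): channels halved
m = nn.MaxPool2d(kernel_size=5, stride=1, padding=2)
y = cv1(x)
y1, y2 = m(y), m(m(y))
cat = torch.cat([y, y1, y2, m(y2)], dim=1)
print(cat.shape)                         # torch.Size([1, 1024, 20, 20]) -> 2x the input channels
out = nn.Conv2d(1024, 512, 1)(cat)       # stands in for cv2: back to 512 channels
print(out.shape)                         # torch.Size([1, 512, 20, 20])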
In SimSPPF, the SimConv layers (cv1 and cv2) are 1x1 convolutions.
class RepPANNeck(nn.Module):
    """RepPANNeck Module
    EfficientRep is the default backbone of this model.
    RepPANNeck has the balance of feature fusion ability and hardware efficiency.
    """
    def __init__(
        self,
        block=RepVGGBlock
    ):
        self.Rep_p4 = RepBlock(
            in_channels=channels_list[3] + channels_list[5],
            out_channels=channels_list[5],
            n=num_repeats[5],
            block=block
        )
        self.Rep_p3 = RepBlock(
            in_channels=channels_list[2] + channels_list[6],
            out_channels=channels_list[6],
            n=num_repeats[6],
            block=block
        )
        self.Rep_n3 = RepBlock(
            in_channels=channels_list[6] + channels_list[7],
            out_channels=channels_list[8],
            n=num_repeats[7],
            block=block
        )
        self.Rep_n4 = RepBlock(
            in_channels=channels_list[5] + channels_list[9],
            out_channels=channels_list[10],
            n=num_repeats[8],
            block=block
        )
        self.reduce_layer0 = SimConv(
            in_channels=channels_list[4],
            out_channels=channels_list[5],
            kernel_size=1,
            stride=1
        )
        self.upsample0 = Transpose(
            in_channels=channels_list[5],
            out_channels=channels_list[5],
        )
        self.reduce_layer1 = SimConv(
            in_channels=channels_list[5],
            out_channels=channels_list[6],
            kernel_size=1,
            stride=1
        )
        self.upsample1 = Transpose(
            in_channels=channels_list[6],
            out_channels=channels_list[6]
        )
        self.downsample2 = SimConv(
            in_channels=channels_list[6],
            out_channels=channels_list[7],
            kernel_size=3,
            stride=2
        )
        self.downsample1 = SimConv(
            in_channels=channels_list[8],
            out_channels=channels_list[9],
            kernel_size=3,
            stride=2
        )

    def forward(self, input):
        (x2, x1, x0) = input

        fpn_out0 = self.reduce_layer0(x0)
        upsample_feat0 = self.upsample0(fpn_out0)
        f_concat_layer0 = torch.cat([upsample_feat0, x1], 1)
        f_out0 = self.Rep_p4(f_concat_layer0)

        fpn_out1 = self.reduce_layer1(f_out0)
        upsample_feat1 = self.upsample1(fpn_out1)
        f_concat_layer1 = torch.cat([upsample_feat1, x2], 1)
        pan_out2 = self.Rep_p3(f_concat_layer1)

        down_feat1 = self.downsample2(pan_out2)
        p_concat_layer1 = torch.cat([down_feat1, fpn_out1], 1)
        pan_out1 = self.Rep_n3(p_concat_layer1)

        down_feat0 = self.downsample1(pan_out1)
        p_concat_layer2 = torch.cat([down_feat0, fpn_out0], 1)
        pan_out0 = self.Rep_n4(p_concat_layer2)

        outputs = [pan_out2, pan_out1, pan_out0]
        return outputs
Meituan calls the neck Rep-PAN. It follows the PAN topology, roughly a "U"-shaped structure: going down the left side of the U the feature maps' h and w grow (the upsampling path), and going up the right side they shrink (the downsampling path). The Upsample operation is implemented with PyTorch's built-in transposed convolution (the Transpose module).
The overall neck flow is as follows. On the left (top-down) side of the U, the ERB5 output (a 20x20x512 feature map) is reduced by a SimConv to 20x20x128, upsampled so that h and w double, and concatenated on the channel dimension with the ERB4 output, giving 40x40x384; a RepBlock (stride=1, out_channels != in_channels) then produces 40x40x128. Repeating these steps (reduce to 64 channels, upsample, concatenate with the ERB3 output, RepBlock) yields an 80x80x64 feature map. On the right (bottom-up) side, the 80x80x64 map is downsampled by a stride-2 SimConv to 40x40x64, concatenated on the channel dimension with the left-side feature map of matching h and w, and passed through a RepBlock to give the second output (40x40x128); repeating the same right-side steps gives the third output (20x20x256). The neck therefore outputs three feature maps: 80x80x64, 40x40x128, and 20x20x256.
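A quick shape check of the first top-down fusion (plain Conv2d / ConvTranspose2d stand in for SimConv / Transpose; the yolov6s sizes above are assumed):

import torch
import torch.nn as nn

x0 = torch.randn(1, 512, 20, 20)                             # ERBlock_5 output
x1 = torch.randn(1, 256, 40, 40)                             # ERBlock_4 output
reduce0 = nn.Conv2d(512, 128, 1)                             # stands in for reduce_layer0 (SimConv 1x1)
up0 = nn.ConvTranspose2d(128, 128, kernel_size=2, stride=2)  # stands in for Transpose (upsample0)
f = torch.cat([up0(reduce0(x0)), x1], dim=1)
print(f.shape)  # torch.Size([1, 384, 40, 40]) -> fed into Rep_p4, which outputs 40x40x128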
Structurally, this is still the PANet topology used in YOLOv5; the difference is that the CSP-style blocks are replaced with RepBlocks.
The head borrows the decoupled-head design from YOLOX, with some modifications. The head flow: the neck outputs three branches; for each branch, the feature map first passes through a BConv (1x1) layer for feature fusion and then splits into two branches. One branch predicts classification via Conv (3x3) + BConv; the other first fuses features with a Conv and then splits again into two branches, one regressing the box via a BConv and one classifying foreground vs. background via a BConv. The outputs are finally concatenated on the channel dimension, giving the raw predictions before post-processing. (Note that in the Detect code below there is no separate objectness branch; at inference a constant 1 is concatenated in its place.)
class Detect(nn.Module):
    def forward(self, x):
        if self.training:
            cls_score_list = []
            reg_distri_list = []

            for i in range(self.nl):
                x[i] = self.stems[i](x[i])
                cls_x = x[i]
                reg_x = x[i]
                cls_feat = self.cls_convs[i](cls_x)
                cls_output = self.cls_preds[i](cls_feat)
                reg_feat = self.reg_convs[i](reg_x)
                reg_output = self.reg_preds[i](reg_feat)

                cls_output = torch.sigmoid(cls_output)
                cls_score_list.append(cls_output.flatten(2).permute((0, 2, 1)))
                reg_distri_list.append(reg_output.flatten(2).permute((0, 2, 1)))

            cls_score_list = torch.cat(cls_score_list, axis=1)
            reg_distri_list = torch.cat(reg_distri_list, axis=1)

            return x, cls_score_list, reg_distri_list
        else:
            cls_score_list = []
            reg_dist_list = []
            anchor_points, stride_tensor = generate_anchors(
                x, self.stride, self.grid_cell_size, self.grid_cell_offset,
                device=x[0].device, is_eval=True)

            for i in range(self.nl):
                b, _, h, w = x[i].shape
                l = h * w
                x[i] = self.stems[i](x[i])
                cls_x = x[i]
                reg_x = x[i]
                cls_feat = self.cls_convs[i](cls_x)
                cls_output = self.cls_preds[i](cls_feat)
                reg_feat = self.reg_convs[i](reg_x)
                reg_output = self.reg_preds[i](reg_feat)

                if self.use_dfl:
                    reg_output = reg_output.reshape([-1, 4, self.reg_max + 1, l]).permute(0, 2, 1, 3)
                    reg_output = self.proj_conv(F.softmax(reg_output, dim=1))

                cls_output = torch.sigmoid(cls_output)
                cls_score_list.append(cls_output.reshape([b, self.nc, l]))
                reg_dist_list.append(reg_output.reshape([b, 4, l]))

            cls_score_list = torch.cat(cls_score_list, axis=-1).permute(0, 2, 1)
            reg_dist_list = torch.cat(reg_dist_list, axis=-1).permute(0, 2, 1)

            pred_bboxes = dist2bbox(reg_dist_list, anchor_points, box_format='xywh')
            pred_bboxes *= stride_tensor
            return torch.cat(
                [
                    pred_bboxes,
                    torch.ones((b, pred_bboxes.shape[1], 1), device=pred_bboxes.device, dtype=pred_bboxes.dtype),
                    cls_score_list
                ],
                axis=-1)
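A hedged sketch of one level of this decoupled head (my own simplification of the stems / cls_convs / cls_preds / reg_convs / reg_preds structure visible above, not the actual build_effidehead_layer code; the ConvBNSiLU block, the 64 channels, and reg_max=16 are assumptions):

import torch
import torch.nn as nn

class ConvBNSiLU(nn.Module):
    # stands in for the BConv / Conv blocks mentioned above: conv + BN + activation
    def __init__(self, c1, c2, k, s=1):
        super().__init__()
        self.conv = nn.Conv2d(c1, c2, k, s, k // 2, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        self.act = nn.SiLU()
    def forward(self, x):
        return self.act(self.bn(self.conv(x)))

num_classes, reg_max, c = 80, 16, 64
stem     = ConvBNSiLU(c, c, 1)                 # 1x1 stem for feature fusion
cls_conv = ConvBNSiLU(c, c, 3)                 # classification branch conv
reg_conv = ConvBNSiLU(c, c, 3)                 # regression branch conv
cls_pred = nn.Conv2d(c, num_classes, 1)        # class scores
reg_pred = nn.Conv2d(c, 4 * (reg_max + 1), 1)  # DFL distribution over the 4 box sides

x = torch.randn(1, c, 80, 80)                  # pan_out2 from the neck (yolov6s)
t = stem(x)
cls_out = cls_pred(cls_conv(t))
reg_out = reg_pred(reg_conv(t))
print(cls_out.shape, reg_out.shape)            # [1, 80, 80, 80] and [1, 68, 80, 80]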