I added the SELayer directly in detectron2/detectron2/modeling/backbone/resnet.py. The alternative, creating a new file with a rewritten model class, would require updating build_resnet_backbone and the related functions in fpn.py and other files; to avoid missing one of those call sites, I edited the original file instead.
The SELayer implementation follows the SENet-PyTorch code at https://github.com/moskomule/senet.pytorch/blob/master/senet.
```python
# Imports used by this snippet (already present at the top of resnet.py):
import fvcore.nn.weight_init as weight_init
import torch.nn.functional as F
from torch import nn

from detectron2.layers import CNNBlockBase, Conv2d, get_norm

__all__ = [
    "SELayer",
    "ResNetBlockBase",
    "BasicBlock",
    "BottleneckBlock",
    "DeformBottleneckBlock",
    "BasicStem",
    "ResNet",
    "make_stage",
    "build_resnet_backbone",
]


class SELayer(nn.Module):
    """Squeeze-and-Excitation: global average pooling followed by a
    two-layer bottleneck MLP that produces per-channel scaling weights."""

    def __init__(self, channel, reduction=16):
        super().__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(channel, channel // reduction, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(channel // reduction, channel, bias=False),
            nn.Sigmoid(),
        )

    def forward(self, x):
        b, c, _, _ = x.size()
        y = self.avg_pool(x).view(b, c)  # squeeze: (b, c)
        y = self.fc(y).view(b, c, 1, 1)  # excite: per-channel weights in (0, 1)
        return x * y.expand_as(x)        # rescale the input feature map


class BottleneckBlock(CNNBlockBase):
    """
    The standard bottleneck residual block used by ResNet-50, 101 and 152
    defined in :paper:`ResNet`. It contains 3 conv layers with kernels
    1x1, 3x3, 1x1, and a projection shortcut if needed.
    """

    def __init__(
        self,
        in_channels,
        out_channels,
        *,
        bottleneck_channels,
        stride=1,
        num_groups=1,
        norm="BN",
        stride_in_1x1=False,
        dilation=1,
    ):
        """
        Args:
            bottleneck_channels (int): number of output channels for the 3x3
                "bottleneck" conv layers.
            num_groups (int): number of groups for the 3x3 conv layer.
            norm (str or callable): normalization for all conv layers.
                See :func:`layers.get_norm` for supported format.
            stride_in_1x1 (bool): when stride>1, whether to put stride in the
                first 1x1 convolution or the bottleneck 3x3 convolution.
            dilation (int): the dilation rate of the 3x3 conv layer.
        """
        super().__init__(in_channels, out_channels, stride)

        if in_channels != out_channels:
            self.shortcut = Conv2d(
                in_channels,
                out_channels,
                kernel_size=1,
                stride=stride,
                bias=False,
                norm=get_norm(norm, out_channels),
            )
        else:
            self.shortcut = None

        # The original MSRA ResNet models have stride in the first 1x1 conv.
        # The subsequent fb.torch.resnet and Caffe2 ResNe[X]t implementations
        # have stride in the 3x3 conv.
        stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride)

        self.conv1 = Conv2d(
            in_channels,
            bottleneck_channels,
            kernel_size=1,
            stride=stride_1x1,
            bias=False,
            norm=get_norm(norm, bottleneck_channels),
        )

        self.conv2 = Conv2d(
            bottleneck_channels,
            bottleneck_channels,
            kernel_size=3,
            stride=stride_3x3,
            padding=1 * dilation,
            bias=False,
            groups=num_groups,
            dilation=dilation,
            norm=get_norm(norm, bottleneck_channels),
        )

        self.conv3 = Conv2d(
            bottleneck_channels,
            out_channels,
            kernel_size=1,
            bias=False,
            norm=get_norm(norm, out_channels),
        )

        for layer in [self.conv1, self.conv2, self.conv3, self.shortcut]:
            if layer is not None:  # shortcut can be None
                weight_init.c2_msra_fill(layer)

        # In the standard bottleneck design out_channels == bottleneck_channels * 4,
        # so this SE layer matches the output of conv3.
        self.se = SELayer(bottleneck_channels * 4, 16)

        # Zero-initialize the last normalization in each residual branch,
        # so that at the beginning, the residual branch starts with zeros,
        # and each residual block behaves like an identity.
        # See Sec 5.1 in "Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour":
        # "For BN layers, the learnable scaling coefficient γ is initialized
        # to be 1, except for each residual block's last BN
        # where γ is initialized to be 0."

        # nn.init.constant_(self.conv3.norm.weight, 0)
        # TODO this somehow hurts performance when training GN models from scratch.
        # Add it as an option when we need to use this code to train a backbone.

    def forward(self, x):
        out = self.conv1(x)
        out = F.relu_(out)

        out = self.conv2(out)
        out = F.relu_(out)

        out = self.conv3(out)
        out = self.se(out)  # SELayer added here

        if self.shortcut is not None:
            shortcut = self.shortcut(x)
        else:
            shortcut = x

        out += shortcut
        out = F.relu_(out)
        return out
```
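To confirm that the modified backbone still builds and that every bottleneck block now carries the SE branch, a quick check along these lines can be run (a minimal sketch, assuming Detectron2 is installed in the active virtual environment so that the edited resnet.py is the copy being imported):

```python
import torch
from detectron2.config import get_cfg
from detectron2.modeling import build_backbone

cfg = get_cfg()
cfg.MODEL.BACKBONE.NAME = "build_resnet_backbone"  # plain ResNet (the default)
cfg.MODEL.RESNETS.DEPTH = 50

backbone = build_backbone(cfg)

# Every BottleneckBlock should expose an `se` submodule after the edit.
print(any(name.endswith(".se") for name, _ in backbone.named_modules()))

# The SE branch only rescales channels, so output shapes are unchanged.
features = backbone(torch.randn(1, 3, 224, 224))
print({k: tuple(v.shape) for k, v in features.items()})
```

With the default config this should print True and a single res4 feature map whose shape matches the unmodified backbone.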
Mask2Former uses the ResNet implementation from Detectron2, so adding the SE module to Detectron2 also changes the backbone that Mask2Former builds. Note that if several Detectron2 source trees exist locally, the modification must be made in the copy installed into the virtual environment, since that is the one Python actually imports.
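Which copy the environment imports can be verified before editing:

```python
# Print the location of the Detectron2 package resolved by the current
# Python environment; the SE edits must go into this source tree.
import detectron2

print(detectron2.__file__)
```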