A CNN's capacity comes from the size of the model and the scale of the dataset. A basic assumption in convolutional layer design is that the same kernel is applied to every example in the dataset. To increase capacity, model developers typically add more convolutional layers or enlarge existing ones (kernel height/width, number of input/output channels), which makes the model bigger and slower.
Conditionally Parameterized Convolution (CondConv) challenges the static-kernel paradigm by computing the convolution kernel as a function of the input. The "conditional" part refers to conditional computation, whose goal is to increase model capacity without a matching increase in computational cost: in a conditional-computation model, only part of the whole network is activated for each example. CondConv is also related to multi-branch convolutional networks (e.g., ResNet and Inception), where a layer consists of several convolutional branches whose outputs are aggregated to compute the final result. A CondConv layer is mathematically equivalent to a multi-branch layer in which each branch is a single convolution and the outputs are combined by a weighted sum, but it only needs to compute one convolution.
The idea behind CondConv is straightforward. The kernel is parameterized as in panel (a) of the figure above: W1, W2, and W3 act as several expert kernels, and the ROUTE FN produces the weights placed in front of them. A single kernel is assembled as the combination (α1·W1 + α2·W2 + α3·W3), and the convolution is then performed with this newly assembled kernel.
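In the notation of the CondConv paper, where the routing weights come from a sigmoid over a linear map of globally average-pooled features, the layer can be written as:

$$\mathrm{Output}(x) = \sigma\big((\alpha_1 W_1 + \cdots + \alpha_n W_n) * x\big), \qquad \alpha_i = r_i(x) = \mathrm{Sigmoid}\big(\mathrm{GlobalAveragePool}(x)\,R\big)_i$$

Since convolution is linear in its kernel, $\sum_i \alpha_i (W_i * x) = (\sum_i \alpha_i W_i) * x$: the $n$ branch convolutions collapse into a single convolution with a fused kernel, which is exactly the multi-branch equivalence described above.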
The code makes it even clearer what CondConv actually does.
CondConv-Pytorch
import functools

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import Parameter
from torch.nn.modules.conv import _ConvNd
from torch.nn.modules.utils import _pair


# Routing function: computes the per-expert weights; num_experts is the chosen number of experts
class _routing(nn.Module):

    def __init__(self, in_channels, num_experts, dropout_rate):
        super(_routing, self).__init__()
        self.dropout = nn.Dropout(dropout_rate)
        self.fc = nn.Linear(in_channels, num_experts)

    def forward(self, x):
        x = torch.flatten(x)
        x = self.dropout(x)
        x = self.fc(x)
        return torch.sigmoid(x)  # torch.sigmoid replaces the deprecated F.sigmoid


# The CondConv convolution itself
class CondConv2D(_ConvNd):

    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=0, dilation=1, groups=1,
                 bias=True, padding_mode='zeros', num_experts=3, dropout_rate=0.2):
        kernel_size = _pair(kernel_size)  # 3 -> (3, 3)
        stride = _pair(stride)
        padding = _pair(padding)
        dilation = _pair(dilation)
        super(CondConv2D, self).__init__(
            in_channels, out_channels, kernel_size, stride, padding, dilation,
            False, _pair(0), groups, bias, padding_mode)

        # Global average pooling turns the input features from (bs, c, w, h) into (bs, c, 1, 1),
        # preparing them for the routing-weight computation
        self._avg_pooling = functools.partial(F.adaptive_avg_pool2d, output_size=(1, 1))
        # The routing function maps (bs, c, 1, 1) to num_experts weights, one per expert
        self._routing_fn = _routing(in_channels, num_experts, dropout_rate)
        # A tensor of shape (num_experts, out_channels, in_channels, kernel_size, kernel_size)
        self.weight = Parameter(torch.Tensor(
            num_experts, out_channels, in_channels // groups, *kernel_size))

        self.reset_parameters()

    def _conv_forward(self, input, weight):
        if self.padding_mode != 'zeros':
            return F.conv2d(F.pad(input, self._padding_repeated_twice, mode=self.padding_mode),
                            weight, self.bias, self.stride,
                            _pair(0), self.dilation, self.groups)
        return F.conv2d(input, weight, self.bias, self.stride,
                        self.padding, self.dilation, self.groups)

    def forward(self, inputs):
        b, _, _, _ = inputs.size()
        res = []
        # inputs: (bs, c, w, h); each input: (c, w, h)
        for input in inputs:
            input = input.unsqueeze(0)
            pooled_inputs = self._avg_pooling(input)
            routing_weights = self._routing_fn(pooled_inputs)
            # Weighted sum of the expert kernels along dimension 0
            kernels = torch.sum(routing_weights[:, None, None, None, None] * self.weight, 0)
            out = self._conv_forward(input, kernels)
            res.append(out)
        return torch.cat(res, dim=0)


# Quick test
x = torch.rand(1, 20, 40, 40)
condconv = CondConv2D(20, 40, 3, num_experts=3)
print(condconv(x).size())  # torch.Size([1, 40, 38, 38])
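As a sanity check on the multi-branch equivalence claimed earlier, the short sketch below (not part of the original post; all shapes and names are illustrative) verifies that a weighted sum of per-branch convolution outputs matches a single convolution with the fused kernel:

import torch
import torch.nn.functional as F

x = torch.rand(1, 20, 40, 40)
experts = torch.rand(3, 40, 20, 3, 3)   # (num_experts, out_c, in_c, kh, kw)
alphas = torch.rand(3)                  # routing weights for this one example

# Multi-branch form: run each expert convolution separately, then weight and sum the outputs
branch_sum = sum(a * F.conv2d(x, w) for a, w in zip(alphas, experts))

# CondConv form: fuse the kernels first, then run a single convolution
fused = F.conv2d(x, torch.sum(alphas[:, None, None, None, None] * experts, dim=0))

print(torch.allclose(branch_sum, fused, atol=1e-4))  # True, up to floating-point error

This also explains why CondConv2D.forward above loops over the batch: every example gets its own fused kernel, so this simple implementation runs one convolution per example (a batched variant could use a grouped convolution instead).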
The relevant comments are annotated directly in the CondConv2D code above; stepping through it in a debugger makes it easier to see how this computation differs from the channel attention used in attention mechanisms. CondConv builds num_experts kernels of shape (out_channels, in_channels, kernel_size, kernel_size) and fuses those num_experts kernels into one, whereas channel attention assigns weights to the channels within a single convolution.
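For contrast, here is a minimal SE-style channel-attention sketch (illustrative only, not from the original post): it keeps one static kernel and rescales the output channels, rather than mixing kernels before the convolution.

import torch
import torch.nn as nn

conv = nn.Conv2d(20, 40, 3, padding=1)  # one static kernel, shared by all examples
se = nn.Sequential(nn.AdaptiveAvgPool2d(1), nn.Conv2d(40, 40, 1), nn.Sigmoid())

x = torch.rand(1, 20, 40, 40)
y = conv(x)
y = y * se(y)    # per-channel attention weights applied to the output
print(y.size())  # torch.Size([1, 40, 40, 40])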
To introduce the CondConv module into C3, paste the following code into common.py:
# Extra imports needed near the top of common.py (if not already present)
import functools
from torch.nn import Parameter
from torch.nn.modules.conv import _ConvNd
from torch.nn.modules.utils import _pair


class _routing(nn.Module):

    def __init__(self, in_channels, num_experts, dropout_rate):
        super(_routing, self).__init__()
        self.dropout = nn.Dropout(dropout_rate)
        self.fc = nn.Linear(in_channels, num_experts)

    def forward(self, x):
        x = torch.flatten(x)
        x = self.dropout(x)
        x = self.fc(x)
        return torch.sigmoid(x)


class CondConv2D(_ConvNd):

    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=0, dilation=1, groups=1,
                 bias=True, padding_mode='zeros', num_experts=3, dropout_rate=0.2):
        kernel_size = _pair(kernel_size)  # 3 -> (3, 3)
        stride = _pair(stride)
        padding = _pair(padding)
        dilation = _pair(dilation)
        super(CondConv2D, self).__init__(
            in_channels, out_channels, kernel_size, stride, padding, dilation,
            False, _pair(0), groups, bias, padding_mode)

        self._avg_pooling = functools.partial(F.adaptive_avg_pool2d, output_size=(1, 1))
        self._routing_fn = _routing(in_channels, num_experts, dropout_rate)
        self.weight = Parameter(torch.Tensor(
            num_experts, out_channels, in_channels // groups, *kernel_size))

        self.reset_parameters()

    def _conv_forward(self, input, weight):
        if self.padding_mode != 'zeros':
            return F.conv2d(F.pad(input, self._padding_repeated_twice, mode=self.padding_mode),
                            weight, self.bias, self.stride,
                            _pair(0), self.dilation, self.groups)
        return F.conv2d(input, weight, self.bias, self.stride,
                        self.padding, self.dilation, self.groups)

    def forward(self, inputs):
        b, _, _, _ = inputs.size()
        res = []
        for input in inputs:
            input = input.unsqueeze(0)
            pooled_inputs = self._avg_pooling(input)
            routing_weights = self._routing_fn(pooled_inputs)
            kernels = torch.sum(routing_weights[:, None, None, None, None] * self.weight, 0)
            out = self._conv_forward(input, kernels)
            res.append(out)
        return torch.cat(res, dim=0)


class C3_CondConv(nn.Module):
    # CSP Bottleneck with 3 convolutions, with the 1x1 projections replaced by CondConv2D
    def __init__(self, c1, c2, n=1, num_experts=3, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, experts, shortcut, groups, expansion
        # n must be the third positional parameter so that parse_model's args.insert(2, n) fills it correctly
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = CondConv2D(c1, c_, num_experts=num_experts, kernel_size=1, stride=1)
        self.cv2 = CondConv2D(c1, c_, num_experts=num_experts, kernel_size=1, stride=1)
        self.cv3 = CondConv2D(2 * c_, c2, num_experts=num_experts, kernel_size=1)  # optional act=FReLU(c2)
        self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))

    def forward(self, x):
        return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), 1))


# ----------------------- Test code; do not include it when pasting -----------------------
x = torch.rand(1, 20, 40, 40)
condconv = CondConv2D(20, 40, 3, num_experts=3)
print(condconv(x).size())
c3_condconv = C3_CondConv(20, 40, n=1, num_experts=3)
print(c3_condconv(x).size())
Then, in the parse_model(d, ch) function in yolo.py, register the new module as follows:
if m in {
        Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv,
        BottleneckCSP, C3, C3TR, C3SPP, C3Ghost, nn.ConvTranspose2d, DWConvTranspose2d, C3x, C3RFEM, RFEM_KCPNet, C3_CondConv}:
    c1, c2 = ch[f], args[0]
    if c2 != no:  # if not output
        c2 = make_divisible(c2 * gw, 8)

    args = [c1, c2, *args[1:]]
    if m in {BottleneckCSP, C3, C3TR, C3Ghost, C3x, C3RFEM, C3_CondConv}:
        args.insert(2, n)  # number of repeats
        n = 1
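To see why n must be the third positional parameter of C3_CondConv, here is a hedged trace of how parse_model builds the constructor arguments for a hypothetical yaml line [-1, 3, C3_CondConv, [128]] (all values are illustrative):

import math

def make_divisible(x, divisor):  # mirrors the YOLOv5 utility of the same name
    return math.ceil(x / divisor) * divisor

# Hypothetical yaml line: [-1, 3, C3_CondConv, [128]]
f, n, args = -1, 3, [128]
ch, gw, no = [64], 0.50, 255         # illustrative: layer channels, width multiple, detect outputs
c1, c2 = ch[f], args[0]              # c1 = 64, c2 = 128
if c2 != no:
    c2 = make_divisible(c2 * gw, 8)  # 128 * 0.50 -> 64
args = [c1, c2, *args[1:]]           # -> [64, 64]
args.insert(2, n)                    # -> [64, 64, 3]; n lands in the third slot
print(args)                          # C3_CondConv(64, 64, 3): n=3, num_experts keeps its default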
Finally, set up the corresponding yaml file:
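The yaml content of the original post is cut off at this point. As a hedged reconstruction, a yolov5s-style config with the stock C3 blocks swapped for C3_CondConv might look like the sketch below; the multiples, anchors, and head are the standard yolov5s ones and are assumed, not taken from the original:

# YOLOv5 backbone sketch: C3 replaced with C3_CondConv; head left as in stock yolov5s
nc: 80  # number of classes
depth_multiple: 0.33
width_multiple: 0.50
anchors:
  - [10,13, 16,30, 33,23]       # P3/8
  - [30,61, 62,45, 59,119]      # P4/16
  - [116,90, 156,198, 373,326]  # P5/32

backbone:
  # [from, number, module, args]
  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
   [-1, 1, Conv, [128, 3, 2]],    # 1-P2/4
   [-1, 3, C3_CondConv, [128]],
   [-1, 1, Conv, [256, 3, 2]],    # 3-P3/8
   [-1, 6, C3_CondConv, [256]],
   [-1, 1, Conv, [512, 3, 2]],    # 5-P4/16
   [-1, 9, C3_CondConv, [512]],
   [-1, 1, Conv, [1024, 3, 2]],   # 7-P5/32
   [-1, 3, C3_CondConv, [1024]],
   [-1, 1, SPPF, [1024, 5]],      # 9
  ]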