人工智能论文通用创新点(一)——ACMIX 卷积与注意力融合、GCnet(全局特征融合)、Coordinate_attention、SPD(可替换下采样)


1.ACMIX 卷积与注意力融合








  class ACmix(nn.Module):
      """Layer that mixes a windowed self-attention branch with a convolution branch.

      Three 1x1 convolutions project the input once into ``q``, ``k`` and ``v``.
      Both branches reuse those projections:

      * attention branch: every query position attends over a
        ``kernel_att x kernel_att`` window of keys/values (gathered with
        ``nn.Unfold`` after reflection padding), with a learned positional term;
      * convolution branch: ``q``, ``k``, ``v`` are mixed by a 1x1 conv (``fc``)
        into ``kernel_conv ** 2`` maps per head channel, which a grouped
        convolution (``dep_conv``) then aggregates.

      The result is ``rate1 * attention + rate2 * convolution`` where ``rate1``
      and ``rate2`` are learnable scalars.

      Args:
          in_planes: number of input channels.
          out_planes: number of output channels; must be divisible by ``head``.
          kernel_att: side of the attention window (expected to be odd so that
              the padding keeps the spatial size).
          head: number of attention heads.
          kernel_conv: side of the convolution-branch kernel (expected odd).
          stride: spatial stride applied by both branches.
          dilation: dilation of the attention window.

      Raises:
          ValueError: if ``out_planes`` is not divisible by ``head``.

      Note:
          This class relies on ``init_rate_half``, ``init_rate_0``, ``position``
          and ``stride`` helper functions that must be defined elsewhere in the
          module; they are not part of this class.
      """

      def __init__(self, in_planes, out_planes, kernel_att=7, head=4, kernel_conv=3, stride=1, dilation=1):
          super(ACmix, self).__init__()
          if out_planes % head != 0:
              # forward() reshapes (b, out_planes, h, w) into
              # (b * head, out_planes // head, h, w); fail early with a clear message.
              raise ValueError(
                  "out_planes ({}) must be divisible by head ({})".format(out_planes, head)
              )

          self.in_planes = in_planes
          self.out_planes = out_planes
          self.head = head
          self.kernel_att = kernel_att
          self.kernel_conv = kernel_conv
          self.stride = stride
          self.dilation = dilation

          # Learnable mixing weights of the two branches; their values are set in
          # reset_parameters(), torch.Tensor(1) only allocates the storage.
          self.rate1 = torch.nn.Parameter(torch.Tensor(1))
          self.rate2 = torch.nn.Parameter(torch.Tensor(1))
          self.head_dim = self.out_planes // self.head

          # 1x1 projections producing q, k and v.
          self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=1)
          self.conv2 = nn.Conv2d(in_planes, out_planes, kernel_size=1)
          self.conv3 = nn.Conv2d(in_planes, out_planes, kernel_size=1)
          # Maps the 2-channel tensor returned by position() to head_dim channels.
          self.conv_p = nn.Conv2d(2, self.head_dim, kernel_size=1)

          # Half of the (dilated) attention window, so that padding + unfold keeps
          # the spatial size at stride 1.
          self.padding_att = (self.dilation * (self.kernel_att - 1) + 1) // 2
          self.pad_att = torch.nn.ReflectionPad2d(self.padding_att)
          # The unfold must use the same dilation the padding was computed for,
          # otherwise the number of extracted windows does not match h_out * w_out.
          self.unfold = nn.Unfold(
              kernel_size=self.kernel_att,
              dilation=self.dilation,
              padding=0,
              stride=self.stride,
          )
          # Softmax over the window positions (dim 1 of the attention logits).
          self.softmax = torch.nn.Softmax(dim=1)

          self.fc = nn.Conv2d(3 * self.head, self.kernel_conv * self.kernel_conv, kernel_size=1, bias=False)
          # "Same"-style padding derived from kernel_conv (equals 1 for the default
          # kernel_conv=3) so both branches produce the same spatial size.
          self.dep_conv = nn.Conv2d(
              self.kernel_conv * self.kernel_conv * self.head_dim,
              out_planes,
              kernel_size=self.kernel_conv,
              bias=True,
              groups=self.head_dim,
              padding=self.kernel_conv // 2,
              stride=stride,
          )

          self.reset_parameters()

      def reset_parameters(self):
          """Initialise the mixing rates and the grouped-convolution kernel."""
          init_rate_half(self.rate1)
          init_rate_half(self.rate2)

          # Input map i of each group gets a kernel with a single 1 at spatial
          # position (i // kernel_conv, i % kernel_conv), i.e. an identity matrix
          # reshaped to (k*k, k, k).
          n_taps = self.kernel_conv * self.kernel_conv
          kernel = torch.eye(n_taps).view(n_taps, self.kernel_conv, self.kernel_conv)
          kernel = kernel.repeat(self.out_planes, 1, 1, 1)
          self.dep_conv.weight = nn.Parameter(data=kernel, requires_grad=True)
          # NOTE(review): the bias is replaced by whatever init_rate_0 returns; if
          # that helper works in place and returns None, this drops the bias.
          # Confirm against the helper's definition.
          self.dep_conv.bias = init_rate_0(self.dep_conv.bias)

      def forward(self, x):
          """Apply both branches to ``x`` and return their weighted sum.

          Args:
              x: 4-D tensor; its second dimension must equal ``in_planes``.

          Returns:
              Tensor with ``out_planes`` channels and spatial size
              ``(h // stride, w // stride)``.
          """
          # 1x1 convolutions give q, k, v; the same maps feed both branches.
          q, k, v = self.conv1(x), self.conv2(x), self.conv3(x)
          # Scaled dot-product factor: 1 / sqrt(head_dim).
          scaling = float(self.head_dim) ** -0.5
          b, _, h, w = q.shape
          # NOTE(review): assumes h and w are multiples of stride; otherwise the
          # unfold / strided outputs may not match these sizes — confirm.
          h_out, w_out = h // self.stride, w // self.stride
          window = self.kernel_att * self.kernel_att

          # ---- attention branch ----
          # Positional encoding projected to head_dim channels.
          pe = self.conv_p(position(h, w, x.is_cuda))

          q_att = q.view(b * self.head, self.head_dim, h, w) * scaling
          k_att = k.view(b * self.head, self.head_dim, h, w)
          v_att = v.view(b * self.head, self.head_dim, h, w)

          if self.stride > 1:
              q_att = stride(q_att, self.stride)
              q_pe = stride(pe, self.stride)
          else:
              q_pe = pe

          # Gather the key window around every output position:
          # (b*head, head_dim, k_att^2, h_out, w_out)
          unfold_k = self.unfold(self.pad_att(k_att)).view(
              b * self.head, self.head_dim, window, h_out, w_out
          )
          # Same gathering for the positional encoding:
          # (1, head_dim, k_att^2, h_out, w_out)
          unfold_rpe = self.unfold(self.pad_att(pe)).view(
              1, self.head_dim, window, h_out, w_out
          )

          # (b*head, head_dim, 1, h_out, w_out) * (b*head, head_dim, k_att^2, h_out, w_out)
          # summed over head_dim -> (b*head, k_att^2, h_out, w_out)
          att = (q_att.unsqueeze(2) * (unfold_k + q_pe.unsqueeze(2) - unfold_rpe)).sum(1)
          att = self.softmax(att)

          out_att = self.unfold(self.pad_att(v_att)).view(
              b * self.head, self.head_dim, window, h_out, w_out
          )
          out_att = (att.unsqueeze(1) * out_att).sum(2).view(b, self.out_planes, h_out, w_out)

          # ---- convolution branch (reuses q, k, v) ----
          f_all = self.fc(torch.cat(
              [
                  q.view(b, self.head, self.head_dim, h * w),
                  k.view(b, self.head, self.head_dim, h * w),
                  v.view(b, self.head, self.head_dim, h * w),
              ],
              1,
          ))
          f_conv = f_all.permute(0, 2, 1, 3).reshape(x.shape[0], -1, x.shape[-2], x.shape[-1])
          out_conv = self.dep_conv(f_conv)

          # Fuse the two branches with the learned rates.
          return self.rate1 * out_att + self.rate2 * out_conv







  class CB2d(nn.Module):
      """Global-context block: pool a per-channel context vector and fuse it back.

      ``spatial_pool`` reduces the input to a ``(batch, channel, 1, 1)`` context,
      either by attention pooling (a 1x1 conv produces one logit per spatial
      position, softmax-normalised and used as weights) or by average pooling.
      The context then goes through a bottleneck (1x1 conv -> LayerNorm -> ReLU
      -> 1x1 conv) and is fused with the input multiplicatively (after a
      sigmoid), additively, or both.

      Args:
          inplanes: number of input (and output) channels. The bottleneck width
              is ``inplanes // 4``.
          pool: ``'att'`` for attention pooling or ``'avg'`` for average pooling.
          fusions: non-empty sequence drawn from ``'channel_add'`` and
              ``'channel_mul'`` selecting which fusion branches to build.

      Note:
          ``reset_parameters`` relies on ``kaiming_init`` and ``last_zero_init``
          helper functions that must be defined elsewhere in the module.
      """

      def __init__(self, inplanes, pool='att', fusions=('channel_add', 'channel_mul')):
          super(CB2d, self).__init__()
          assert pool in ('avg', 'att')
          assert all(f in ('channel_add', 'channel_mul') for f in fusions)
          assert len(fusions) > 0, 'at least one fusion should be used'

          self.inplanes = inplanes
          self.planes = inplanes // 4
          self.pool = pool
          # Store a private copy so later changes to the caller's sequence (or to a
          # shared default) cannot alter this module's configuration.
          self.fusions = list(fusions)

          if pool == 'att':
              self.conv_mask = nn.Conv2d(inplanes, 1, kernel_size=1)
              # Normalises over the flattened spatial positions.
              self.softmax = nn.Softmax(dim=2)
          else:
              self.avg_pool = nn.AdaptiveAvgPool2d(1)

          if 'channel_add' in self.fusions:
              self.channel_add_conv = self._make_bottleneck()
          else:
              self.channel_add_conv = None

          if 'channel_mul' in self.fusions:
              self.channel_mul_conv = self._make_bottleneck()
          else:
              self.channel_mul_conv = None

          self.reset_parameters()

      def _make_bottleneck(self):
          """Build the 1x1 conv -> LayerNorm -> ReLU -> 1x1 conv context transform."""
          return nn.Sequential(
              nn.Conv2d(self.inplanes, self.planes, kernel_size=1),
              nn.LayerNorm([self.planes, 1, 1]),
              nn.ReLU(inplace=True),
              nn.Conv2d(self.planes, self.inplanes, kernel_size=1),
          )

      def reset_parameters(self):
          """Initialise the attention mask conv and the fusion branches."""
          if self.pool == 'att':
              kaiming_init(self.conv_mask, mode='fan_in')
              self.conv_mask.inited = True
          if self.channel_add_conv is not None:
              last_zero_init(self.channel_add_conv)
          if self.channel_mul_conv is not None:
              last_zero_init(self.channel_mul_conv)

      def spatial_pool(self, x):
          """Reduce ``x`` of shape (batch, channel, H, W) to (batch, channel, 1, 1)."""
          batch, channel, height, width = x.size()
          if self.pool != 'att':
              return self.avg_pool(x)

          # Features flattened over space: (batch, 1, channel, H*W).
          features = x.view(batch, channel, height * width).unsqueeze(1)

          # One weight per spatial position: 1x1 conv, then softmax over H*W.
          weights = self.conv_mask(x).view(batch, 1, height * width)
          weights = self.softmax(weights).unsqueeze(3)  # (batch, 1, H*W, 1)

          # Weighted sum of the features over all positions.
          context = torch.matmul(features, weights)  # (batch, 1, channel, 1)
          return context.view(batch, channel, 1, 1)

      def forward(self, x):
          """Fuse the pooled global context back into ``x``; output has the shape of ``x``."""
          context = self.spatial_pool(x)

          # Channel-wise scale (sigmoid-gated) and/or bias learned from the context.
          out = x
          if self.channel_mul_conv is not None:
              out = out * torch.sigmoid(self.channel_mul_conv(context))
          if self.channel_add_conv is not None:
              out = out + self.channel_add_conv(context)
          return out






  class CA(nn.Module):
      """Coordinate Attention block.

      Implements the attention described in "Coordinate Attention for Efficient
      Mobile Network Design". Instead of squeezing a feature map into a single
      vector with 2-D global pooling, the input is averaged separately along the
      width and along the height. The two 1-D descriptors are concatenated,
      passed through a shared 1x1 conv + BatchNorm + activation, split again, and
      turned into two sigmoid attention maps (one per spatial axis) that rescale
      the input.

      Args:
          inp: number of input channels.
          oup: number of channels of the attention maps. The maps are multiplied
              with the input, so ``oup`` must broadcast against ``inp`` (normally
              ``oup == inp``).
          reduction: channel reduction ratio of the shared 1x1 conv; the reduced
              width is ``max(8, inp // reduction)``.

      Note:
          The activation is an ``h_swish`` module that must be defined elsewhere
          in the module.
      """

      def __init__(self, inp, oup, reduction=32):
          super(CA, self).__init__()
          mip = max(8, inp // reduction)
          self.conv1 = nn.Conv2d(inp, mip, kernel_size=1, stride=1, padding=0)
          self.bn1 = nn.BatchNorm2d(mip)
          self.act = h_swish()
          self.conv_h = nn.Conv2d(mip, oup, kernel_size=1, stride=1, padding=0)
          self.conv_w = nn.Conv2d(mip, oup, kernel_size=1, stride=1, padding=0)

      def forward(self, x):
          """Rescale ``x`` (batch, channel, H, W) by its height and width attention maps."""
          _, _, h, w = x.size()

          # Average along each spatial axis. Taking the mean directly gives the
          # same result as adaptive average pooling to (h, 1) / (1, w) without
          # building new pooling modules on every call.
          x_h = x.mean(dim=3, keepdim=True)                      # (n, c, h, 1)
          x_w = x.mean(dim=2, keepdim=True).permute(0, 1, 3, 2)  # (n, c, w, 1)

          # Concatenate both descriptors along dim 2 and encode them jointly so the
          # shared conv can relate the height and width statistics.
          y = torch.cat([x_h, x_w], dim=2)
          y = self.act(self.bn1(self.conv1(y)))

          # Split back into the height part and the width part.
          x_h, x_w = torch.split(y, [h, w], dim=2)
          x_w = x_w.permute(0, 1, 3, 2)

          # Sigmoid turns each part into attention weights.
          a_h = self.conv_h(x_h).sigmoid()  # (n, oup, h, 1)
          a_w = self.conv_w(x_w).sigmoid()  # (n, oup, 1, w)

          # Apply both axis-wise attention maps to the original features.
          return x * a_w * a_h




  class space_to_depth(nn.Module):
      """Rearrange 2x2 spatial neighbourhoods into extra channels.

      The input is sampled at the four (row, column) parity offsets
      ``(0, 0)``, ``(1, 0)``, ``(0, 1)`` and ``(1, 1)`` of its last two
      dimensions with a step of 2, and the four half-resolution slices are
      concatenated along ``dimension``. For a ``(B, C, H, W)`` input with even
      ``H`` and ``W`` and the default ``dimension=1`` the result is
      ``(B, 4 * C, H / 2, W / 2)``.

      Args:
          dimension: dimension along which the four slices are concatenated.
      """

      def __init__(self, dimension=1):
          super().__init__()
          self.d = dimension

      def forward(self, x):
          """Return the four strided slices of ``x`` concatenated along ``self.d``."""
          # Odd spatial sizes give slices of different shapes and make the
          # concatenation fail, so the last two dimensions should be even.
          slices = [
              x[..., ::2, ::2],
              x[..., 1::2, ::2],
              x[..., ::2, 1::2],
              x[..., 1::2, 1::2],
          ]
          # Honour the configured dimension instead of a hard-coded 1.
          return torch.cat(slices, self.d)




导入:from torchvision.ops import DeformConv2d


self.torchvision_offset = nn.Conv2d(512, 18, 3, padding=1, bias=True)

self.torchvision_dcn2d = DeformConv2d(512, 512, 3, stride=1, padding=1)


offset = self.torchvision_offset(x)

x= self.torchvision_dcn2d(x,offset)


