赞
踩
论文地址:Identity Mappings in Deep Residual Networks
参考链接:https://blog.csdn.net/wspba/article/details/60750007
该文针对原始的残差单元提出了改进的残差单元,如下所示:
从图中(b)可以看出,将激活函数(ReLU 和 BN) 看作是权重层的 “pre-activation”,而不是传统的“post-activation”。通过这个观点产生了一个新的残差单元。基于这个单元构建的1001层ResNet在CIFAR-10/100上展现了更具竞争力的结果,并且与原始的ResNet相比,也更容易训练,泛化能力更强。这些结果表明了,作为深度学习成功的关键,模型的深度仍然具有很大的可拓展空间。
在Pytorch实现上,相比于原始的ResNet, Pre-activation ResNet只需要改变forward函数的内容即可,原始的ResNet代码实现可见:https://blog.csdn.net/winycg/article/details/86709991
'''Pre-activation ResNet in PyTorch. Reference: [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun Identity Mappings in Deep Residual Networks. arXiv:1603.05027 ''' import torch import torch.nn as nn import torch.nn.functional as F class PreActBlock(nn.Module): '''Pre-activation version of the BasicBlock.''' expansion = 1 def __init__(self, in_planes, planes, stride=1): super(PreActBlock, self).__init__() self.bn1 = nn.BatchNorm2d(in_planes) self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) self.bn2 = nn.BatchNorm2d(planes) self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) if stride != 1 or in_planes != self.expansion*planes: self.shortcut = nn.Sequential( nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False) ) def forward(self, x): out = F.relu(self.bn1(x)) shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x out = self.conv1(out) out = self.conv2(F.relu(self.bn2(out))) out += shortcut return out class PreActBottleneck(nn.Module): '''Pre-activation version of the original Bottleneck module.''' expansion = 4 def __init__(self, in_planes, planes, stride=1): super(PreActBottleneck, self).__init__() self.bn1 = nn.BatchNorm2d(in_planes) self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) self.bn2 = nn.BatchNorm2d(planes) self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) self.bn3 = nn.BatchNorm2d(planes) self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False) if stride != 1 or in_planes != self.expansion*planes: self.shortcut = nn.Sequential( nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False) ) def forward(self, x): out = F.relu(self.bn1(x)) shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x out = self.conv1(out) out = self.conv2(F.relu(self.bn2(out))) out = self.conv3(F.relu(self.bn3(out))) out += shortcut return out class PreActResNet(nn.Module): def __init__(self, block, num_blocks, num_classes=10): super(PreActResNet, self).__init__() self.in_planes = 64 self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False) self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2) self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2) self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2) self.linear = nn.Linear(512*block.expansion, num_classes) def _make_layer(self, block, planes, num_blocks, stride): strides = [stride] + [1]*(num_blocks-1) layers = [] for stride in strides: layers.append(block(self.in_planes, planes, stride)) self.in_planes = planes * block.expansion return nn.Sequential(*layers) def forward(self, x): out = self.conv1(x) out = self.layer1(out) out = self.layer2(out) out = self.layer3(out) out = self.layer4(out) out = F.avg_pool2d(out, 4) out = out.view(out.size(0), -1) out = self.linear(out) return out def PreActResNet18(): return PreActResNet(PreActBlock, [2,2,2,2]) def PreActResNet34(): return PreActResNet(PreActBlock, [3,4,6,3]) def PreActResNet50(): return PreActResNet(PreActBottleneck, [3,4,6,3]) def PreActResNet101(): return PreActResNet(PreActBottleneck, [3,4,23,3]) def PreActResNet152(): return PreActResNet(PreActBottleneck, [3,8,36,3]) def test(): net = PreActResNet18() y = net((torch.randn(1,3,32,32))) print(y.size()) # test()
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。