赞
踩
VGG16结构
'''
img[h,w,3] --> resize[224,224,3] -->
cnv1*2(k= 3,f = 64)[224,224,64] + maxPool(s = 2)[112,112,64]-->
cnv2*2(k= 3,f = 128)[112,112,128] + maxPool(s = 2)[56,56,128]--->
cnv3*2(k= 3,f = 256)[56,56,256] + maxPool(s = 2)[28,28,256]--->
cnv4*2(k= 3,f = 512)[28,28,512] + maxPool(s = 2)[14,14,512]--->
cnv5*2(k= 3,f = 512)[14,14,512] + maxPool(s = 2)[7,7,512]----->
flatten--> fc*2(4096) --> fc(1000) -->
out
'''
2 . VGG16代码
'''VGG16 pipeline:
cfgs['D'] (layer config) --> make_layers(cfgs['D']) (backbone ``features``)
--> VGG(...) (adds avgpool + fully-connected head) --> optional pretrained
weights --> optional rebuild of the classification head when
num_classes != 1000.
'''
import torch
import torch.nn as nn

# ``torchvision.models.utils`` was removed in torchvision >= 0.13; the same
# helper has lived in ``torch.hub`` since torch 1.1.
try:
    from torch.hub import load_state_dict_from_url
except ImportError:  # very old installs: fall back to the torchvision location
    from torchvision.models.utils import load_state_dict_from_url

model_urls = {'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth'}


class VGG(nn.Module):
    """VGG network: convolutional ``features`` + 3-layer FC classifier.

    Args:
        features: backbone produced by :func:`make_layers`.
        num_classes: size of the final classification layer.
        init_weights: when True, (re-)initialize all weights in place.
    """

    def __init__(self, features, num_classes=1000, init_weights=True):
        super(VGG, self).__init__()
        self.features = features
        # Forces a 7x7 spatial map so the flattened size is always 512*7*7,
        # independent of the input resolution.
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        # BUGFIX: the attribute was misspelled ``clssifier``.  With the typo,
        # pretrained checkpoints (whose keys start with ``classifier.``) were
        # silently dropped by load_state_dict(strict=False).
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, num_classes))
        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        x = self.features(x)      # [N, 512, 7, 7] for a 224x224 input
        x = self.avgpool(x)       # [N, 512, 7, 7]
        x = torch.flatten(x, 1)   # [N, 25088]: flatten every dim after batch
        x = self.classifier(x)
        return x

    def _initialize_weights(self):
        # BUGFIX: the original used normal_(std=1) for convs (explodes
        # activations) and constant_(0.01) for linears (symmetric weights
        # cannot break symmetry).  Use the reference torchvision scheme.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out',
                                        nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)


def make_layers(cfg, batch_norm=False):
    """Build the VGG backbone from a config list.

    ``cfg`` entries are either an int (3x3 conv with that many filters,
    followed by optional BatchNorm and ReLU) or ``'M'`` (2x2 max-pool).
    """
    layers = []
    in_channels = 3
    for v in cfg:
        if v == 'M':
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        else:
            conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
            if batch_norm:
                layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
            else:
                layers += [conv2d, nn.ReLU(inplace=True)]
            in_channels = v
    return nn.Sequential(*layers)


# 'D' is the 16-layer configuration from the VGG paper (13 convs + 5 pools).
cfgs = {
    'D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M',
          512, 512, 512, 'M', 512, 512, 512, 'M']}


def vgg16(pretrained=False, progress=True, num_classes=1000):
    """Construct VGG16; optionally load ImageNet weights and resize the head.

    Args:
        pretrained: download and load the ImageNet checkpoint.
        progress: show a download progress bar.
        num_classes: if != 1000 the classifier head is rebuilt (untrained).
    """
    model = VGG(make_layers(cfgs['D']))
    if pretrained:
        state_dict = load_state_dict_from_url(model_urls['vgg16'],
                                              model_dir='./model_data',
                                              progress=progress)
        model.load_state_dict(state_dict, strict=False)
    if num_classes != 1000:
        model.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, num_classes))
    return model


if __name__ == '__main__':
    x = torch.randn([2, 3, 224, 224])
    model = vgg16(num_classes=2)
    y = model(x)
    print(y.shape)
    # expected: torch.Size([2, 2])
MobileNetV1用深度可分离卷积大大减少了参数的数量,先用3x3的卷积核依次和输入n 个特征卷积,得到n个输出特征,在用1x1卷积核进行普通的卷积得到输出特征。比如输入16个特征,需要输出32个特征,普通卷积的参数为16x3x3x32=4608个。用深度可分离卷积需要的参数量为16x3x3x1+16x1x1x32=656个。通过深度可分离卷积可以大大减少模型的参数。
'''MobileNetV1.

Depthwise-separable convolutions split a standard conv into a per-channel
3x3 "depthwise" conv followed by a 1x1 "pointwise" conv, cutting the
parameter count dramatically (e.g. 16 -> 32 channels: 4608 params for a
plain 3x3 conv vs 656 for the separable pair).
'''
import torch
import torch.nn as nn


def conv_bn(inp, oup, stride=1):
    """Standard 3x3 conv -> BatchNorm -> ReLU6 block."""
    return nn.Sequential(
        nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
        nn.BatchNorm2d(oup),
        nn.ReLU6(inplace=True),
    )


def conv_dw(inp, oup, stride=1):
    """Depthwise 3x3 conv then pointwise 1x1 conv, each with BN + ReLU6."""
    return nn.Sequential(
        # depthwise: one 3x3 filter per input channel (groups=inp)
        nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
        nn.BatchNorm2d(inp),
        nn.ReLU6(inplace=True),
        # pointwise: 1x1 conv mixes channels and sets the output width
        nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
        nn.BatchNorm2d(oup),
        nn.ReLU6(inplace=True),
    )


class MobileNetV1(nn.Module):
    """MobileNetV1 backbone + global average pool + 1000-way linear head."""

    def __init__(self):
        super(MobileNetV1, self).__init__()
        # Spatial sizes in the comments assume a 640x640 input.
        self.stage1 = nn.Sequential(
            conv_bn(3, 32, 2),       # 640 -> 320, 32 ch
            conv_dw(32, 64, 1),      # 320, 64 ch
            conv_dw(64, 128, 2),     # 320 -> 160, 128 ch
            conv_dw(128, 128, 1),
            conv_dw(128, 256, 2),    # 160 -> 80, 256 ch
            conv_dw(256, 256, 1),
        )
        # 80 -> 40, 512 ch: one strided block then five width-preserving ones.
        middle = [conv_dw(256, 512, 2)]
        middle.extend(conv_dw(512, 512, 1) for _ in range(5))
        self.stage2 = nn.Sequential(*middle)
        # 40 -> 20, 1024 ch
        self.stage3 = nn.Sequential(
            conv_dw(512, 1024, 2),
            conv_dw(1024, 1024, 1),
        )
        self.avg = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(1024, 1000)

    def forward(self, x):
        for stage in (self.stage1, self.stage2, self.stage3):
            x = stage(x)
        x = self.avg(x)
        x = x.view(-1, 1024)
        return self.fc(x)


def mobilenet_v1(pretrained=False, progress=True, num_classes=1000):
    """Build MobileNetV1; no pretrained checkpoint is available."""
    model = MobileNetV1()
    if pretrained:
        print("mobilenet_v1 has no pretrained model")
    if num_classes != 1000:
        model.fc = nn.Linear(1024, num_classes)
    return model


if __name__ == "__main__":
    input = torch.randn([8, 3, 416, 416])
    model = mobilenet_v1(num_classes=2)
    out = model(input)
    print(out.shape)
    # torch.Size([8, 2])
MobileNetV2在MobileNetV1的基础上,增加Inverted resblock模块儿。Inverted体现在对输入首先利用1x1卷积进行升维,然后利用3x3深度可分离卷积进行特征提取,然后再利用1x1卷积降维。resblock体现在输入和上一步输出相加。如下所示:
''' (1)Inverted resblock Inverted:input--> conv2d(1*1 升维)+BN+ReLU6 --> DepthwiseConv2D(3*3)+BN+ReLU6--> conv2d(1*1 降维)+BN --> output resblock: output + input (2)MobileNetV2 input.shape expand_ratio channels times strides conv2d 224^2*3 - 32 1 2 bottleneck 112^2*32 1 16 1 1 bottleneck 112^2*16 6 24 2 2 bottleneck 56^2*24 6 32 3 2 bottleneck 28^2*32 6 64 4 2 bottleneck 28^2*64 6 96 3 1 bottleneck 14^2*96 6 160 3 2 bottleneck 7^2*160 6 320 1 1 conv2d 1*1 7^2*320 - 1280 1 1 avgpool7*7 7^2*1280 - - 1 - conv2d 1*1 1*1*k - k - '''
''' MobileNetV2 ''' import torch import torch.nn as nn from torchvision.models.utils import load_state_dict_from_url __all__ = ['MobileNetV2', 'mobilenet_v2'] model_urls = { 'mobilenet_v2': 'https://download.pytorch.org/models/mobilenet_v2-b0353104.pth'} def _make_divisible(v, divisor, min_value=None): if min_value is None: min_value = divisor new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) if new_v < 0.9 * v: new_v += divisor return new_v class ConvBNReLU(nn.Sequential): def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1): padding = (kernel_size - 1) // 2 super(ConvBNReLU, self).__init__( nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False), nn.BatchNorm2d(out_planes), nn.ReLU6(inplace=True) ) class InvertedResidual(nn.Module): def __init__(self, inp, oup, stride, expand_ratio): super(InvertedResidual, self).__init__() self.stride = stride assert stride in [1, 2] hidden_dim = int(round(inp * expand_ratio)) self.use_res_connect = self.stride == 1 and inp == oup layers = [] if expand_ratio != 1: layers += [ConvBNReLU(inp, hidden_dim, kernel_size=1)] layers += [ ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim), nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), nn.BatchNorm2d(oup) ] self.conv = nn.Sequential(*layers) def forward(self, x): if self.use_res_connect: return x + self.conv(x) else: return self.conv(x) class MobileNetV2(nn.Module): def __init__(self, num_classes=1000, width_mult=1.0, inverted_residual_setting=None, round_nearest=8): super(MobileNetV2, self).__init__() block = InvertedResidual input_channel = 32 last_channel = 1280 if inverted_residual_setting is None: inverted_residual_setting = [ [1, 16, 1, 1], [6, 24, 2, 2], [6, 32, 3, 2], [6, 64, 4, 2], [6, 96, 3, 1], [6, 160, 3, 2], [6, 320, 1, 1], ] if len(inverted_residual_setting) == 0 or len( inverted_residual_setting[0]) != 4: raise ValueError( "inverted_residual_setting should be non_empty" "or a 
4-element list,got{}".format( inverted_residual_setting)) input_channel = _make_divisible( input_channel * width_mult, round_nearest) self.last_channel = _make_divisible( last_channel * max(1.0, width_mult), round_nearest) features = [ConvBNReLU(3, input_channel, stride=2)] for t, c, n, s in inverted_residual_setting: out_channel = _make_divisible(c * width_mult, round_nearest) for i in range(n): stride = s if i == 0 else 1 features += [block(input_channel, out_channel, stride, expand_ratio=t)] input_channel = out_channel features += [ConvBNReLU(input_channel, self.last_channel,kernel_size=1)] self.features = nn.Sequential(*features) self.classifier = nn.Sequential(nn.Dropout(0.2),nn.Linear(self.last_channel,num_classes)) def _initialize_weights(self): for m in self.modules(): if isinstance(m, nn.Conv2d): nn.init.normal_(m.weight) # nn.init.uniform(m.weight) # nn.init.kaiming_normal(m.weight,mode='fan_out',nonlinearity='relu') if m.bias is not None: nn.init.zeros_(m.bias) elif isinstance(m, nn.BatchNorm2d): nn.init.constant_(m.weight, 1) nn.init.zeros_(m.bias) elif isinstance(m, nn.Linear): nn.init.constant_(m.weight, 0.01) nn.init.zeros_(m.bias) def forward(self, x): x = self.features(x) x = x.mean([2, 3]) x = self.classifier(x) return x def mobilenet_v2(pretrained = False, progress = True, num_classes = 1000): model = MobileNetV2() if pretrained: state_dict = load_state_dict_from_url(model_urls['mobilenet_v2'], model_dir = './model_data', progress = progress) model.load_state_dict(state_dict,strict=False) if num_classes != 1000: model.classifier = nn.Sequential( nn.Dropout(0.2), nn.Linear(model.last_channel,num_classes)) return model if __name__ == '__main__': x = torch.randn([20, 3, 224, 224]) # convBnReLU = ConvBNReLU(in_planes=3,out_planes=100) # convBnReLU(x).shape # output = InvertedResidual(3, 24, 2, 2)(x) # model = MobileNetV2() model = mobilenet_v2(num_classes = 20000) output = model(x) print(output.shape) ''' torch.Size([20, 20000]) Process finished with 
exit code 0 '''
MobileNetV3的亮点是在InvertedResidual模块儿中添加了注意力机制SE,使用HS激活函数。
import torch.nn as nn import math import torch def _make_divisible(v, divisor, min_value=None): if min_value is None: min_value = divisor new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) # Make sure that round down does not go down by more than 10%. if new_v < 0.9 * v: new_v += divisor return new_v class h_sigmoid(nn.Module): def __init__(self, inplace=True): super(h_sigmoid, self).__init__() self.relu = nn.ReLU6(inplace=inplace) def forward(self, x): return self.relu(x + 3) / 6 class h_swish(nn.Module): def __init__(self, inplace=True): super(h_swish, self).__init__() self.sigmoid = h_sigmoid(inplace=inplace) def forward(self, x): return x * self.sigmoid(x) class SELayer(nn.Module): def __init__(self, channel, reduction=4): super(SELayer, self).__init__() self.avg_pool = nn.AdaptiveAvgPool2d(1) self.fc = nn.Sequential( nn.Linear(channel, _make_divisible(channel // reduction, 8)), nn.ReLU(inplace=True), nn.Linear(_make_divisible(channel // reduction, 8), channel), h_sigmoid() ) def forward(self, x): b, c, _, _ = x.size() y = self.avg_pool(x).view(b, c) # [b,c,h,w] --> [b,c,1,1] --> [b,c] y = self.fc(y).view(b, c, 1, 1) # 降维--> 升维 return x * y def conv_3x3_bn(inp, oup, stride): return nn.Sequential( nn.Conv2d(inp, oup, 3, stride, 1, bias=False), nn.BatchNorm2d(oup), h_swish() ) def conv_1x1_bn(inp, oup): return nn.Sequential( nn.Conv2d(inp, oup, 1, 1, 0, bias=False), nn.BatchNorm2d(oup), h_swish() ) class InvertedResidual(nn.Module): def __init__(self, inp, hidden_dim, oup, kernel_size, stride, use_se, use_hs): super(InvertedResidual, self).__init__() assert stride in [1, 2] self.identity = stride == 1 and inp == oup if inp == hidden_dim: self.conv = nn.Sequential( # dw nn.Conv2d(hidden_dim, hidden_dim, kernel_size, stride, (kernel_size - 1) // 2, groups=hidden_dim, bias=False), nn.BatchNorm2d(hidden_dim), h_swish() if use_hs else nn.ReLU(inplace=True), # Squeeze-and-Excite SELayer(hidden_dim) if use_se else nn.Identity(), # pw-linear 
nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), nn.BatchNorm2d(oup), ) else: self.conv = nn.Sequential( # pw nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False), nn.BatchNorm2d(hidden_dim), h_swish() if use_hs else nn.ReLU(inplace=True), # dw nn.Conv2d(hidden_dim, hidden_dim, kernel_size, stride, (kernel_size - 1) // 2, groups=hidden_dim, bias=False), nn.BatchNorm2d(hidden_dim), # Squeeze-and-Excite SELayer(hidden_dim) if use_se else nn.Identity(), h_swish() if use_hs else nn.ReLU(inplace=True), # pw-linear nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), nn.BatchNorm2d(oup), ) def forward(self, x): if self.identity: return x + self.conv(x) else: return self.conv(x) class MobileNetV3(nn.Module): def __init__(self, num_classes=1000, width_mult=1.): self.num_classes = num_classes super(MobileNetV3, self).__init__() # setting of inverted residual blocks self.cfgs = [ #` k, t, c, SE, HS, s [3, 1, 16, 0, 0, 1], [3, 4, 24, 0, 0, 2], [3, 3, 24, 0, 0, 1], [5, 3, 40, 1, 0, 2], [5, 3, 40, 1, 0, 1], [5, 3, 40, 1, 0, 1], [3, 6, 80, 0, 1, 2], [3, 2.5, 80, 0, 1, 1], [3, 2.3, 80, 0, 1, 1], [3, 2.3, 80, 0, 1, 1], [3, 6, 112, 1, 1, 1], [3, 6, 112, 1, 1, 1], [5, 6, 160, 1, 1, 2], [5, 6, 160, 1, 1, 1], [5, 6, 160, 1, 1, 1] ] input_channel = _make_divisible(16 * width_mult, 8) # 16 layers = [conv_3x3_bn(3, input_channel, 2)] block = InvertedResidual for k, t, c, use_se, use_hs, s in self.cfgs: output_channel = _make_divisible(c * width_mult, 8) exp_size = _make_divisible(input_channel * t, 8) layers.append(block(input_channel, exp_size, output_channel, k, s, use_se, use_hs)) input_channel = output_channel self.features = nn.Sequential(*layers) self.conv = conv_1x1_bn(input_channel, exp_size) self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) output_channel = _make_divisible(1280 * width_mult, 8) if width_mult > 1.0 else 1280 self.classifier = nn.Sequential( nn.Linear(exp_size, output_channel), h_swish(), nn.Dropout(0.2), nn.Linear(output_channel, num_classes), ) self._initialize_weights() def 
forward(self, x): x = self.features(x) x = self.conv(x) x = self.avgpool(x) x = x.view(x.size(0), -1) x = self.classifier(x) return x def _initialize_weights(self): for m in self.modules(): if isinstance(m, nn.Conv2d): n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels m.weight.data.normal_(0, math.sqrt(2. / n)) if m.bias is not None: m.bias.data.zero_() elif isinstance(m, nn.BatchNorm2d): m.weight.data.fill_(1) m.bias.data.zero_() elif isinstance(m, nn.Linear): n = m.weight.size(1) m.weight.data.normal_(0, 0.01) m.bias.data.zero_() def mobilenet_v3(pretrained=False, **kwargs): model = MobileNetV3(**kwargs) if pretrained: state_dict = torch.load('./model_data/mobilenetv3-large-1cd25616.pth') model.load_state_dict(state_dict, strict=True) if model.num_classes != 1000: in_channel = next(model.classifier[-1].parameters()).shape[1] model.classifier[-1] = nn.Linear(in_channel, model.num_classes) return model if __name__ == '__main__': x = torch.randn([8,3,416,416]) model = mobilenet_v3(num_classes=2) y = model(x) print(y.shape) ''' torch.Size([8, 2]) Process finished with exit code 0 ''' ''' features(x) Sequential( (0): Sequential( (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): h_swish( (sigmoid): h_sigmoid( (relu): ReLU6(inplace=True) ) ) ) (1): InvertedResidual( (conv): Sequential( (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16, bias=False) (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): ReLU(inplace=True) (3): Identity() (4): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1), bias=False) (5): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (2): InvertedResidual( (conv): Sequential( (0): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, 
track_running_stats=True) (2): ReLU(inplace=True) (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=64, bias=False) (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (5): Identity() (6): ReLU(inplace=True) (7): Conv2d(64, 24, kernel_size=(1, 1), stride=(1, 1), bias=False) (8): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (3): InvertedResidual( (conv): Sequential( (0): Conv2d(24, 72, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(72, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): ReLU(inplace=True) (3): Conv2d(72, 72, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=72, bias=False) (4): BatchNorm2d(72, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (5): Identity() (6): ReLU(inplace=True) (7): Conv2d(72, 24, kernel_size=(1, 1), stride=(1, 1), bias=False) (8): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (4): InvertedResidual( (conv): Sequential( (0): Conv2d(24, 72, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(72, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): ReLU(inplace=True) (3): Conv2d(72, 72, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2), groups=72, bias=False) (4): BatchNorm2d(72, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (5): SELayer( (avg_pool): AdaptiveAvgPool2d(output_size=1) (fc): Sequential( (0): Linear(in_features=72, out_features=24, bias=True) (1): ReLU(inplace=True) (2): Linear(in_features=24, out_features=72, bias=True) (3): h_sigmoid( (relu): ReLU6(inplace=True) ) ) ) (6): ReLU(inplace=True) (7): Conv2d(72, 40, kernel_size=(1, 1), stride=(1, 1), bias=False) (8): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (5): InvertedResidual( (conv): Sequential( (0): Conv2d(40, 120, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(120, 
eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): ReLU(inplace=True) (3): Conv2d(120, 120, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=120, bias=False) (4): BatchNorm2d(120, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (5): SELayer( (avg_pool): AdaptiveAvgPool2d(output_size=1) (fc): Sequential( (0): Linear(in_features=120, out_features=32, bias=True) (1): ReLU(inplace=True) (2): Linear(in_features=32, out_features=120, bias=True) (3): h_sigmoid( (relu): ReLU6(inplace=True) ) ) ) (6): ReLU(inplace=True) (7): Conv2d(120, 40, kernel_size=(1, 1), stride=(1, 1), bias=False) (8): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (6): InvertedResidual( (conv): Sequential( (0): Conv2d(40, 120, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(120, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): ReLU(inplace=True) (3): Conv2d(120, 120, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=120, bias=False) (4): BatchNorm2d(120, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (5): SELayer( (avg_pool): AdaptiveAvgPool2d(output_size=1) (fc): Sequential( (0): Linear(in_features=120, out_features=32, bias=True) (1): ReLU(inplace=True) (2): Linear(in_features=32, out_features=120, bias=True) (3): h_sigmoid( (relu): ReLU6(inplace=True) ) ) ) (6): ReLU(inplace=True) (7): Conv2d(120, 40, kernel_size=(1, 1), stride=(1, 1), bias=False) (8): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (7): InvertedResidual( (conv): Sequential( (0): Conv2d(40, 240, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(240, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): h_swish( (sigmoid): h_sigmoid( (relu): ReLU6(inplace=True) ) ) (3): Conv2d(240, 240, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=240, bias=False) (4): BatchNorm2d(240, eps=1e-05, momentum=0.1, affine=True, 
track_running_stats=True) (5): Identity() (6): h_swish( (sigmoid): h_sigmoid( (relu): ReLU6(inplace=True) ) ) (7): Conv2d(240, 80, kernel_size=(1, 1), stride=(1, 1), bias=False) (8): BatchNorm2d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (8): InvertedResidual( (conv): Sequential( (0): Conv2d(80, 200, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): h_swish( (sigmoid): h_sigmoid( (relu): ReLU6(inplace=True) ) ) (3): Conv2d(200, 200, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=200, bias=False) (4): BatchNorm2d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (5): Identity() (6): h_swish( (sigmoid): h_sigmoid( (relu): ReLU6(inplace=True) ) ) (7): Conv2d(200, 80, kernel_size=(1, 1), stride=(1, 1), bias=False) (8): BatchNorm2d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (9): InvertedResidual( (conv): Sequential( (0): Conv2d(80, 184, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(184, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): h_swish( (sigmoid): h_sigmoid( (relu): ReLU6(inplace=True) ) ) (3): Conv2d(184, 184, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=184, bias=False) (4): BatchNorm2d(184, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (5): Identity() (6): h_swish( (sigmoid): h_sigmoid( (relu): ReLU6(inplace=True) ) ) (7): Conv2d(184, 80, kernel_size=(1, 1), stride=(1, 1), bias=False) (8): BatchNorm2d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (10): InvertedResidual( (conv): Sequential( (0): Conv2d(80, 184, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(184, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): h_swish( (sigmoid): h_sigmoid( (relu): ReLU6(inplace=True) ) ) (3): Conv2d(184, 184, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=184, 
bias=False) (4): BatchNorm2d(184, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (5): Identity() (6): h_swish( (sigmoid): h_sigmoid( (relu): ReLU6(inplace=True) ) ) (7): Conv2d(184, 80, kernel_size=(1, 1), stride=(1, 1), bias=False) (8): BatchNorm2d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (11): InvertedResidual( (conv): Sequential( (0): Conv2d(80, 480, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): h_swish( (sigmoid): h_sigmoid( (relu): ReLU6(inplace=True) ) ) (3): Conv2d(480, 480, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=480, bias=False) (4): BatchNorm2d(480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (5): SELayer( (avg_pool): AdaptiveAvgPool2d(output_size=1) (fc): Sequential( (0): Linear(in_features=480, out_features=120, bias=True) (1): ReLU(inplace=True) (2): Linear(in_features=120, out_features=480, bias=True) (3): h_sigmoid( (relu): ReLU6(inplace=True) ) ) ) (6): h_swish( (sigmoid): h_sigmoid( (relu): ReLU6(inplace=True) ) ) (7): Conv2d(480, 112, kernel_size=(1, 1), stride=(1, 1), bias=False) (8): BatchNorm2d(112, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (12): InvertedResidual( (conv): Sequential( (0): Conv2d(112, 672, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(672, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): h_swish( (sigmoid): h_sigmoid( (relu): ReLU6(inplace=True) ) ) (3): Conv2d(672, 672, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=672, bias=False) (4): BatchNorm2d(672, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (5): SELayer( (avg_pool): AdaptiveAvgPool2d(output_size=1) (fc): Sequential( (0): Linear(in_features=672, out_features=168, bias=True) (1): ReLU(inplace=True) (2): Linear(in_features=168, out_features=672, bias=True) (3): h_sigmoid( (relu): ReLU6(inplace=True) ) ) ) 
(6): h_swish( (sigmoid): h_sigmoid( (relu): ReLU6(inplace=True) ) ) (7): Conv2d(672, 112, kernel_size=(1, 1), stride=(1, 1), bias=False) (8): BatchNorm2d(112, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (13): InvertedResidual( (conv): Sequential( (0): Conv2d(112, 672, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(672, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): h_swish( (sigmoid): h_sigmoid( (relu): ReLU6(inplace=True) ) ) (3): Conv2d(672, 672, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2), groups=672, bias=False) (4): BatchNorm2d(672, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (5): SELayer( (avg_pool): AdaptiveAvgPool2d(output_size=1) (fc): Sequential( (0): Linear(in_features=672, out_features=168, bias=True) (1): ReLU(inplace=True) (2): Linear(in_features=168, out_features=672, bias=True) (3): h_sigmoid( (relu): ReLU6(inplace=True) ) ) ) (6): h_swish( (sigmoid): h_sigmoid( (relu): ReLU6(inplace=True) ) ) (7): Conv2d(672, 160, kernel_size=(1, 1), stride=(1, 1), bias=False) (8): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (14): InvertedResidual( (conv): Sequential( (0): Conv2d(160, 960, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): h_swish( (sigmoid): h_sigmoid( (relu): ReLU6(inplace=True) ) ) (3): Conv2d(960, 960, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=960, bias=False) (4): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (5): SELayer( (avg_pool): AdaptiveAvgPool2d(output_size=1) (fc): Sequential( (0): Linear(in_features=960, out_features=240, bias=True) (1): ReLU(inplace=True) (2): Linear(in_features=240, out_features=960, bias=True) (3): h_sigmoid( (relu): ReLU6(inplace=True) ) ) ) (6): h_swish( (sigmoid): h_sigmoid( (relu): ReLU6(inplace=True) ) ) (7): Conv2d(960, 160, kernel_size=(1, 1), 
stride=(1, 1), bias=False) (8): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (15): InvertedResidual( (conv): Sequential( (0): Conv2d(160, 960, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): h_swish( (sigmoid): h_sigmoid( (relu): ReLU6(inplace=True) ) ) (3): Conv2d(960, 960, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=960, bias=False) (4): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (5): SELayer( (avg_pool): AdaptiveAvgPool2d(output_size=1) (fc): Sequential( (0): Linear(in_features=960, out_features=240, bias=True) (1): ReLU(inplace=True) (2): Linear(in_features=240, out_features=960, bias=True) (3): h_sigmoid( (relu): ReLU6(inplace=True) ) ) ) (6): h_swish( (sigmoid): h_sigmoid( (relu): ReLU6(inplace=True) ) ) (7): Conv2d(960, 160, kernel_size=(1, 1), stride=(1, 1), bias=False) (8): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) ''' ''' conv_1x1_bn(input_channel, exp_size): (17) Sequential( (0): Conv2d(160, 960, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): h_swish( (sigmoid): h_sigmoid( (relu): ReLU6(inplace=True) ) ) ) ''' ''' nn.AdaptiveAvgPool2d((1, 1)): (18) AdaptiveAvgPool2d(output_size=(1, 1)) ''' ''' classifier: (19) Sequential( (0): Linear(in_features=960, out_features=1280, bias=True) (1): h_swish( (sigmoid): h_sigmoid( (relu): ReLU6(inplace=True) ) ) (2): Dropout(p=0.2, inplace=False) (3): Linear(in_features=1280, out_features=1000, bias=True) ) '''
ResNet50模型主要有两个模块:Conv Block和Identity Block。Conv Block通过带步长的卷积负责下采样并改变(扩展)通道数;Identity Block是传统的残差结构,输入输出维度相同,负责加深网络、进一步提取特征。
''' Conv Block: input --> Conv2d + BatchNorm + ReLU --> Conv2d + BatchNorm + ReLU--> Conv2d + BatchNorm --> o1 input --> Conv2d + BatchNorm -->o2 o1 + o2 --> ReLU --> output Identity Block: input --> Conv2d + BatchNorm + ReLU --> Conv2d + BatchNorm + ReLU--> Conv2d + BatchNorm --> o1 o1 + input --> ReLU --> output ResNet50: input Zeropad Conv2d + BatchNorm + ReLU + MaxPool Conv Block + Identity Block*2 Conv Block + Identity Block*3 Conv Block + Identity Block*5 Conv Block + Identity Block*2 AveragePooling2D Flatten output '''
'''ResNet-50.

``Bottleneck`` acts as a Conv Block (downsampling / channel change) when it
is given a ``downsample`` module, and as an Identity Block (plain residual)
otherwise.  ``downsample`` — a 1x1 strided conv + BN — is attached whenever
the stride != 1 or the channel count changes, so the identity branch can be
reshaped to match.  The Bottleneck body is conv1x1 -> conv3x3 -> conv1x1,
added to the residual.  ``_make_layer`` stacks one Conv Block followed by
Identity Blocks; ``ResNet`` wires the stages together; ``resnet50``
instantiates the network, optionally loads pretrained weights and resizes
the classification head.
'''
import torch
import torch.nn as nn

# ``torchvision.models.utils`` was removed in torchvision >= 0.13; the same
# helper has lived in ``torch.hub`` since torch 1.1.
try:
    from torch.hub import load_state_dict_from_url
except ImportError:
    from torchvision.models.utils import load_state_dict_from_url

model_urls = {
    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
}


def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
    """3x3 convolution with padding (no bias: a BatchNorm always follows)."""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=dilation, groups=groups, bias=False,
                     dilation=dilation)


def conv1x1(in_planes, out_planes, stride=1):
    """1x1 convolution (no bias: a BatchNorm always follows)."""
    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride,
                     bias=False)


class Bottleneck(nn.Module):
    # The last 1x1 conv expands the width by this factor.
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
                 base_width=64, dilation=1, norm_layer=None):
        super(Bottleneck, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        width = int(planes * (base_width / 64.)) * groups
        # Both self.conv2 and self.downsample downsample when stride != 1.
        self.conv1 = conv1x1(inplanes, width)
        self.bn1 = norm_layer(width)
        self.conv2 = conv3x3(width, width, stride, groups, dilation)
        self.bn2 = norm_layer(width)
        self.conv3 = conv1x1(width, planes * self.expansion)
        self.bn3 = norm_layer(planes * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)
        out = self.conv3(out)
        out = self.bn3(out)
        if self.downsample is not None:
            # Conv Block: reshape the identity branch to match ``out``.
            identity = self.downsample(x)
        out += identity
        out = self.relu(out)
        return out


class ResNet(nn.Module):
    """Generic ResNet; ``layers`` gives the number of blocks per stage."""

    def __init__(self, block, layers, num_classes=1000,
                 zero_init_residual=False, groups=1, width_per_group=64,
                 replace_stride_with_dilation=None, norm_layer=None):
        super(ResNet, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        self._norm_layer = norm_layer
        self.inplanes = 64
        self.dilation = 1
        self.block = block  # kept so resnet50() can rebuild the fc head
        if replace_stride_with_dilation is None:
            replace_stride_with_dilation = [False, False, False]
        if len(replace_stride_with_dilation) != 3:
            raise ValueError("replace_stride_with_dilation should be None "
                             "or a 3-element tuple, got {}".format(
                                 replace_stride_with_dilation))
        self.groups = groups
        self.base_width = width_per_group
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2,
                               padding=3, bias=False)
        self.bn1 = norm_layer(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
                                       dilate=replace_stride_with_dilation[0])
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
                                       dilate=replace_stride_with_dilation[1])
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
                                       dilate=replace_stride_with_dilation[2])
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out',
                                        nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
        if zero_init_residual:
            # Zero the last BN of each residual branch so blocks start as
            # identity mappings, which helps very deep training.
            for m in self.modules():
                if isinstance(m, Bottleneck):
                    nn.init.constant_(m.bn3.weight, 0)

    def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
        """One stage: a Conv Block (maybe strided) + (blocks-1) Identity Blocks."""
        norm_layer = self._norm_layer
        downsample = None
        previous_dilation = self.dilation
        if dilate:
            self.dilation *= stride
            stride = 1
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                norm_layer(planes * block.expansion),
            )
        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample,
                            self.groups, self.base_width, previous_dilation,
                            norm_layer))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes, groups=self.groups,
                                base_width=self.base_width,
                                dilation=self.dilation, norm_layer=norm_layer))
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return x


def resnet50(pretrained=False, progress=True, num_classes=1000):
    """Build ResNet-50; optionally load ImageNet weights / resize the head."""
    model = ResNet(Bottleneck, [3, 4, 6, 3])
    if pretrained:
        state_dict = load_state_dict_from_url(model_urls['resnet50'],
                                              model_dir='./model_data',
                                              progress=progress)
        model.load_state_dict(state_dict)
    if num_classes != 1000:
        model.fc = nn.Linear(512 * model.block.expansion, num_classes)
    return model


if __name__ == '__main__':
    x = torch.randn([20, 3, 224, 224])
    mod = resnet50(num_classes=2)
    output = mod(x)
    print(output.shape)
    # torch.Size([20, 2])
对比发现VGG16(大约574MB)模型参数占内存最大,resnet50次之(97MB),mobilenetv3(22.1MB)、mobilenetv1(17MB)、mobilenetv2(14.2MB)模型参数量依次减少。
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。