赞
踩
1.先下载训练好的resnet18-5c106cde.pth,并存在指定的位置上
import os

import torch
import torchvision.models as models

# pretrained=True makes torchvision fetch the pretrained weights for resnet18.
resnet18 = models.resnet18(pretrained=True)
# torch.save does not create missing directories, so make sure 'model/' exists.
os.makedirs('model', exist_ok=True)
# Save the weights of the network built above (the original referenced an
# undefined name `model` and never imported torch).
torch.save(resnet18.state_dict(), 'model/resnet18-5c106cde.pth')
2.加载resnet18的模型和运行
import torch from torchvision import models import torch.nn as nn class BasicBlock(nn.Module):# 针对于renet18至34层的残差结构 expansion = 1 def __init__(self, in_channel, out_channel, stride=1, downsample=None,**kwargs): super(BasicBlock, self).__init__() self.conv1 = nn.Conv2d(in_channels= in_channel, out_channels=out_channel, kernel_size=3, stride =stride,padding=1,bias=False) self.bn1 = nn.BatchNorm2d(out_channel) self.relu = nn.ReLU(inplace=True) self.conv2 = nn.Conv2d(in_channels= out_channel, out_channels=out_channel, kernel_size=3, stride =1,padding=1,bias=False) self.bn2 = nn.BatchNorm2d(out_channel) self.downsample = downsample def forward(self, x): identity = x out = self.conv1(x) out = self.bn1(out) out = self.relu(out) out = self.conv2(out) out = self.bn2(out) if self.downsample is not None: identity = self.downsample(x) out += identity out = self.relu(out) return out class Bottleneck(nn.Module): # Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2) # while original implementation places the stride at the first 1x1 convolution(self.conv1) # according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385. # This variant is also known as ResNet V1.5 and improves accuracy according to # https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch. 
expansion = 4 def __init__(self, in_channel, out_channel, stride=1, downsample=None, groups= 1, width_per_group= 64): super(Bottleneck, self).__init__() self.conv1 = nn.Conv2d(in_channels= in_channel, out_channels=out_channel, kernel_size=1, stride =stride,padding=1,bias=False) self.bn1 = nn.BatchNorm2d(out_channel) self.conv2 = nn.Conv2d(in_channels= out_channel, out_channels=out_channel, kernel_size=3, stride =stride,padding=1,bias=False) self.bn2 = nn.BatchNorm2d(out_channel) self.conv3 = nn.Conv2d(in_channels= out_channel, out_channels=out_channel*self.expansion, kernel_size=1, stride =1,bias=False) self.bn3 = nn.BatchNorm2d(out_channel * self.expansion) self.relu = nn.ReLU(inplace=True) self.downsample = downsample def forward(self, x): identity = x out = self.conv1(x) out = self.bn1(out) out = self.relu(out) out = self.conv2(out) out = self.bn2(out) out = self.relu(out) out = self.conv3(out) out = self.bn3(out) if self.downsample is not None: identity = self.downsample(x) out += identity out = self.relu(out) return out class ResNet(nn.Module): def __init__(self, num_outputs=None, #输出的分类数 backbone=None, pretrained=False, curriculum_steps=None, extra_outputs=0, share_top_y=True, pred_category=False, block=BasicBlock, block_num=[2,2,2,2],include_top=True, groups=1, width_per_group=64): # blocks_num:残差结构中每个block存在多少个layer层 super(ResNet, self).__init__() self.include_top = include_top self.in_channel = 64 # 输入图片经过第一层卷积的通道数 self.groups = groups self.width_per_group = width_per_group self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2, padding=3, bias=False) self.bn1 = nn.BatchNorm2d(self.in_channel) self.relu = nn.ReLU(inplace=True) self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) self.layer1 = self._make_layer(block, 64, block_num[0]) self.layer2 = self._make_layer(block, 128, block_num[1], stride=2) self.layer3 = self._make_layer(block, 256, block_num[2], stride=2) self.layer4 = self._make_layer(block, 512, block_num[3], stride=2) 
self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) # output size =(1,1) self.fc = nn.Linear(512 * block.expansion, 1000) image_size = 12 patch_size = 3 # 后期尝试改为2 dim = 128 depth = 2 num_classes = 35 expansion_factor = 4 num_patches = (image_size // patch_size) ** 2 self.curriculum_steps = [0, 0, 0, 0] if curriculum_steps is None else curriculum_steps self.share_top_y = share_top_y self.extra_outputs = extra_outputs self.pred_category = pred_category self.sigmoid = nn.Sigmoid() def _make_layer(self, block, channel, block_num, stride=1): downsample = None if stride != 1 or self.in_channel != channel * block.expansion: downsample = nn.Sequential( nn.Conv2d(self.in_channel, channel * block.expansion, kernel_size=1, stride=stride, bias=False), nn.BatchNorm2d(channel * block.expansion)) layers = [] layers.append(block(self.in_channel, channel, downsample=downsample, stride=stride, groups=self.groups, width_per_group=self.width_per_group)) self.in_channel = channel * block.expansion for _ in range(1, block_num): layers.append(block(self.in_channel, channel, groups=self.groups, width_per_group=self.width_per_group)) return nn.Sequential(*layers) def forward(self, x, epoch=None, **kwargs): x = self.conv1(x) x = self.bn1(x) x = self.relu(x) x = self.maxpool(x) x = self.layer1(x) x = self.layer2(x) x = self.layer3(x) x = self.layer4(x) # torch.Size[B 128 12 20] x = self.avgpool(x) x = x.view(x.size(0), -1) x = self.fc(x) return x if __name__ == "__main__": # device = torch.device("cuda" if torch.cuda.is_available() else "cpu") device = 'cpu' print("-----device:{}".format(device)) print("-----Pytorch version:{}".format(torch.__version__)) input_tensor = torch.zeros(1, 3, 100, 100) print('input_tensor:', input_tensor.shape) pretrained_file = "./model_resnet18.pt" model = ResNet() model.load_state_dict(torch.load(pretrained_file)) model.eval() out = model(input_tensor) print("out:", out.shape, out[0, 0:10])
运行结果如下:
-----device:cpu
-----Pytorch version:1.5.0
input_tensor: torch.Size([1, 3, 100, 100])
out: torch.Size([1, 1000]) tensor([ 0.4010, 0.8436, 0.3071, 0.0627, 0.4446, 0.8470, 0.1882, 0.7012,
0.2988, -0.7574], grad_fn=<SliceBackward>)
3.修改resnet18的网络架构后,如何加载原来已经训练好的模型参数。
例如:
#将114行的代码修改成
self.layer44 = self._make_layer(block, 512, block_num[3], stride=2)
#将166行的代码修改成
x = self.layer44(x)
直接加载模型,运行结果:
RuntimeError: Error(s) in loading state_dict for ResNet:
Missing key(s) in state_dict: "layer44.0.conv1.weight", "layer44.0.bn1.weight", "layer44.0.bn1.bias", "layer44.0.bn1.running_mean", "layer44.0.bn1.running_var", "layer44.0.conv2.weight", "layer44.0.bn2.weight", "layer44.0.bn2.bias", "layer44.0.bn2.running_mean", "layer44.0.bn2.running_var", "layer44.0.downsample.0.weight", "layer44.0.downsample.1.weight", "layer44.0.downsample.1.bias", "layer44.0.downsample.1.running_mean", "layer44.0.downsample.1.running_var", "layer44.1.conv1.weight", "layer44.1.bn1.weight", "layer44.1.bn1.bias", "layer44.1.bn1.running_mean", "layer44.1.bn1.running_var", "layer44.1.conv2.weight", "layer44.1.bn2.weight", "layer44.1.bn2.bias", "layer44.1.bn2.running_mean", "layer44.1.bn2.running_var".
Unexpected key(s) in state_dict: "layer4.0.conv1.weight", "layer4.0.bn1.weight", "layer4.0.bn1.bias", "layer4.0.bn1.running_mean", "layer4.0.bn1.running_var", "layer4.0.bn1.num_batches_tracked", "layer4.0.conv2.weight", "layer4.0.bn2.weight", "layer4.0.bn2.bias", "layer4.0.bn2.running_mean", "layer4.0.bn2.running_var", "layer4.0.bn2.num_batches_tracked", "layer4.0.downsample.0.weight", "layer4.0.downsample.1.weight", "layer4.0.downsample.1.bias", "layer4.0.downsample.1.running_mean", "layer4.0.downsample.1.running_var", "layer4.0.downsample.1.num_batches_tracked", "layer4.1.conv1.weight", "layer4.1.bn1.weight", "layer4.1.bn1.bias", "layer4.1.bn1.running_mean", "layer4.1.bn1.running_var", "layer4.1.bn1.num_batches_tracked", "layer4.1.conv2.weight", "layer4.1.bn2.weight", "layer4.1.bn2.bias", "layer4.1.bn2.running_mean", "layer4.1.bn2.running_var", "layer4.1.bn2.num_batches_tracked".
方法一:将原来预训练好的模型参数迁移到新的resnet18网络架构中,只迁移两者相同的模型参数,不同的参数仍然保持随机初始化。
def transfer_model(pretrained_file, model):
    """Load into ``model`` every pretrained parameter that it can accept.

    Parameters that exist only in ``model`` keep their current (initial)
    values; entries of the checkpoint that do not fit are skipped.

    :param pretrained_file: path of a state_dict saved with ``torch.save``.
    :param model: the module to update in place.
    :return: the same ``model`` object.
    """
    # map_location keeps the load working on machines without the device the
    # checkpoint was saved from; load_state_dict copies onto the model's device.
    pretrained_dict = torch.load(pretrained_file, map_location="cpu")
    model_dict = model.state_dict()
    # Drop the entries the new model cannot take before merging.
    pretrained_dict = transfer_state_dict(pretrained_dict, model_dict)
    model_dict.update(pretrained_dict)
    model.load_state_dict(model_dict)
    return model


def transfer_state_dict(pretrained_dict, model_dict):
    """Return the entries of ``pretrained_dict`` that fit ``model_dict``.

    An entry is kept when its key exists in ``model_dict`` and both values
    report the same ``shape`` (values without a ``shape`` attribute are
    compared by key only). Skipped keys are printed.

    :param pretrained_dict: state_dict loaded from the checkpoint.
    :param model_dict: state_dict of the target model.
    :return: a new dict holding only the transferable entries.
    """
    state_dict = {}
    for k, v in pretrained_dict.items():
        if k not in model_dict:
            print("Missing key(s) in state_dict :{}".format(k))
            continue
        # Same name but different shape (e.g. a resized layer) would make
        # load_state_dict raise, so leave such parameters untouched.
        if getattr(v, "shape", None) != getattr(model_dict[k], "shape", None):
            print("Shape mismatch in state_dict :{}".format(k))
            continue
        state_dict[k] = v
    return state_dict


if __name__ == "__main__":
    input_tensor = torch.zeros(1, 3, 100, 100)
    print('input_tensor:', input_tensor.shape)
    pretrained_file = "./model_resnet18.pt"
    model1 = ResNet()
    model1 = transfer_model(pretrained_file, model1)
    # Inference mode, as in the other demos: batch-norm uses its running statistics.
    model1.eval()
    out1 = model1(input_tensor)
    print("out1:", out1.shape, out1[0, 0:10])
方法二:修改网络名称并迁移学习
由于我们将官方的resnet18的self.layer4改为了:self.layer44 ,我们仅仅修改了一个网络名称而已,就导致模型参数加载出错。那么,我们如何将预训练好的模型修改成符合新网络架构?
def string_rename(old_string, new_string, start, end):
    """Return ``old_string`` with the slice ``[start:end]`` replaced by ``new_string``."""
    return old_string[:start] + new_string + old_string[end:]


def modify_model(pretrained_file, model, old_prefix, new_prefix):
    """Load a checkpoint into ``model`` after renaming key prefixes.

    :param pretrained_file: path of a state_dict saved with ``torch.save``.
    :param model: the module that receives the renamed parameters.
    :param old_prefix: key prefix(es) used in the checkpoint.
    :param new_prefix: replacement prefix(es), paired one-to-one with ``old_prefix``.
    :return: the same ``model`` object.
    """
    # map_location keeps the load working on machines without the device the
    # checkpoint was saved from.
    pretrained_dict = torch.load(pretrained_file, map_location="cpu")
    model_dict = model.state_dict()
    state_dict = modify_state_dict(pretrained_dict, model_dict, old_prefix, new_prefix)
    model.load_state_dict(state_dict)
    return model


def modify_state_dict(pretrained_dict, model_dict, old_prefix, new_prefix):
    """Build a state_dict for the new model by renaming checkpoint keys.

    Keys already present in ``model_dict`` are copied unchanged. Any other key
    that starts with one of ``old_prefix`` is stored under the name obtained
    by swapping that prefix for the paired ``new_prefix``. Keys that match
    neither rule are left out.

    :param pretrained_dict: state_dict loaded from the checkpoint.
    :param model_dict: state_dict of the target model.
    :param old_prefix: a prefix or a sequence of prefixes to replace.
    :param new_prefix: a prefix or a sequence of prefixes, same length as ``old_prefix``.
    :return: the renamed state_dict.
    :raises ValueError: if ``old_prefix`` and ``new_prefix`` differ in length.
    """
    # A bare string would otherwise be zipped character by character.
    if isinstance(old_prefix, str):
        old_prefix = [old_prefix]
    if isinstance(new_prefix, str):
        new_prefix = [new_prefix]
    old_prefix = list(old_prefix)
    new_prefix = list(new_prefix)
    # zip() would silently ignore the unpaired prefixes.
    if len(old_prefix) != len(new_prefix):
        raise ValueError(
            "old_prefix and new_prefix must have the same length, got {} and {}".format(
                len(old_prefix), len(new_prefix)))
    pairs = list(zip(old_prefix, new_prefix))

    state_dict = {}
    for k, v in pretrained_dict.items():
        if k in model_dict:
            state_dict[k] = v
            continue
        for o, n in pairs:
            if k.startswith(o):
                kk = string_rename(old_string=k, new_string=n, start=0, end=len(o))
                print("rename layer modules:{}-->{}".format(k, kk))
                state_dict[kk] = v
    return state_dict


if __name__ == "__main__":
    input_tensor = torch.zeros(1, 3, 100, 100)
    print('input_tensor:', input_tensor.shape)
    pretrained_file = "./model_resnet18.pt"
    new_file = "./model_resnet18_1.pt"
    model = ResNet()
    # Rename the checkpoint's "layer4" parameters to "layer44" and load them.
    new_model = modify_model(pretrained_file, model, old_prefix=["layer4"], new_prefix=["layer44"])
    torch.save(new_model.state_dict(), new_file)

    # The converted checkpoint can now be loaded directly.
    model2 = ResNet()
    model2.load_state_dict(torch.load(new_file, map_location="cpu"))
    model2.eval()
    out2 = model2(input_tensor)
    print("out2:", out2.shape, out2[0, 0:10])
方法三:去除原模型的某些模块
在resnet18的实际应用中,一般需要替换或去除子模块"fc"和"avgpool"来应对实际问题。下面在不修改原模型的基础上,通过"resnet18.named_children()"和"resnet18.children()"的方法去除子模块"fc"和"avgpool"。
from collections import OrderedDict

import torch
import torchvision.models as models

if __name__ == "__main__":
    # Randomly initialised resnet18; no weights are downloaded.
    resnet18 = models.resnet18(pretrained=False)
    print("resnet18", resnet18)

    # Variant 1: use named_children() so the sub-modules can be removed by name.
    resnet18_v1 = OrderedDict(resnet18.named_children())
    # remove avgpool, fc
    resnet18_v1.pop("avgpool")
    resnet18_v1.pop("fc")
    resnet18_v1 = torch.nn.Sequential(resnet18_v1)
    print("resnet18_v1", resnet18_v1)

    # Variant 2: use children() and drop the last two sub-modules
    # (presumably avgpool and fc, given torchvision's registration order).
    resnet18_v2 = torch.nn.Sequential(*list(resnet18.children())[:-2])
    # Print a label followed by the module (the original printed the module twice).
    print("resnet18_v2", resnet18_v2)
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。