赞
踩
在使用预训练模型微调训练时,我们通常需要根据实际的数据集以及想要达到的效果,修改预训练模型的结构。查阅了其他博客和torch.nn源码后,做个笔记。
为了更方便的了解,将使用torchvision中的模型convnext作介绍。
import torch import torchvision.models as models import torch.nn as nn model = models.convnext_tiny(pretrained = False) print(model) ######################输出的模型结果################### ConvNeXt( (features): Sequential( (0): Conv2dNormActivation( (0): Conv2d(3, 96, kernel_size=(4, 4), stride=(4, 4)) (1): LayerNorm2d((96,), eps=1e-06, elementwise_affine=True) ) (1): Sequential( (0): CNBlock( (block): Sequential( (0): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96) (1): Permute() (2): LayerNorm((96,), eps=1e-06, elementwise_affine=True) (3): Linear(in_features=96, out_features=384, bias=True) (4): GELU(approximate=none) (5): Linear(in_features=384, out_features=96, bias=True) (6): Permute() ) (stochastic_depth): StochasticDepth(p=0.0, mode=row) ) (1): CNBlock( (block): Sequential( (0): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96) (1): Permute() (2): LayerNorm((96,), eps=1e-06, elementwise_affine=True) (3): Linear(in_features=96, out_features=384, bias=True) (4): GELU(approximate=none) (5): Linear(in_features=384, out_features=96, bias=True) (6): Permute() ) (stochastic_depth): StochasticDepth(p=0.0058823529411764705, mode=row) ) (2): CNBlock( (block): Sequential( (0): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96) (1): Permute() (2): LayerNorm((96,), eps=1e-06, elementwise_affine=True) (3): Linear(in_features=96, out_features=384, bias=True) (4): GELU(approximate=none) (5): Linear(in_features=384, out_features=96, bias=True) (6): Permute() ) (stochastic_depth): StochasticDepth(p=0.011764705882352941, mode=row) ) ) (2): Sequential( (0): LayerNorm2d((96,), eps=1e-06, elementwise_affine=True) (1): Conv2d(96, 192, kernel_size=(2, 2), stride=(2, 2)) ) (3): Sequential( (0): CNBlock( (block): Sequential( (0): Conv2d(192, 192, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=192) (1): Permute() (2): LayerNorm((192,), eps=1e-06, elementwise_affine=True) (3): Linear(in_features=192, 
out_features=768, bias=True) (4): GELU(approximate=none) (5): Linear(in_features=768, out_features=192, bias=True) (6): Permute() ) (stochastic_depth): StochasticDepth(p=0.017647058823529415, mode=row) ) (1): CNBlock( (block): Sequential( (0): Conv2d(192, 192, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=192) (1): Permute() (2): LayerNorm((192,), eps=1e-06, elementwise_affine=True) (3): Linear(in_features=192, out_features=768, bias=True) (4): GELU(approximate=none) (5): Linear(in_features=768, out_features=192, bias=True) (6): Permute() ) (stochastic_depth): StochasticDepth(p=0.023529411764705882, mode=row) ) (2): CNBlock( (block): Sequential( (0): Conv2d(192, 192, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=192) (1): Permute() (2): LayerNorm((192,), eps=1e-06, elementwise_affine=True) (3): Linear(in_features=192, out_features=768, bias=True) (4): GELU(approximate=none) (5): Linear(in_features=768, out_features=192, bias=True) (6): Permute() ) (stochastic_depth): StochasticDepth(p=0.029411764705882353, mode=row) ) ) (4): Sequential( (0): LayerNorm2d((192,), eps=1e-06, elementwise_affine=True) (1): Conv2d(192, 384, kernel_size=(2, 2), stride=(2, 2)) ) (5): Sequential( (0): CNBlock( (block): Sequential( (0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384) (1): Permute() (2): LayerNorm((384,), eps=1e-06, elementwise_affine=True) (3): Linear(in_features=384, out_features=1536, bias=True) (4): GELU(approximate=none) (5): Linear(in_features=1536, out_features=384, bias=True) (6): Permute() ) (stochastic_depth): StochasticDepth(p=0.03529411764705883, mode=row) ) (1): CNBlock( (block): Sequential( (0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384) (1): Permute() (2): LayerNorm((384,), eps=1e-06, elementwise_affine=True) (3): Linear(in_features=384, out_features=1536, bias=True) (4): GELU(approximate=none) (5): Linear(in_features=1536, out_features=384, bias=True) (6): Permute() ) 
(stochastic_depth): StochasticDepth(p=0.0411764705882353, mode=row) ) (2): CNBlock( (block): Sequential( (0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384) (1): Permute() (2): LayerNorm((384,), eps=1e-06, elementwise_affine=True) (3): Linear(in_features=384, out_features=1536, bias=True) (4): GELU(approximate=none) (5): Linear(in_features=1536, out_features=384, bias=True) (6): Permute() ) (stochastic_depth): StochasticDepth(p=0.047058823529411764, mode=row) ) (3): CNBlock( (block): Sequential( (0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384) (1): Permute() (2): LayerNorm((384,), eps=1e-06, elementwise_affine=True) (3): Linear(in_features=384, out_features=1536, bias=True) (4): GELU(approximate=none) (5): Linear(in_features=1536, out_features=384, bias=True) (6): Permute() ) (stochastic_depth): StochasticDepth(p=0.052941176470588235, mode=row) ) (4): CNBlock( (block): Sequential( (0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384) (1): Permute() (2): LayerNorm((384,), eps=1e-06, elementwise_affine=True) (3): Linear(in_features=384, out_features=1536, bias=True) (4): GELU(approximate=none) (5): Linear(in_features=1536, out_features=384, bias=True) (6): Permute() ) (stochastic_depth): StochasticDepth(p=0.058823529411764705, mode=row) ) (5): CNBlock( (block): Sequential( (0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384) (1): Permute() (2): LayerNorm((384,), eps=1e-06, elementwise_affine=True) (3): Linear(in_features=384, out_features=1536, bias=True) (4): GELU(approximate=none) (5): Linear(in_features=1536, out_features=384, bias=True) (6): Permute() ) (stochastic_depth): StochasticDepth(p=0.06470588235294118, mode=row) ) (6): CNBlock( (block): Sequential( (0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384) (1): Permute() (2): LayerNorm((384,), eps=1e-06, elementwise_affine=True) (3): 
Linear(in_features=384, out_features=1536, bias=True) (4): GELU(approximate=none) (5): Linear(in_features=1536, out_features=384, bias=True) (6): Permute() ) (stochastic_depth): StochasticDepth(p=0.07058823529411766, mode=row) ) (7): CNBlock( (block): Sequential( (0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384) (1): Permute() (2): LayerNorm((384,), eps=1e-06, elementwise_affine=True) (3): Linear(in_features=384, out_features=1536, bias=True) (4): GELU(approximate=none) (5): Linear(in_features=1536, out_features=384, bias=True) (6): Permute() ) (stochastic_depth): StochasticDepth(p=0.07647058823529412, mode=row) ) (8): CNBlock( (block): Sequential( (0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384) (1): Permute() (2): LayerNorm((384,), eps=1e-06, elementwise_affine=True) (3): Linear(in_features=384, out_features=1536, bias=True) (4): GELU(approximate=none) (5): Linear(in_features=1536, out_features=384, bias=True) (6): Permute() ) (stochastic_depth): StochasticDepth(p=0.0823529411764706, mode=row) ) ) (6): Sequential( (0): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True) (1): Conv2d(384, 768, kernel_size=(2, 2), stride=(2, 2)) ) (7): Sequential( (0): CNBlock( (block): Sequential( (0): Conv2d(768, 768, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=768) (1): Permute() (2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) (3): Linear(in_features=768, out_features=3072, bias=True) (4): GELU(approximate=none) (5): Linear(in_features=3072, out_features=768, bias=True) (6): Permute() ) (stochastic_depth): StochasticDepth(p=0.08823529411764706, mode=row) ) (1): CNBlock( (block): Sequential( (0): Conv2d(768, 768, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=768) (1): Permute() (2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) (3): Linear(in_features=768, out_features=3072, bias=True) (4): GELU(approximate=none) (5): Linear(in_features=3072, out_features=768, 
bias=True) (6): Permute() ) (stochastic_depth): StochasticDepth(p=0.09411764705882353, mode=row) ) (2): CNBlock( (block): Sequential( (0): Conv2d(768, 768, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=768) (1): Permute() (2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) (3): Linear(in_features=768, out_features=3072, bias=True) (4): GELU(approximate=none) (5): Linear(in_features=3072, out_features=768, bias=True) (6): Permute() ) (stochastic_depth): StochasticDepth(p=0.1, mode=row) ) ) ) (avgpool): AdaptiveAvgPool2d(output_size=1) (classifier): Sequential( (0): LayerNorm2d((768,), eps=1e-06, elementwise_affine=True) (1): Flatten(start_dim=1, end_dim=-1) (2): Linear(in_features=768, out_features=1000, bias=True) ) )
pytorch 中的 state_dict 是一个简单的python的字典对象,将每一层与它的对应参数建立映射关系.(如model的每一层的weights及偏置等等)。这个方法的作用一方面是方便查看某一个层的权值和偏置数据,另一方面更多的是在模型保存的时候使用。
torch.save(model.state_dict(), 'model_weights.pth') # Save only the parameters (weights/biases) as a state_dict, not the whole model object
# When reusing the pretrained weights later:
model = models.convnext_tiny(pretrained = False) # Rebuild the identical architecture first (same layer names/shapes as when saved)
model.load_state_dict(torch.load('model_weights.pth')) # Load the saved parameters back into the model
# NOTE(review): torch.load restores tensors to the device they were saved on; consider map_location='cpu' for portability — confirm target device
这个方法也可以获取模型的参数信息,与前面的方法不同的是,model.parameters()方法返回的是一个生成器generator,每一个元素是从开头到结尾的参数,parameters没有对应的key名称,是一个由纯参数组成的generator;而state_dict是一个字典,每个参数张量都有与之对应的key名称。
PyTorch中模型参数都是由字典的形式保存,所以当你想要修改模型结构时,直接通过字典的方式访问你想要的子结构并且重新定义,就可以修改模型的结构。
model.classifier = nn.Linear(in_features=768, out_features=1000, bias=True) print(model) #########################输出的结果如下##################### ConvNeXt( (features): Sequential( (0): Conv2dNormActivation( (0): Conv2d(3, 96, kernel_size=(4, 4), stride=(4, 4)) (1): LayerNorm2d((96,), eps=1e-06, elementwise_affine=True) ) (1): Sequential( (0): CNBlock( (block): Sequential( (0): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96) (1): Permute() (2): LayerNorm((96,), eps=1e-06, elementwise_affine=True) (3): Linear(in_features=96, out_features=384, bias=True) (4): GELU(approximate=none) (5): Linear(in_features=384, out_features=96, bias=True) (6): Permute() ) (stochastic_depth): StochasticDepth(p=0.0, mode=row) ) (1): CNBlock( (block): Sequential( (0): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96) (1): Permute() (2): LayerNorm((96,), eps=1e-06, elementwise_affine=True) (3): Linear(in_features=96, out_features=384, bias=True) (4): GELU(approximate=none) (5): Linear(in_features=384, out_features=96, bias=True) (6): Permute() ) (stochastic_depth): StochasticDepth(p=0.0058823529411764705, mode=row) ) (2): CNBlock( (block): Sequential( (0): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96) (1): Permute() (2): LayerNorm((96,), eps=1e-06, elementwise_affine=True) (3): Linear(in_features=96, out_features=384, bias=True) (4): GELU(approximate=none) (5): Linear(in_features=384, out_features=96, bias=True) (6): Permute() ) (stochastic_depth): StochasticDepth(p=0.011764705882352941, mode=row) ) ) (2): Sequential( (0): LayerNorm2d((96,), eps=1e-06, elementwise_affine=True) (1): Conv2d(96, 192, kernel_size=(2, 2), stride=(2, 2)) ) (3): Sequential( (0): CNBlock( (block): Sequential( (0): Conv2d(192, 192, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=192) (1): Permute() (2): LayerNorm((192,), eps=1e-06, elementwise_affine=True) (3): Linear(in_features=192, out_features=768, bias=True) (4): 
GELU(approximate=none) (5): Linear(in_features=768, out_features=192, bias=True) (6): Permute() ) (stochastic_depth): StochasticDepth(p=0.017647058823529415, mode=row) ) (1): CNBlock( (block): Sequential( (0): Conv2d(192, 192, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=192) (1): Permute() (2): LayerNorm((192,), eps=1e-06, elementwise_affine=True) (3): Linear(in_features=192, out_features=768, bias=True) (4): GELU(approximate=none) (5): Linear(in_features=768, out_features=192, bias=True) (6): Permute() ) (stochastic_depth): StochasticDepth(p=0.023529411764705882, mode=row) ) (2): CNBlock( (block): Sequential( (0): Conv2d(192, 192, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=192) (1): Permute() (2): LayerNorm((192,), eps=1e-06, elementwise_affine=True) (3): Linear(in_features=192, out_features=768, bias=True) (4): GELU(approximate=none) (5): Linear(in_features=768, out_features=192, bias=True) (6): Permute() ) (stochastic_depth): StochasticDepth(p=0.029411764705882353, mode=row) ) ) (4): Sequential( (0): LayerNorm2d((192,), eps=1e-06, elementwise_affine=True) (1): Conv2d(192, 384, kernel_size=(2, 2), stride=(2, 2)) ) (5): Sequential( (0): CNBlock( (block): Sequential( (0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384) (1): Permute() (2): LayerNorm((384,), eps=1e-06, elementwise_affine=True) (3): Linear(in_features=384, out_features=1536, bias=True) (4): GELU(approximate=none) (5): Linear(in_features=1536, out_features=384, bias=True) (6): Permute() ) (stochastic_depth): StochasticDepth(p=0.03529411764705883, mode=row) ) (1): CNBlock( (block): Sequential( (0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384) (1): Permute() (2): LayerNorm((384,), eps=1e-06, elementwise_affine=True) (3): Linear(in_features=384, out_features=1536, bias=True) (4): GELU(approximate=none) (5): Linear(in_features=1536, out_features=384, bias=True) (6): Permute() ) (stochastic_depth): 
StochasticDepth(p=0.0411764705882353, mode=row) ) (2): CNBlock( (block): Sequential( (0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384) (1): Permute() (2): LayerNorm((384,), eps=1e-06, elementwise_affine=True) (3): Linear(in_features=384, out_features=1536, bias=True) (4): GELU(approximate=none) (5): Linear(in_features=1536, out_features=384, bias=True) (6): Permute() ) (stochastic_depth): StochasticDepth(p=0.047058823529411764, mode=row) ) (3): CNBlock( (block): Sequential( (0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384) (1): Permute() (2): LayerNorm((384,), eps=1e-06, elementwise_affine=True) (3): Linear(in_features=384, out_features=1536, bias=True) (4): GELU(approximate=none) (5): Linear(in_features=1536, out_features=384, bias=True) (6): Permute() ) (stochastic_depth): StochasticDepth(p=0.052941176470588235, mode=row) ) (4): CNBlock( (block): Sequential( (0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384) (1): Permute() (2): LayerNorm((384,), eps=1e-06, elementwise_affine=True) (3): Linear(in_features=384, out_features=1536, bias=True) (4): GELU(approximate=none) (5): Linear(in_features=1536, out_features=384, bias=True) (6): Permute() ) (stochastic_depth): StochasticDepth(p=0.058823529411764705, mode=row) ) (5): CNBlock( (block): Sequential( (0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384) (1): Permute() (2): LayerNorm((384,), eps=1e-06, elementwise_affine=True) (3): Linear(in_features=384, out_features=1536, bias=True) (4): GELU(approximate=none) (5): Linear(in_features=1536, out_features=384, bias=True) (6): Permute() ) (stochastic_depth): StochasticDepth(p=0.06470588235294118, mode=row) ) (6): CNBlock( (block): Sequential( (0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384) (1): Permute() (2): LayerNorm((384,), eps=1e-06, elementwise_affine=True) (3): Linear(in_features=384, 
out_features=1536, bias=True) (4): GELU(approximate=none) (5): Linear(in_features=1536, out_features=384, bias=True) (6): Permute() ) (stochastic_depth): StochasticDepth(p=0.07058823529411766, mode=row) ) (7): CNBlock( (block): Sequential( (0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384) (1): Permute() (2): LayerNorm((384,), eps=1e-06, elementwise_affine=True) (3): Linear(in_features=384, out_features=1536, bias=True) (4): GELU(approximate=none) (5): Linear(in_features=1536, out_features=384, bias=True) (6): Permute() ) (stochastic_depth): StochasticDepth(p=0.07647058823529412, mode=row) ) (8): CNBlock( (block): Sequential( (0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384) (1): Permute() (2): LayerNorm((384,), eps=1e-06, elementwise_affine=True) (3): Linear(in_features=384, out_features=1536, bias=True) (4): GELU(approximate=none) (5): Linear(in_features=1536, out_features=384, bias=True) (6): Permute() ) (stochastic_depth): StochasticDepth(p=0.0823529411764706, mode=row) ) ) (6): Sequential( (0): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True) (1): Conv2d(384, 768, kernel_size=(2, 2), stride=(2, 2)) ) (7): Sequential( (0): CNBlock( (block): Sequential( (0): Conv2d(768, 768, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=768) (1): Permute() (2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) (3): Linear(in_features=768, out_features=3072, bias=True) (4): GELU(approximate=none) (5): Linear(in_features=3072, out_features=768, bias=True) (6): Permute() ) (stochastic_depth): StochasticDepth(p=0.08823529411764706, mode=row) ) (1): CNBlock( (block): Sequential( (0): Conv2d(768, 768, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=768) (1): Permute() (2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) (3): Linear(in_features=768, out_features=3072, bias=True) (4): GELU(approximate=none) (5): Linear(in_features=3072, out_features=768, bias=True) (6): Permute() 
) (stochastic_depth): StochasticDepth(p=0.09411764705882353, mode=row) ) (2): CNBlock( (block): Sequential( (0): Conv2d(768, 768, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=768) (1): Permute() (2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) (3): Linear(in_features=768, out_features=3072, bias=True) (4): GELU(approximate=none) (5): Linear(in_features=3072, out_features=768, bias=True) (6): Permute() ) (stochastic_depth): StochasticDepth(p=0.1, mode=row) ) ) ) (avgpool): AdaptiveAvgPool2d(output_size=1) (classifier): Linear(in_features=768, out_features=1000, bias=True) )
但是,我们可以看到,当我们直接修改classifier里面的结构,他会将整个classifier都重新定义为你的输入的样子,那么当你只是要修改最后的分类层的话,你就只能重新去定义一整个sequential,并且在重新定义的时候,如果你已经加载了预训练的参数,预训练模型的参数就会丢失,这样就会非常麻烦。
那如何只修改classifier中最后一个线性层呢,我在网络上查找了这个内容,发现比较难找到相关的内容,大家都只能通过访问模型拥有key的部分的结构,对于Sequential内部的结构,有人尝试用model.classifier.0去访问,这并不符合Python的语法结构。所以我又去查阅了nn.Sequential的源码实现。
def _get_item_by_idx(self, iterator, idx) -> T:
    """Return the idx-th item of *iterator*.

    (Verbatim excerpt from the torch.nn.Sequential implementation; it is
    what makes list-style indexing such as model.classifier[2] work.)
    Negative indices are supported like a list: -size <= idx < size.
    """
    size = len(self)
    idx = operator.index(idx)  # accept any integer-like index object; raises TypeError otherwise
    if not -size <= idx < size:
        raise IndexError('index {} is out of range'.format(idx))
    idx %= size  # map a negative index onto its positive equivalent
    return next(islice(iterator, idx, None))  # advance the iterator to position idx and take that item
可以看到其中有个迭代器,所以我就尝试了用列表的访问方式去访问,结果证明是可行的。
model.classifier[2] = nn.Linear(in_features=768, out_features=4, bias=True) print(model) ####################输出结果如下######################## ConvNeXt( (features): Sequential( (0): Conv2dNormActivation( (0): Conv2d(3, 96, kernel_size=(4, 4), stride=(4, 4)) (1): LayerNorm2d((96,), eps=1e-06, elementwise_affine=True) ) (1): Sequential( (0): CNBlock( (block): Sequential( (0): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96) (1): Permute() (2): LayerNorm((96,), eps=1e-06, elementwise_affine=True) (3): Linear(in_features=96, out_features=384, bias=True) (4): GELU(approximate=none) (5): Linear(in_features=384, out_features=96, bias=True) (6): Permute() ) (stochastic_depth): StochasticDepth(p=0.0, mode=row) ) (1): CNBlock( (block): Sequential( (0): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96) (1): Permute() (2): LayerNorm((96,), eps=1e-06, elementwise_affine=True) (3): Linear(in_features=96, out_features=384, bias=True) (4): GELU(approximate=none) (5): Linear(in_features=384, out_features=96, bias=True) (6): Permute() ) (stochastic_depth): StochasticDepth(p=0.0058823529411764705, mode=row) ) (2): CNBlock( (block): Sequential( (0): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96) (1): Permute() (2): LayerNorm((96,), eps=1e-06, elementwise_affine=True) (3): Linear(in_features=96, out_features=384, bias=True) (4): GELU(approximate=none) (5): Linear(in_features=384, out_features=96, bias=True) (6): Permute() ) (stochastic_depth): StochasticDepth(p=0.011764705882352941, mode=row) ) ) (2): Sequential( (0): LayerNorm2d((96,), eps=1e-06, elementwise_affine=True) (1): Conv2d(96, 192, kernel_size=(2, 2), stride=(2, 2)) ) (3): Sequential( (0): CNBlock( (block): Sequential( (0): Conv2d(192, 192, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=192) (1): Permute() (2): LayerNorm((192,), eps=1e-06, elementwise_affine=True) (3): Linear(in_features=192, out_features=768, bias=True) (4): 
GELU(approximate=none) (5): Linear(in_features=768, out_features=192, bias=True) (6): Permute() ) (stochastic_depth): StochasticDepth(p=0.017647058823529415, mode=row) ) (1): CNBlock( (block): Sequential( (0): Conv2d(192, 192, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=192) (1): Permute() (2): LayerNorm((192,), eps=1e-06, elementwise_affine=True) (3): Linear(in_features=192, out_features=768, bias=True) (4): GELU(approximate=none) (5): Linear(in_features=768, out_features=192, bias=True) (6): Permute() ) (stochastic_depth): StochasticDepth(p=0.023529411764705882, mode=row) ) (2): CNBlock( (block): Sequential( (0): Conv2d(192, 192, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=192) (1): Permute() (2): LayerNorm((192,), eps=1e-06, elementwise_affine=True) (3): Linear(in_features=192, out_features=768, bias=True) (4): GELU(approximate=none) (5): Linear(in_features=768, out_features=192, bias=True) (6): Permute() ) (stochastic_depth): StochasticDepth(p=0.029411764705882353, mode=row) ) ) (4): Sequential( (0): LayerNorm2d((192,), eps=1e-06, elementwise_affine=True) (1): Conv2d(192, 384, kernel_size=(2, 2), stride=(2, 2)) ) (5): Sequential( (0): CNBlock( (block): Sequential( (0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384) (1): Permute() (2): LayerNorm((384,), eps=1e-06, elementwise_affine=True) (3): Linear(in_features=384, out_features=1536, bias=True) (4): GELU(approximate=none) (5): Linear(in_features=1536, out_features=384, bias=True) (6): Permute() ) (stochastic_depth): StochasticDepth(p=0.03529411764705883, mode=row) ) (1): CNBlock( (block): Sequential( (0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384) (1): Permute() (2): LayerNorm((384,), eps=1e-06, elementwise_affine=True) (3): Linear(in_features=384, out_features=1536, bias=True) (4): GELU(approximate=none) (5): Linear(in_features=1536, out_features=384, bias=True) (6): Permute() ) (stochastic_depth): 
StochasticDepth(p=0.0411764705882353, mode=row) ) (2): CNBlock( (block): Sequential( (0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384) (1): Permute() (2): LayerNorm((384,), eps=1e-06, elementwise_affine=True) (3): Linear(in_features=384, out_features=1536, bias=True) (4): GELU(approximate=none) (5): Linear(in_features=1536, out_features=384, bias=True) (6): Permute() ) (stochastic_depth): StochasticDepth(p=0.047058823529411764, mode=row) ) (3): CNBlock( (block): Sequential( (0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384) (1): Permute() (2): LayerNorm((384,), eps=1e-06, elementwise_affine=True) (3): Linear(in_features=384, out_features=1536, bias=True) (4): GELU(approximate=none) (5): Linear(in_features=1536, out_features=384, bias=True) (6): Permute() ) (stochastic_depth): StochasticDepth(p=0.052941176470588235, mode=row) ) (4): CNBlock( (block): Sequential( (0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384) (1): Permute() (2): LayerNorm((384,), eps=1e-06, elementwise_affine=True) (3): Linear(in_features=384, out_features=1536, bias=True) (4): GELU(approximate=none) (5): Linear(in_features=1536, out_features=384, bias=True) (6): Permute() ) (stochastic_depth): StochasticDepth(p=0.058823529411764705, mode=row) ) (5): CNBlock( (block): Sequential( (0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384) (1): Permute() (2): LayerNorm((384,), eps=1e-06, elementwise_affine=True) (3): Linear(in_features=384, out_features=1536, bias=True) (4): GELU(approximate=none) (5): Linear(in_features=1536, out_features=384, bias=True) (6): Permute() ) (stochastic_depth): StochasticDepth(p=0.06470588235294118, mode=row) ) (6): CNBlock( (block): Sequential( (0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384) (1): Permute() (2): LayerNorm((384,), eps=1e-06, elementwise_affine=True) (3): Linear(in_features=384, 
out_features=1536, bias=True) (4): GELU(approximate=none) (5): Linear(in_features=1536, out_features=384, bias=True) (6): Permute() ) (stochastic_depth): StochasticDepth(p=0.07058823529411766, mode=row) ) (7): CNBlock( (block): Sequential( (0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384) (1): Permute() (2): LayerNorm((384,), eps=1e-06, elementwise_affine=True) (3): Linear(in_features=384, out_features=1536, bias=True) (4): GELU(approximate=none) (5): Linear(in_features=1536, out_features=384, bias=True) (6): Permute() ) (stochastic_depth): StochasticDepth(p=0.07647058823529412, mode=row) ) (8): CNBlock( (block): Sequential( (0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384) (1): Permute() (2): LayerNorm((384,), eps=1e-06, elementwise_affine=True) (3): Linear(in_features=384, out_features=1536, bias=True) (4): GELU(approximate=none) (5): Linear(in_features=1536, out_features=384, bias=True) (6): Permute() ) (stochastic_depth): StochasticDepth(p=0.0823529411764706, mode=row) ) ) (6): Sequential( (0): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True) (1): Conv2d(384, 768, kernel_size=(2, 2), stride=(2, 2)) ) (7): Sequential( (0): CNBlock( (block): Sequential( (0): Conv2d(768, 768, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=768) (1): Permute() (2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) (3): Linear(in_features=768, out_features=3072, bias=True) (4): GELU(approximate=none) (5): Linear(in_features=3072, out_features=768, bias=True) (6): Permute() ) (stochastic_depth): StochasticDepth(p=0.08823529411764706, mode=row) ) (1): CNBlock( (block): Sequential( (0): Conv2d(768, 768, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=768) (1): Permute() (2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) (3): Linear(in_features=768, out_features=3072, bias=True) (4): GELU(approximate=none) (5): Linear(in_features=3072, out_features=768, bias=True) (6): Permute() 
) (stochastic_depth): StochasticDepth(p=0.09411764705882353, mode=row) ) (2): CNBlock( (block): Sequential( (0): Conv2d(768, 768, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=768) (1): Permute() (2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) (3): Linear(in_features=768, out_features=3072, bias=True) (4): GELU(approximate=none) (5): Linear(in_features=3072, out_features=768, bias=True) (6): Permute() ) (stochastic_depth): StochasticDepth(p=0.1, mode=row) ) ) ) (avgpool): AdaptiveAvgPool2d(output_size=1) (classifier): Sequential( (0): LayerNorm2d((768,), eps=1e-06, elementwise_affine=True) (1): Flatten(start_dim=1, end_dim=-1) (2): Linear(in_features=768, out_features=4, bias=True) ) )
那么我们能够顺利的访问模型的每个结构的话,修改的话也就十分简单了。
# Append a new Dropout layer to the end of the classifier Sequential,
# registered under the child-module key "add_dropout"
model.classifier.add_module("add_dropout",nn.Dropout())
print(model)  # the classifier now ends with the added Dropout module
参考链接: https://blog.csdn.net/ltochange/article/details/121421776
https://blog.csdn.net/qq_39332551/article/details/124943453
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。