Main contribution: VGGNet demonstrated that network depth is a critical factor for accuracy, and that depth can be increased cheaply by stacking small 3*3 convolution kernels in place of larger ones.

Model structure
| Layer | Input size | Kernel (H*W*count)/stride | Output size | Parameters |
| --- | --- | --- | --- | --- |
| Conv C11 | 224*224*3 | 3*3*64/1 | 224*224*64 | (3*3*3+1)*64 |
| Conv C12 | 224*224*64 | 3*3*64/1 | 224*224*64 | (3*3*64+1)*64 |
| Max pool | 224*224*64 | 2*2/2 | 112*112*64 | 0 |
| Conv C21 | 112*112*64 | 3*3*128/1 | 112*112*128 | (3*3*64+1)*128 |
| Conv C22 | 112*112*128 | 3*3*128/1 | 112*112*128 | (3*3*128+1)*128 |
| Max pool | 112*112*128 | 2*2/2 | 56*56*128 | 0 |
| Conv C31 | 56*56*128 | 3*3*256/1 | 56*56*256 | (3*3*128+1)*256 |
| Conv C32 | 56*56*256 | 3*3*256/1 | 56*56*256 | (3*3*256+1)*256 |
| Conv C33 | 56*56*256 | 3*3*256/1 | 56*56*256 | (3*3*256+1)*256 |
| Max pool | 56*56*256 | 2*2/2 | 28*28*256 | 0 |
| Conv C41 | 28*28*256 | 3*3*512/1 | 28*28*512 | (3*3*256+1)*512 |
| Conv C42 | 28*28*512 | 3*3*512/1 | 28*28*512 | (3*3*512+1)*512 |
| Conv C43 | 28*28*512 | 3*3*512/1 | 28*28*512 | (3*3*512+1)*512 |
| Max pool | 28*28*512 | 2*2/2 | 14*14*512 | 0 |
| Conv C51 | 14*14*512 | 3*3*512/1 | 14*14*512 | (3*3*512+1)*512 |
| Conv C52 | 14*14*512 | 3*3*512/1 | 14*14*512 | (3*3*512+1)*512 |
| Conv C53 | 14*14*512 | 3*3*512/1 | 14*14*512 | (3*3*512+1)*512 |
| Max pool | 14*14*512 | 2*2/2 | 7*7*512 | 0 |
| FC1 | 7*7*512 | 7*7*512*4096 | 1*1*4096 | (7*7*512+1)*4096 |
| FC2 | 1*1*4096 | 4096*4096 | 1*1*4096 | (4096+1)*4096 |
| FC3 | 1*1*4096 | 4096*1000 | 1*1*1000 | (4096+1)*1000 |
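The parameter column follows the formula (k*k*C_in + 1)*C_out for convolutions and (F_in + 1)*F_out for fully connected layers. As a sanity check, the formula can be evaluated directly; a minimal sketch (the layer list below is transcribed from the table, not taken from any library):

```python
# Verify the parameter column above: conv params = (3*3*C_in + 1) * C_out
conv_cfg = [(3, 64), (64, 64), (64, 128), (128, 128),
            (128, 256), (256, 256), (256, 256),
            (256, 512), (512, 512), (512, 512),
            (512, 512), (512, 512), (512, 512)]
fc_cfg = [(7 * 7 * 512, 4096), (4096, 4096), (4096, 1000)]

conv_params = sum((3 * 3 * c_in + 1) * c_out for c_in, c_out in conv_cfg)
fc_params = sum((f_in + 1) * f_out for f_in, f_out in fc_cfg)
print(f"conv: {conv_params:,}  fc: {fc_params:,}  total: {conv_params + fc_params:,}")
# conv: 14,714,688  fc: 123,642,856  total: 138,357,544 (~138M, the familiar VGG16 figure)
```

Note that nearly 90% of the parameters sit in the three fully connected layers, with FC1 alone accounting for about 103M.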
VGGNet comes in six versions: VGG11, VGG11-LRN (which applies LRN after the first layer), VGG13, VGG16-1, VGG16-3, and VGG19; the number in each name is the count of weight layers. VGG16-1 means the last convolution in each of the last three blocks uses a 1*1 kernel, while VGG16-3 uses 3*3 kernels throughout. VGG19 adds one more 3*3 convolution layer to each of the last three blocks.
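Because the versions differ only in how many convolutions each block stacks, they can all be generated from a single configuration list. A minimal sketch of that idea (the `cfgs` dict and `make_layers` helper are illustrative, mirroring the common configuration-list pattern rather than the paper's code):

```python
import torch.nn as nn

# Each number is an output channel count; 'M' marks a 2*2 max-pool.
cfgs = {
    'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M',
              512, 512, 512, 'M', 512, 512, 512, 'M'],
    'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M',
              512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}

def make_layers(cfg):
    layers, in_ch = [], 3
    for v in cfg:
        if v == 'M':
            layers.append(nn.MaxPool2d(2, 2))
        else:
            layers += [nn.Conv2d(in_ch, v, kernel_size=3, padding=1),
                       nn.ReLU(inplace=True)]
            in_ch = v
    return nn.Sequential(*layers)

features = make_layers(cfgs['VGG16'])  # swap the key to build another depth
```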
A full VGG16 implementation in PyTorch (with BatchNorm added after each convolution, which the original paper does not use):

```python
# model.py
import torch
from torch import nn


class VGGNet16(nn.Module):
    def __init__(self, num_classes=10):
        super(VGGNet16, self).__init__()
        # Input 224*224*3, output 224*224*64; 112*112*64 after pooling
        self.Conv1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),

            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),
        )
        # Input 112*112*64, output 112*112*128; 56*56*128 after max pooling
        self.Conv2 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),

            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),
        )
        # Input 56*56*128, output 56*56*256; 28*28*256 after max pooling
        self.Conv3 = nn.Sequential(
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),

            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),

            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),
        )
        # Input 28*28*256, output 28*28*512; 14*14*512 after max pooling
        self.Conv4 = nn.Sequential(
            nn.Conv2d(256, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),

            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),

            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),
        )
        # Input 14*14*512, output 14*14*512; 7*7*512 after max pooling
        self.Conv5 = nn.Sequential(
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),

            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),

            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),
        )
        self.Conv = nn.Sequential(
            self.Conv1,
            self.Conv2,
            self.Conv3,
            self.Conv4,
            self.Conv5,
        )
        self.classifier = nn.Sequential(
            # Input is the flattened 7*7*512 feature map
            nn.Linear(7 * 7 * 512, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),

            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),

            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        x = self.Conv(x)
        # Flatten everything but the batch dimension: (N, 512, 7, 7) -> (N, 7*7*512)
        x = torch.flatten(x, start_dim=1)
        x = self.classifier(x)
        return x


if __name__ == "__main__":
    # A random 1*3*224*224 tensor stands in for one 3-channel 224*224 image
    x = torch.rand([1, 3, 224, 224])
    model = VGGNet16()
    print(model)
    y = model(x)
    print(y)
```
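To confirm that the feature-map sizes match the table above, a forward hook can print the output shape of each block; a minimal sketch (hook registration is standard PyTorch, the printing logic is mine):

```python
# Print the output size after each conv block of VGGNet16
import torch
from model import VGGNet16

model = VGGNet16()
for name in ['Conv1', 'Conv2', 'Conv3', 'Conv4', 'Conv5']:
    block = getattr(model, name)
    # n=name binds the current name into the lambda at definition time
    block.register_forward_hook(
        lambda m, inp, out, n=name: print(n, tuple(out.shape)))

with torch.no_grad():
    model(torch.rand(1, 3, 224, 224))
# Expected: Conv1 (1, 64, 112, 112) ... Conv5 (1, 512, 7, 7)
```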
Training script for CIFAR-10:

```python
# train.py
import os

import torch
from torch import nn
from torch import optim
from torchvision import datasets, transforms
import torch.utils.data

from model import VGGNet16

learning_rate = 1e-3
num_epoches = 100

# CIFAR-10 images are 32*32; RandomResizedCrop upsamples them to the
# 224*224 input size the network expects
data_transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

# Download the CIFAR-10 dataset
train_dataset = datasets.CIFAR10(root='./data', train=True, transform=data_transform, download=True)
train_dataloader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_size=64,
                                               shuffle=True)
test_dataset = datasets.CIFAR10('./data', train=False, transform=data_transform, download=True)
test_dataloader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=64,
                                              shuffle=False)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = VGGNet16().to(device)

# Loss and optimizer
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)
lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)


def train(dataloader, model, loss_fn, optimizer):
    model.train()
    loss, current, n = 0.0, 0.0, 0
    for batch, (x, y) in enumerate(dataloader):
        # Forward pass
        x, y = x.to(device), y.to(device)
        output = model(x)
        cur_loss = loss_fn(output, y)
        _, pred = torch.max(output, dim=1)
        cur_acc = torch.sum(y == pred) / output.shape[0]

        # Backward pass: clear stale gradients first
        optimizer.zero_grad()
        cur_loss.backward()
        # Update the network parameters from the gradients
        optimizer.step()
        loss += cur_loss.item()
        current += cur_acc.item()
        n += 1

    train_loss = loss / n
    train_acc = current / n

    # Average training loss over the epoch
    print('train_loss: ' + str(train_loss))
    # Average training accuracy over the epoch
    print('train_acc: ' + str(train_acc))


def val(dataloader, model, loss_fn):
    model.eval()
    loss, current, n = 0.0, 0.0, 0
    # torch.no_grad() disables gradient tracking inside the block, saving GPU
    # compute and memory; it does not change dropout/BN behaviour (model.eval() does)
    with torch.no_grad():
        for batch, (x, y) in enumerate(dataloader):
            # Forward pass
            x, y = x.to(device), y.to(device)
            output = model(x)
            cur_loss = loss_fn(output, y)
            _, pred = torch.max(output, dim=1)
            cur_acc = torch.sum(y == pred) / output.shape[0]

            loss += cur_loss.item()
            current += cur_acc.item()
            n += 1

    # Validation loss and accuracy
    print("val_loss: " + str(loss / n))
    print("val_acc: " + str(current / n))
    # Return the validation accuracy
    return current / n


best_acc = 0
for t in range(num_epoches):
    print(f'epoch {t + 1}\n-----------------')
    train(train_dataloader, model, loss_fn, optimizer)
    a = val(test_dataloader, model, loss_fn)
    lr_scheduler.step()
    if a > best_acc:
        folder = 'save_model'
        if not os.path.exists(folder):
            os.mkdir(folder)
        best_acc = a
        print('save best model')
        torch.save(model.state_dict(), 'save_model/best_model.pth')

print('Done!')
```
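Once training finishes, the saved state dict can be loaded back for inference; a minimal sketch (the checkpoint path matches the training script above, and the random tensor is a stand-in for a properly preprocessed image):

```python
# Load the best checkpoint and classify one image
import torch
from model import VGGNet16

device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = VGGNet16().to(device)
model.load_state_dict(torch.load('save_model/best_model.pth', map_location=device))
model.eval()  # switch dropout/BN to inference behaviour

# CIFAR-10 class names, in label order
classes = ['airplane', 'automobile', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck']

x = torch.rand(1, 3, 224, 224).to(device)  # stand-in for a preprocessed image
with torch.no_grad():
    pred = model(x).argmax(dim=1).item()
print('predicted class:', classes[pred])
```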