VGG-Net16 Basics and CIFAR-10 Classification in PyTorch

Key contributions:

  • Multiple small convolutions replace a larger convolution layer: two stacked 3*3 kernels (the second 3*3 layer applied on top of the first) cover the same receptive field as a single 5*5 kernel, which reduces parameters, adds non-linear mappings, and increases network depth (see the parameter comparison sketch after this list).
  • Smaller pooling windows: AlexNet uses 3*3 pooling kernels, while VGG uses 2*2 throughout.
  • More convolution kernels give the feature maps more channels and more comprehensive feature extraction: the first block has 64 channels, doubling in later blocks up to a maximum of 512.
  • At test time the fully connected layers are not used; they are replaced by three convolutional layers, so the network is no longer restricted to a fixed input size and can accept inputs of arbitrary width and height.
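
As a quick check of the first point, the sketch below (my own illustration, not from the original post; the channel count C = 64 is just an example value) compares the parameter counts of two stacked 3*3 convolutions against a single 5*5 convolution with the same input and output channels. Both see a 5*5 receptive field, but the stacked version uses fewer weights.

```python
# Parameter-count comparison: two stacked 3x3 convs vs. one 5x5 conv.
import torch
from torch import nn

C = 64  # example channel count (assumption, not from the article)

stacked = nn.Sequential(                                          # receptive field: 5x5
    nn.Conv2d(C, C, kernel_size=3, padding=1, bias=False),
    nn.Conv2d(C, C, kernel_size=3, padding=1, bias=False),
)
single = nn.Conv2d(C, C, kernel_size=5, padding=2, bias=False)    # receptive field: 5x5

n_stacked = sum(p.numel() for p in stacked.parameters())  # 2 * 3*3*C*C = 73728
n_single = sum(p.numel() for p in single.parameters())    # 5*5*C*C     = 102400
print(n_stacked, n_single)

# Output shapes match, so the stacked version is a drop-in replacement.
x = torch.rand(1, C, 32, 32)
print(stacked(x).shape, single(x).shape)  # both torch.Size([1, 64, 32, 32])
```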

Model Structure

| Layer | Input size | Kernel size (H*W*count / stride) | Output size | Parameters |
|---|---|---|---|---|
| Conv C11 | 224*224*3 | 3*3*64 / 1 | 224*224*64 | (3*3*3+1)*64 |
| Conv C12 | 224*224*64 | 3*3*64 / 1 | 224*224*64 | (3*3*64+1)*64 |
| Max pooling | 224*224*64 | 2*2 / 2 | 112*112*64 | 0 |
| Conv C21 | 112*112*64 | 3*3*128 / 1 | 112*112*128 | (3*3*64+1)*128 |
| Conv C22 | 112*112*128 | 3*3*128 / 1 | 112*112*128 | (3*3*128+1)*128 |
| Max pooling | 112*112*128 | 2*2 / 2 | 56*56*128 | 0 |
| Conv C31 | 56*56*128 | 3*3*256 / 1 | 56*56*256 | (3*3*128+1)*256 |
| Conv C32 | 56*56*256 | 3*3*256 / 1 | 56*56*256 | (3*3*256+1)*256 |
| Conv C33 | 56*56*256 | 3*3*256 / 1 | 56*56*256 | (3*3*256+1)*256 |
| Max pooling | 56*56*256 | 2*2 / 2 | 28*28*256 | 0 |
| Conv C41 | 28*28*256 | 3*3*512 / 1 | 28*28*512 | (3*3*256+1)*512 |
| Conv C42 | 28*28*512 | 3*3*512 / 1 | 28*28*512 | (3*3*512+1)*512 |
| Conv C43 | 28*28*512 | 3*3*512 / 1 | 28*28*512 | (3*3*512+1)*512 |
| Max pooling | 28*28*512 | 2*2 / 2 | 14*14*512 | 0 |
| Conv C51 | 14*14*512 | 3*3*512 / 1 | 14*14*512 | (3*3*512+1)*512 |
| Conv C52 | 14*14*512 | 3*3*512 / 1 | 14*14*512 | (3*3*512+1)*512 |
| Conv C53 | 14*14*512 | 3*3*512 / 1 | 14*14*512 | (3*3*512+1)*512 |
| Max pooling | 14*14*512 | 2*2 / 2 | 7*7*512 | 0 |
| Fully connected FC1 | 7*7*512 | 7*7*512*4096 | 1*1*4096 | (7*7*512+1)*4096 |
| Fully connected FC2 | 1*1*4096 | 4096*4096 | 1*1*4096 | (4096+1)*4096 |
| Fully connected FC3 | 1*1*4096 | 4096*1000 | 1*1*1000 | (4096+1)*1000 |
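
Summing the last column gives the total parameter count of the original VGG16 with its 1000-class ImageNet head (no batch normalization, as in the table). The short snippet below is just a check of that arithmetic; the per-layer formulas are taken directly from the table.

```python
# Total VGG16 parameter count from the per-layer formulas in the table above.
conv = [
    (3, 64), (64, 64),                     # block 1
    (64, 128), (128, 128),                 # block 2
    (128, 256), (256, 256), (256, 256),    # block 3
    (256, 512), (512, 512), (512, 512),    # block 4
    (512, 512), (512, 512), (512, 512),    # block 5
]
conv_params = sum((3 * 3 * c_in + 1) * c_out for c_in, c_out in conv)
fc_params = (7 * 7 * 512 + 1) * 4096 + (4096 + 1) * 4096 + (4096 + 1) * 1000
print(conv_params + fc_params)  # 138357544, roughly 138 million parameters
```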

VGGNet comes in six versions: VGG11, VGG11-LRN (with LRN applied in the first layer), VGG13, VGG16-1, VGG16-3, and VGG19, where the numeric suffix indicates the number of weight layers. In VGG16-1 the last convolution of each of the last three blocks uses a 1*1 kernel, while in VGG16-3 it uses 3*3. VGG19 adds one more 3*3 convolution to each of the last three blocks.
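A common way to express these variants is a per-version configuration list, where numbers are convolution output channels and 'M' marks a max-pooling layer; this is similar in spirit to how torchvision builds its VGG models. The sketch below is illustrative only (it covers the 3*3 variants, not VGG11-LRN or VGG16-1) and is not the code from this post.

```python
# Illustrative configuration lists for the VGG variants described above.
# Numbers are convolution output channels; 'M' marks a 2x2 max-pooling layer.
from torch import nn

cfgs = {
    "vgg11": [64, "M", 128, "M", 256, 256, "M", 512, 512, "M", 512, 512, "M"],
    "vgg13": [64, 64, "M", 128, 128, "M", 256, 256, "M", 512, 512, "M", 512, 512, "M"],
    "vgg16": [64, 64, "M", 128, 128, "M", 256, 256, 256, "M",
              512, 512, 512, "M", 512, 512, 512, "M"],
    "vgg19": [64, 64, "M", 128, 128, "M", 256, 256, 256, 256, "M",
              512, 512, 512, 512, "M", 512, 512, 512, 512, "M"],
}

def make_features(cfg):
    """Build the convolutional feature extractor for a given configuration."""
    layers, in_ch = [], 3
    for v in cfg:
        if v == "M":
            layers.append(nn.MaxPool2d(2, 2))
        else:
            layers += [nn.Conv2d(in_ch, v, kernel_size=3, padding=1),
                       nn.ReLU(inplace=True)]
            in_ch = v
    return nn.Sequential(*layers)

features = make_features(cfgs["vgg16"])  # the 13 conv layers of VGG16-3
```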

The full VGG16 used in this post (with batch normalization added after each convolution and a 10-class head for CIFAR-10) is defined in model.py:

```python
# model.py
import torch
from torch import nn


class VGGNet16(nn.Module):
    def __init__(self, num_classes=10):
        super(VGGNet16, self).__init__()
        # Input 224*224*3, output 224*224*64; 112*112*64 after pooling
        self.Conv1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),
        )
        # Input 112*112*64, output 112*112*128; 56*56*128 after max pooling
        self.Conv2 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),
        )
        # Input 56*56*128, output 56*56*256; 28*28*256 after max pooling
        self.Conv3 = nn.Sequential(
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),
        )
        # Input 28*28*256, output 28*28*512; 14*14*512 after max pooling
        self.Conv4 = nn.Sequential(
            nn.Conv2d(256, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),
        )
        # Input 14*14*512, output 14*14*512; 7*7*512 after max pooling
        self.Conv5 = nn.Sequential(
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),
        )
        self.Conv = nn.Sequential(
            self.Conv1,
            self.Conv2,
            self.Conv3,
            self.Conv4,
            self.Conv5,
        )
        self.classifier = nn.Sequential(
            # Input: the flattened 7*7*512 feature map
            nn.Linear(7 * 7 * 512, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        x = self.Conv(x)
        x = torch.flatten(x, start_dim=1)
        # x = x.view(-1, 7*7*512)
        x = self.classifier(x)
        return x


if __name__ == "__main__":
    # Random 1*3*224*224 tensor, i.e. one 3-channel 224*224 image
    x = torch.rand([1, 3, 224, 224])
    model = VGGNet16()
    print(model)
    y = model(x)
    print(y)
```
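
Because the classifier's first Linear layer is hard-wired to 7*7*512 inputs, the model only accepts 224*224 images. A quick way to confirm the per-block shapes listed in the comments is to push a dummy tensor through each block in turn; this check is my own addition, not part of the original post.

```python
# Shape check: trace a dummy 224*224 image through the five conv blocks.
import torch
from model import VGGNet16  # assumes model.py above is on the import path

model = VGGNet16()
x = torch.rand(1, 3, 224, 224)
for name in ["Conv1", "Conv2", "Conv3", "Conv4", "Conv5"]:
    x = getattr(model, name)(x)
    print(name, tuple(x.shape))
# Conv1 (1, 64, 112, 112) ... Conv5 (1, 512, 7, 7) -> matches the 7*7*512 classifier input
print(sum(p.numel() for p in model.parameters()))  # total trainable parameters (incl. BN)
```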
The training script below (train.py) downloads CIFAR-10, applies ImageNet-style augmentation and normalization, trains the model with SGD, and saves the best-performing weights:

```python
# train.py
import os
import torch
from torch import nn
from model import VGGNet16
from torch import optim
from torchvision import datasets, transforms
import torch.utils.data

# batch_size = 256
learning_rate = 1e-3
num_epoches = 100

data_transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

# Download the CIFAR-10 dataset
train_dataset = datasets.CIFAR10(root='./data', train=True, transform=data_transform, download=True)
train_dataloader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_size=64,
                                               shuffle=True)
test_dataset = datasets.CIFAR10('./data', train=False, transform=data_transform, download=True)
test_dataloader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=64,
                                              shuffle=False)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = VGGNet16().to(device)

# Loss and optimizer
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)
lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)


def train(dataloader, model, loss_fn, optimizer):
    model.train()
    loss, current, n = 0.0, 0.0, 0
    for batch, (x, y) in enumerate(dataloader):
        # Forward pass
        x, y = x.to(device), y.to(device)
        output = model(x)
        cur_loss = loss_fn(output, y)
        _, pred = torch.max(output, dim=1)
        cur_acc = torch.sum(y == pred) / output.shape[0]
        # Backward pass: clear gradients from the previous step first
        optimizer.zero_grad()
        cur_loss.backward()
        # Update network parameters from the gradients
        optimizer.step()
        loss += cur_loss.item()
        current += cur_acc.item()
        n += 1
    train_loss = loss / n
    train_acc = current / n
    # Average training loss and accuracy
    print('train_loss: ' + str(train_loss))
    print('train_acc: ' + str(train_acc))


def val(dataloader, model, loss_fn):
    model.eval()
    loss, current, n = 0.0, 0.0, 0
    # torch.no_grad() disables gradient tracking inside the with block, saving GPU
    # compute and memory; it does not change the behaviour of dropout or BN layers
    # (that is what model.eval() is for).
    with torch.no_grad():
        for batch, (x, y) in enumerate(dataloader):
            # Forward pass
            x, y = x.to(device), y.to(device)
            output = model(x)
            cur_loss = loss_fn(output, y)
            _, pred = torch.max(output, dim=1)
            cur_acc = torch.sum(y == pred) / output.shape[0]
            loss += cur_loss.item()
            current += cur_acc.item()
            n += 1
    # Average validation loss and accuracy
    print("val_loss: " + str(loss / n))
    print("val_acc: " + str(current / n))
    # Return the model's accuracy
    return current / n


min_acc = 0
for t in range(num_epoches):
    print(f'epoch {t + 1}\n-----------------')
    train(train_dataloader, model, loss_fn, optimizer)
    a = val(test_dataloader, model, loss_fn)
    # Decay the learning rate by 0.1 every 10 epochs
    lr_scheduler.step()
    if a > min_acc:
        folder = 'save_model'
        if not os.path.exists(folder):
            os.mkdir(folder)
        min_acc = a
        print('save best model')
        torch.save(model.state_dict(), 'save_model/best_model.pth')
print('Done!')
```
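
After training, the saved weights can be reloaded for inference. The sketch below is my own addition (the class names follow the standard CIFAR-10 label order): it loads save_model/best_model.pth and predicts the class of a single test image, using a deterministic resize instead of the random training augmentation.

```python
# predict.py -- minimal inference sketch (not part of the original post)
import torch
from torchvision import datasets, transforms
from model import VGGNet16

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# CIFAR-10 class names in the standard label order
classes = ['airplane', 'automobile', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck']

# Deterministic preprocessing for evaluation: resize to 224, no random augmentation
transform = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

model = VGGNet16().to(device)
model.load_state_dict(torch.load('save_model/best_model.pth', map_location=device))
model.eval()

test_dataset = datasets.CIFAR10('./data', train=False, transform=transform, download=True)
img, label = test_dataset[0]
with torch.no_grad():
    logits = model(img.unsqueeze(0).to(device))
    pred = logits.argmax(dim=1).item()
print(f'predicted: {classes[pred]}, ground truth: {classes[label]}')
```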
