Accuracy: 95.31%
A few key points:
1. Modify the model: the stock resnet18 uses a 7x7 kernel in its first conv layer, which doesn't suit CIFAR10's tiny images; change it to a 3x3 kernel and change the stride and padding to 1 as well. Because the images are so small, the max-pooling layer is equally useless, so remove it. Change the final fully connected layer to output 10 classes.
2. Don't overdo the image augmentation; as long as the gap between the training and validation results stays under 10 percentage points, it doesn't really count as overfitting.
3. Start the learning rate at 0.1, and decay it by 50% whenever the loss hasn't dropped for 10 epochs (a scheduler sketch follows this list).
4. Use CrossEntropyLoss as the loss function.
5. Use SGD as the optimizer.
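The full script at the end of this post implements point 3 by hand with a counter. If you keep a single optimizer instead of re-creating it every epoch, the same policy can also be expressed with PyTorch's built-in ReduceLROnPlateau scheduler; a minimal sketch, where model_ft is the modified resnet18 defined in the next section:
- from torch import optim
- from torch.optim.lr_scheduler import ReduceLROnPlateau
-
- # model_ft is the modified resnet18 from the "modify the model" code below
- optimizer = optim.SGD(model_ft.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
- # Halve the learning rate once the monitored value has not improved for 10 epochs
- scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=10)
-
- # Call once per epoch with the validation-phase epoch loss:
- # scheduler.step(epoch_loss)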
Code for modifying the model:
- # Define the model
- model_ft = torchvision.models.resnet18(pretrained=False)
-
- # Modify the model
- model_ft.conv1 = nn.Conv2d(3, 64, 3, stride=1, padding=1, bias=False)  # change the first layer to a 3x3 kernel
- model_ft.maxpool = nn.MaxPool2d(1, 1, 0)  # the images are tiny and carry few features, so a 1x1 pooling kernel neutralizes the pooling layer
- num_ftrs = model_ft.fc.in_features  # number of input features of the (fc) layer
- model_ft.fc = nn.Linear(num_ftrs, 10)
I couldn't come up with a good way to delete the max-pooling layer outright, so I could only neutralize it this way.
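A small alternative (not what the run reported here used): torch.nn.Identity() is a pass-through module, so assigning it to the attribute effectively deletes the pooling layer:
- # Alternative: replace the pooling layer with a no-op module
- model_ft.maxpool = nn.Identity()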
If you don't want to use the stock model, you can also write one yourself.
Below is one I threw together.
- # Implementing ResNet18
- """
- ResNet18 consists of 17 convolutional layers and 1 fully connected layer.
- The 1x1 convolutions in the downsampling shortcuts are not counted,
- and neither are the pooling and activation layers.
- The main idea is one basic block that gets repeated over and over.
- """
- import torch
- from torch import nn
-
- # Basic block
- from torch.nn import Conv2d, BatchNorm2d, ReLU, MaxPool2d, AdaptiveAvgPool2d, Linear
-
-
- class BasicBlock(nn.Module):
-
-     def __init__(self, in_features, out_features) -> None:
-         super().__init__()
-
-         self.in_features = in_features
-         self.out_features = out_features
-
-         stride = 1
-         _features = out_features
-         if self.in_features != self.out_features:
-             # When the input and output channel counts differ, check whether the ratio is exactly 2
-             if self.out_features / self.in_features == 2.0:
-                 stride = 2  # when the output features are double the input features, the stride must double too so the parameter count doesn't explode
-             else:
-                 raise ValueError("The number of output features can be at most twice the number of input features!")
-
-         self.conv1 = Conv2d(in_features, _features, kernel_size=3, stride=stride, padding=1, bias=False)
-         self.bn1 = BatchNorm2d(_features, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
-         self.relu = ReLU(inplace=True)
-         self.conv2 = Conv2d(_features, _features, kernel_size=3, stride=1, padding=1, bias=False)
-         self.bn2 = BatchNorm2d(_features, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
-
-         # Downsampling (projection shortcut)
-         self.downsample = None if self.in_features == self.out_features else nn.Sequential(
-             Conv2d(in_features, out_features, kernel_size=1, stride=2, bias=False),
-             BatchNorm2d(out_features, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
-         )
-
-     def forward(self, x):
-         identity = x
-         out = self.conv1(x)
-         out = self.bn1(out)
-         out = self.relu(out)
-         out = self.conv2(out)
-         out = self.bn2(out)
-
-         # Use the downsampling shortcut when the input and output feature counts differ
-         if self.in_features != self.out_features:
-             identity = self.downsample(x)
-
-         # Residual sum
-         out += identity
-         out = self.relu(out)
-         return out
-
-
- class ResNet18(nn.Module):
-     def __init__(self) -> None:
-         super().__init__()
-
-         self.conv1 = Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
-         self.bn1 = BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
-         self.relu = ReLU(inplace=True)
-         # self.maxpool = MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
-         self.layer1 = nn.Sequential(
-             BasicBlock(64, 64),
-             BasicBlock(64, 64)
-         )
-         self.layer2 = nn.Sequential(
-             BasicBlock(64, 128),
-             BasicBlock(128, 128)
-         )
-         self.layer3 = nn.Sequential(
-             BasicBlock(128, 256),
-             BasicBlock(256, 256)
-         )
-         self.layer4 = nn.Sequential(
-             BasicBlock(256, 512),
-             BasicBlock(512, 512)
-         )
-         self.avgpool = AdaptiveAvgPool2d(output_size=(1, 1))
-         self.fc = Linear(in_features=512, out_features=10, bias=True)
-
-     def forward(self, x):
-         x = self.conv1(x)
-         x = self.bn1(x)
-         x = self.relu(x)
-         # x = self.maxpool(x)
-         x = self.layer1(x)
-         x = self.layer2(x)
-         x = self.layer3(x)
-         x = self.layer4(x)
-         x = self.avgpool(x)  # <---- output is {Tensor:(64,512,1,1)}
-         x = torch.flatten(x, 1)  # <---- easy to miss: going from the pooling layer to the fc layer needs a flatten; output is {Tensor:(64,512)}
-         x = self.fc(x)  # <---- output is {Tensor:(64,10)}
-         return x
-
-
- # Sanity-check the model with dummy data
-
- if __name__ == "__main__":
-     model = ResNet18()
-     print(model)
-     data = torch.ones((64, 3, 32, 32))
-     output = model(data)
-     print(output.shape)
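As a rough sanity check, the hand-written network should have the same parameter count as the modified torchvision resnet18 (on the order of 11 million parameters for 10 classes). A quick comparison, assuming the model_ft from the earlier snippet is also in scope:
- # Compare parameter counts of the hand-written net and the modified torchvision one
- print(sum(p.numel() for p in ResNet18().parameters()))
- print(sum(p.numel() for p in model_ft.parameters()))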
Image augmentation
- transforms.Compose([
-     transforms.ToTensor()
-     , transforms.RandomCrop(32, padding=4)  # pad the borders with 0 first, then randomly crop back to 32x32
-     , transforms.RandomHorizontalFlip(p=0.5)  # random horizontal flip with probability p
-     , transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])  # mean, standard deviation
- ])
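The Normalize numbers above are the common ImageNet statistics. CIFAR10 has its own per-channel mean and std, and instead of hard-coding values you can compute them from the raw training images; a small sketch (the dataset path is the same one used later, and all names here are illustrative):
- import torch
- from torchvision.datasets import CIFAR10
- from torchvision.transforms import transforms
-
- # Load the raw training images as tensors only, with no augmentation
- raw_train = CIFAR10('/kaggle/input/cifar10-python', train=True,
-                     transform=transforms.ToTensor(), download=True)
- data = torch.stack([img for img, _ in raw_train])       # shape (50000, 3, 32, 32)
- per_channel = data.permute(1, 0, 2, 3).reshape(3, -1)   # one row per channel
- print(per_channel.mean(dim=1))  # per-channel mean
- print(per_channel.std(dim=1))   # per-channel std
For CIFAR10 this fits comfortably in memory (roughly 600 MB as float tensors); the run reported here kept the ImageNet values and still passed 95%.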
At first my thinking was that, to avoid overfitting, I could pile on the data augmentation, so I applied rotation, horizontal flips, vertical flips, grayscale conversion, and random jitter of hue, saturation, contrast and brightness. But the validation accuracy stubbornly refused to get past 90%, and a single run takes several hours. Painful!
If you're interested you can try it yourself:
- train_transforms = transforms.Compose([
-
-     transforms.RandomRotation(45)  # random rotation, angle chosen between -45 and 45 degrees
-     , transforms.RandomCrop(32, padding=4)  # pad the borders with 0 first, then randomly crop back to 32x32
-     , transforms.RandomHorizontalFlip(p=0.5)  # random horizontal flip with probability p
-     , transforms.RandomVerticalFlip(p=0.5)  # random vertical flip
-     , transforms.ColorJitter(brightness=0.2, contrast=0.1, saturation=0.1, hue=0.1)
-     # arguments: brightness, contrast, saturation, hue, all jittered randomly
-     , transforms.RandomGrayscale(p=0.025)  # convert to grayscale with this probability (3 channels with R=G=B)
-     , transforms.ToTensor()
-     , transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])  # mean, standard deviation
- ])
One more pitfall: don't upscale the CIFAR10 images to 224 and feed them straight into Resnet18 for training; that is very slow, because all the image preprocessing happens on the CPU. I haven't found a good way to move the preprocessing onto the GPU yet.
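What does help on the CPU side is letting the DataLoader run the transforms in several worker processes and pin the output memory; a minimal sketch (num_workers is a guess to tune to your machine, and image_datasets refers to the dict in the full script below):
- # Run the CPU-side preprocessing in parallel worker processes
- train_loader = torch.utils.data.DataLoader(
-     image_datasets['train'],
-     batch_size=128,
-     shuffle=True,
-     num_workers=4,    # number of preprocessing processes, depends on your CPU
-     pin_memory=True   # speeds up host-to-GPU copies
- )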
If you want things to go even faster, you can split the CIFAR10 training set 8:2, using 80% for training and 20% for validation; that's still plenty of data and gives the same kind of result, and at the very end you run the CIFAR10 test set once for the final numbers.
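A minimal sketch of that 80/20 split using torch.utils.data.random_split, reusing the dataset and transform names from the full script below:
- from torch.utils.data import random_split
-
- full_train = CIFAR10(DATASET_PATH, train=True, transform=data_transforms['train'], download=True)
- n_train = int(len(full_train) * 0.8)   # 40000 images for training
- n_valid = len(full_train) - n_train    # 10000 images for validation
- train_set, valid_set = random_split(full_train, [n_train, n_valid])
-
- # The official test split is only touched once, at the very end
- test_set = CIFAR10(DATASET_PATH, train=False, transform=data_transforms['valid'], download=True)
One caveat: the validation subset produced this way inherits the training transform (with augmentation); wrapping it so that it uses the 'valid' transform gives cleaner validation numbers.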
Full code:
- # Using the stock Resnet18 model
- # Using the full training and test sets
-
- import time
- import copy
- import numpy as np
- import torch
- import torchvision.models
- from tqdm import tqdm
- from torchvision.transforms import transforms
- from torch import nn, optim
- from torch.utils.data import DataLoader
- from torchvision.datasets import CIFAR10
- from torch.utils.tensorboard import SummaryWriter
-
-
- if __name__ == "__main__":
-
-     # Everything in here effectively acts as a global variable !!
-
-     # Compute on the GPU
-     device = torch.device("cuda")
-
-     # Total number of training epochs
-     total_epochs = 250
-     # Samples per batch
-     batch_size = 128
-     # Initial learning rate
-     Lr = 0.1
-
-     DATASET_PATH = '/kaggle/input/cifar10-python'
-     SAVE_PATH = '/kaggle/working/'
-     filename = '{}best_cnn_model'.format(SAVE_PATH)  # the file extension is added when saving
-
-     torch.backends.cudnn.benchmark = True
-
-     # Prepare the data
-     data_transforms = {
-         'train': transforms.Compose([
-             transforms.ToTensor()
-             , transforms.RandomCrop(32, padding=4)  # pad the borders with 0 first, then randomly crop back to 32x32
-             , transforms.RandomHorizontalFlip(p=0.5)  # random horizontal flip with probability p
-             , transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])  # mean, standard deviation
-         ]),
-         'valid': transforms.Compose([
-             transforms.ToTensor()
-             , transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
-         ]),
-     }
-     # Prepare the datasets; training and validation go into one dict, otherwise the train/valid phases below would duplicate a lot of code
-     image_datasets = {
-         x: CIFAR10(DATASET_PATH, train=True if x == 'train' else False,
-                    transform=data_transforms[x], download=True) for x in ['train', 'valid']}
-
-     dataloaders: dict = {
-         x: torch.utils.data.DataLoader(
-             image_datasets[x], batch_size=batch_size, shuffle=True if x == 'train' else False
-         ) for x in ['train', 'valid']
-     }
-
-     # Define the model
-     model_ft = torchvision.models.resnet18(pretrained=False)
-
-     # Modify the model
-     model_ft.conv1 = nn.Conv2d(3, 64, 3, stride=1, padding=1, bias=False)  # change the first layer to a 3x3 kernel
-     model_ft.maxpool = nn.MaxPool2d(1, 1, 0)  # the images are tiny and carry few features, so a 1x1 pooling kernel neutralizes the pooling layer
-     num_ftrs = model_ft.fc.in_features  # number of input features of the (fc) layer
-     model_ft.fc = nn.Linear(num_ftrs, 10)
-
-     model_ft.to(device)
-     # Create the loss function
-     loss_fn = nn.CrossEntropyLoss()
-     loss_fn.to(device)
-
-     # Train the model
-     # Show which layers will be trained
-     print("==============Layers to be trained in the current model==============")
-     for name, params in model_ft.named_parameters():
-         if params.requires_grad:
-             print(name)
-
-     # Bookkeeping needed for training
-     # Number of epochs in a row in which the validation loss has not improved
-     counter = 0
-     # Number of batches processed
-     total_step = {
-         'train': 0, 'valid': 0
-     }
-     # Start time
-     since = time.time()
-     # Current minimum validation loss
-     valid_loss_min = np.Inf
-     # Numeric suffix of the saved model file
-     save_num = 0
-     # Best accuracy so far
-     best_acc = 0
-
-     for epoch in range(total_epochs):
-         # Adjust the learning rate dynamically
-         if counter / 10 == 1:
-             counter = 0
-             Lr = Lr * 0.5
-
-         # Re-create the optimizer in every epoch???
-         optimizer = optim.SGD(model_ft.parameters(), lr=Lr, momentum=0.9, weight_decay=5e-4)
-
-         print('Epoch {}/{}'.format(epoch + 1, total_epochs))
-         print('-' * 10)
-         print()
-         # Train and validate: every epoch runs train first, then valid
-         for phase in ['train', 'valid']:
-             # Put the model into the right mode
-             if phase == 'train':
-                 model_ft.train()  # training
-             else:
-                 model_ft.eval()  # validation
-
-             # Accumulated loss
-             running_loss = 0.0
-             # Number of correct predictions
-             running_corrects = 0
-
-             # Read one full batch of data at a time
-             for inputs, labels in tqdm(dataloaders[phase]):
-                 inputs = inputs.to(device)
-                 labels = labels.to(device)
-
-                 # Zero the gradients
-                 optimizer.zero_grad()
-                 # Compute and update gradients only during training
-                 with torch.set_grad_enabled(phase == 'train'):
-                     outputs = model_ft(inputs)
-                     loss = loss_fn(outputs, labels)
-
-                     # torch.max() returns a tuple: the first element is the maximum value, the second is its index
-                     _, preds = torch.max(outputs, 1)  # forward pass; here you can check whether gradients change during valid
-
-                     # Update the weights only in the training phase
-                     if phase == 'train':
-                         loss.backward()  # backpropagation
-                         optimizer.step()  # update the weights
-                         # TODO: log the learning rate with a SummaryWriter
-                         # ....
-
-                 # Accumulate the loss
-                 running_loss += loss.item() * inputs.size(0)  # loss is the batch mean, so multiply by the batch size to get the batch total
-                 running_corrects += (preds == labels).sum()  # total number of correct predictions
-                 # One more batch done
-                 total_step[phase] += 1
-
-             # After a full pass over this phase, compute its loss and accuracy
-             epoch_loss = running_loss / len(dataloaders[phase].sampler)  # average loss over this epoch
-             epoch_acc = float(running_corrects) / len(dataloaders[phase].sampler)  # accuracy over this epoch
-
-             time_elapsed = time.time() - since
-             print()
-             print('Total time so far {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
-             print('{} Loss: {:.4f}[{}] Acc: {:.4f}'.format(phase, epoch_loss, counter, epoch_acc))
-
-             if phase == 'valid':
-                 # Keep the best model seen so far
-                 if epoch_loss < valid_loss_min:  # epoch_acc > best_acc:
-
-                     best_acc = epoch_acc
-
-                     # Save the current model
-                     best_model_wts = copy.deepcopy(model_ft.state_dict())
-                     state = {
-                         'state_dict': model_ft.state_dict(),
-                         'best_acc': best_acc,
-                         'optimizer': optimizer.state_dict(),
-                     }
-                     # Keep only the two most recent checkpoints
-                     save_num = 0 if save_num > 1 else save_num
-                     save_name_t = '{}_{}.pth'.format(filename, save_num)
-                     torch.save(state, save_name_t)  # \033[1;31m red text \033[0m
-                     print("Saved best model, accuracy:\033[1;31m {:.2f}%\033[0m, file: {}".format(best_acc * 100, save_name_t))
-                     save_num += 1
-                     valid_loss_min = epoch_loss
-                     counter = 0
-                 else:
-                     counter += 1
-
-         print()
-         print('Current learning rate : {:.7f}'.format(optimizer.param_groups[0]['lr']))
-         print()
-
-     # Training finished
-     time_elapsed = time.time() - since
-     print()
-     print('Done!')
-     print('Total run time {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
-     print('Best validation accuracy: {:4f}'.format(best_acc))
-     save_num = save_num - 1
-     save_num = save_num if save_num < 0 else 1
-     save_name_t = '{}_{}.pth'.format(filename, save_num)
-     print('Best model saved at: {}'.format(save_name_t))
-
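To turn the saved checkpoint into a final test-set number, reload it and run one evaluation pass. A minimal sketch, assuming the names from the script above; in that script the 'valid' loader is already the CIFAR10 test split, and the checkpoint path should be whichever file the run reported last:
- # Reload the best checkpoint and evaluate once on the test split
- checkpoint = torch.load('/kaggle/working/best_cnn_model_1.pth', map_location=device)
- model_ft.load_state_dict(checkpoint['state_dict'])
- model_ft.eval()
-
- correct = 0
- with torch.no_grad():
-     for inputs, labels in dataloaders['valid']:
-         inputs, labels = inputs.to(device), labels.to(device)
-         preds = model_ft(inputs).argmax(dim=1)
-         correct += (preds == labels).sum().item()
- print('Test accuracy: {:.2f}%'.format(100 * correct / len(dataloaders['valid'].sampler)))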
Excerpt of the run output:
Epoch 1/250
----------
100%|██████████| 391/391 [00:50<00:00, 7.80it/s]
Total time so far 0m 50s
train Loss: 1.8680[0] Acc: 0.3292
100%|██████████| 79/79 [00:03<00:00, 20.67it/s]
Total time so far 0m 54s
valid Loss: 1.4895[0] Acc: 0.4499
Saved best model, accuracy: 44.99%, file: /kaggle/working/best_cnn_model_0.pth
Current learning rate : 0.1000000

Epoch 11/250
----------
100%|██████████| 391/391 [00:41<00:00, 9.33it/s]
Total time so far 8m 31s
train Loss: 0.4722[2] Acc: 0.8391
100%|██████████| 79/79 [00:03<00:00, 21.90it/s]
Total time so far 8m 35s
valid Loss: 0.5824[2] Acc: 0.8068
Saved best model, accuracy: 80.68%, file: /kaggle/working/best_cnn_model_0.pth
Current learning rate : 0.1000000

Epoch 46/250
----------
100%|██████████| 391/391 [00:41<00:00, 9.46it/s]
Total time so far 35m 11s
train Loss: 0.2126[0] Acc: 0.9276
100%|██████████| 79/79 [00:03<00:00, 21.00it/s]
Total time so far 35m 15s
valid Loss: 0.2970[0] Acc: 0.9004
Saved best model, accuracy: 90.04%, file: /kaggle/working/best_cnn_model_1.pth
Current learning rate : 0.0500000

Epoch 101/250
----------
100%|██████████| 391/391 [00:42<00:00, 9.23it/s]
Total time so far 77m 28s
train Loss: 0.0037[0] Acc: 0.9995
100%|██████████| 79/79 [00:03<00:00, 21.22it/s]
Total time so far 77m 32s
valid Loss: 0.1916[0] Acc: 0.9500
Saved best model, accuracy: 95.00%, file: /kaggle/working/best_cnn_model_1.pth
Current learning rate : 0.0031250

Epoch 128/250
----------
100%|██████████| 391/391 [00:42<00:00, 9.27it/s]
Total time so far 98m 10s
train Loss: 0.0021[4] Acc: 0.9999
100%|██████████| 79/79 [00:03<00:00, 21.68it/s]
Total time so far 98m 14s
valid Loss: 0.1775[4] Acc: 0.9522
Saved best model, accuracy: 95.22%, file: /kaggle/working/best_cnn_model_0.pth
Current learning rate : 0.0031250

Epoch 129/250
----------
100%|██████████| 391/391 [00:41<00:00, 9.32it/s]
Total time so far 98m 56s
train Loss: 0.0020[0] Acc: 1.0000
100%|██████████| 79/79 [00:03<00:00, 21.31it/s]
Total time so far 98m 59s
valid Loss: 0.1786[0] Acc: 0.9529
Current learning rate : 0.0031250
One last rant about Google's Colab Pro: it really isn't much use. The 100 compute units burn through very quickly, and then you're back to being a free-tier user, so don't waste the money; just use the free tier, keep the two most recent best checkpoints, and even if you get kicked off you can at least pick up again tomorrow. Or download the model and upload it to Kaggle to keep training for free there.
Still, the best option is to have your own GPU: you can kick off the code without babysitting it, go to sleep, and check the results when you wake up. But with graphics card prices the way they are right now, the less said the better...