赞
踩
参考视频:2.1 pytorch官方demo(Lenet)_哔哩哔哩_bilibili
Pytorch Tensor的通道排序:[batch,channel,height,width]
CIFAR10 dataset: It has the classes: ‘airplane’, ‘automobile’, ‘bird’, ‘cat’, ‘deer’, ‘dog’, ‘frog’, ‘horse’, ‘ship’, ‘truck’. The images in CIFAR-10 are of size 3x32x32, i.e. 3-channel color images of 32x32 pixels in size.
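为什么每计算一个batch就需要调用一次optimizer.zero_grad():因为PyTorch默认会在每次调用loss.backward()时把新梯度累加到参数的.grad上,而不是覆盖;如果不先清零,当前batch的梯度会与之前batch的梯度叠加,导致参数更新方向错误。(反过来,也可以利用这一特性,多次backward之后再调用一次optimizer.step(),从而变相实现更大的batch size。)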
pytorch官网:-docs可查看各个函数的用法,-tutorials可以查看示例
If data = torch.max(outputs, dim=1), then data will be a tuple containing two tensors.
data[0]: This tensor will contain the maximum values along dimension 1 (the class dimension) of the outputs tensor. It will have a shape of [batch], where batch is the number of validation images in the batch.
data[1]: This tensor will contain the indices of the maximum values along dimension 1 (the class dimension) of the outputs tensor. It will also have a shape of [batch], where each element represents the predicted class label (index) for each validation image in the batch.
To get the size of a tensor along a specific dimension, use the method size(dim) or index the shape property, e.g. shape[dim].
import torch

# Example label tensor with shape [batch_size]; the two calls below work the
# same way for a tensor of any rank.
val_label = torch.zeros(8, dtype=torch.long)

# Using the size() method
size_along_first_dim = val_label.size(0)
# Using the shape property (equivalent result)
size_along_first_dim = val_label.shape[0]
- import torch.nn as nn
- import torch.nn.functional as F
-
class LeNet(nn.Module):
    """LeNet-style CNN for 3-channel 32x32 images such as CIFAR-10.

    Two conv + max-pool stages are followed by three fully connected layers.

    Args:
        num_classes: Number of output logits produced by the last layer.
            Defaults to 10.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, 5)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 32, 5)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(32 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

    def forward(self, x):
        """Return the raw class logits for a batch of images.

        Args:
            x: Tensor of shape (N, 3, 32, 32).

        Returns:
            Tensor of shape (N, num_classes); no softmax is applied.

        Raises:
            RuntimeError: If the spatial size of ``x`` does not reduce to
                5x5 after the two conv/pool stages.
        """
        x = F.relu(self.conv1(x))    # (N, 3, 32, 32) -> (N, 16, 28, 28)
        x = self.pool1(x)            # -> (N, 16, 14, 14)
        x = F.relu(self.conv2(x))    # -> (N, 32, 10, 10)
        x = self.pool2(x)            # -> (N, 32, 5, 5)
        # Flatten everything except the batch dimension. Unlike
        # view(-1, 32*5*5), this never folds surplus features into the batch
        # dimension, so a wrongly sized input fails loudly in fc1.
        x = x.flatten(start_dim=1)   # -> (N, 32*5*5)
        x = F.relu(self.fc1(x))      # -> (N, 120)
        x = F.relu(self.fc2(x))      # -> (N, 84)
        x = self.fc3(x)              # -> (N, num_classes)
        return x
- import torch
- import torchvision
- import torch.nn as nn
- from model import LeNet
- import torch.optim as optim
- import torchvision.transforms as transforms
-
def main():
    """Train LeNet on CIFAR-10 and save the learned weights to ./Lenet.pth.

    The CIFAR-10 data is read from ./data (it is not downloaded here), the
    first four validation images are previewed, and the network is trained
    for 5 epochs with Adam. Every 500 mini-batches the mean training loss
    and the accuracy on the validation set are printed.
    """
    # ToTensor converts a PIL Image or numpy.ndarray (H x W x C) in the range
    # [0, 255] to a torch.FloatTensor of shape (C x H x W) in [0.0, 1.0].
    # Normalize then applies, per channel,
    #     output[channel] = (input[channel] - mean[channel]) / std[channel]
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])

    # 50000 training images.
    # Set download=True on first use to fetch the dataset automatically.
    train_set = torchvision.datasets.CIFAR10(root='./data', train=True,
                                             download=False, transform=transform)
    # shuffle: whether to shuffle the dataset each epoch.
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=36,
                                               shuffle=True, num_workers=0)

    # 10000 validation images.
    # Set download=True on first use to fetch the dataset automatically.
    val_set = torchvision.datasets.CIFAR10(root='./data', train=False,
                                           download=False, transform=transform)
    # batch_size=10000 makes the loader yield the validation set as one batch.
    val_loader = torch.utils.data.DataLoader(val_set, batch_size=10000,
                                             shuffle=False, num_workers=0)
    val_data_iter = iter(val_loader)
    val_image, val_label = next(val_data_iter)  # images and their labels

    classes = ('plane', 'car', 'bird', 'cat',
               'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

    def imshow(img):
        """Display a normalized (C, H, W) image tensor with matplotlib."""
        # Imported here because the module never imports pyplot at the top;
        # without it this helper fails with a NameError.
        import matplotlib.pyplot as plt

        img = img / 2 + 0.5  # undo Normalize(mean=0.5, std=0.5)
        # matplotlib expects (H, W, C), so move the channel axis last.
        plt.imshow(img.permute(1, 2, 0).numpy())
        plt.show()

    # Print the labels of the first four validation images ...
    print(' '.join(f'{classes[val_label[j]]:5s}' for j in range(4)))
    # ... and show exactly those four images, not the whole validation set.
    imshow(torchvision.utils.make_grid(val_image[:4]))

    net = LeNet()
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=0.001)

    for epoch in range(5):  # loop over the dataset multiple times

        running_loss = 0.0
        # step runs up to about 50000 / 36: the training set has 50000
        # images and the loader was built with batch_size=36.
        for step, data in enumerate(train_loader, start=0):
            # data is a list of [inputs, labels]
            inputs, labels = data

            # Zero the parameter gradients; backward() accumulates into .grad.
            optimizer.zero_grad()
            # forward + backward + optimize
            outputs = net(inputs)                  # forward pass
            loss = loss_function(outputs, labels)  # compute the loss
            loss.backward()                        # back-propagation
            optimizer.step()                       # parameter update

            # Print statistics every 500 mini-batches. step is zero-based, so
            # comparing the remainder with 499 fires right after the 500th
            # iteration instead of after the 501st.
            running_loss += loss.item()
            if step % 500 == 499:
                # No gradients are needed for evaluation; tracking them would
                # only cost extra memory and compute.
                with torch.no_grad():
                    outputs = net(val_image)  # [batch, 10]
                    predict_y = torch.max(outputs, dim=1)[1]
                    accuracy = torch.eq(predict_y, val_label).sum().item() / val_label.size(0)

                    print('[%d, %5d] train_loss: %.3f test_accuracy: %.3f' %
                          (epoch + 1, step + 1, running_loss / 500, accuracy))
                    running_loss = 0.0

    print('Finished Training')

    save_path = './Lenet.pth'
    torch.save(net.state_dict(), save_path)


if __name__ == '__main__':
    main()
在train时,如果是基于预训练权重进行迁移学习,需要在数据预处理时先将图片的R、G、B三个通道分别减去[123.68,116.78,103.94](像素取值范围为0~255),这是ImageNet所有图片三个通道各自的均值;如果是从头训练,则不需要减去。
num_workers:加载数据时使用的子进程数(不是线程数),0表示在主进程中加载数据;在Windows系统上如果设置为大于0的值,需要把训练入口代码放在 if __name__ == '__main__': 保护之下,否则会报错,不确定时可以设为0。
- import torch.nn as nn
- import torch
-
# Official pretrained weights, keyed by model name.
# The values are plain URLs; the surrounding '<' and '>' were markdown
# autolink markers and would make the URLs invalid for downloading.
model_urls = {
    'vgg11': 'https://download.pytorch.org/models/vgg11-bbd30ac9.pth',
    'vgg13': 'https://download.pytorch.org/models/vgg13-c768596a.pth',
    'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth',
    'vgg19': 'https://download.pytorch.org/models/vgg19-dcbb9e9d.pth',
}
-
class VGG(nn.Module):
    """VGG classifier: a convolutional feature extractor plus a 3-layer MLP.

    Args:
        features: Module mapping (N, 3, H, W) images to (N, 512, h, w)
            feature maps.
        num_classes: Number of output logits. Defaults to 1000.
        init_weights: If True, re-initialize conv and linear layers with
            Xavier-uniform weights and zero biases.
    """

    def __init__(self, features, num_classes=1000, init_weights=False):
        super().__init__()
        self.features = features
        # Parameter-free pooling to a fixed 7x7 grid. For 224x224 inputs the
        # feature map is already 7x7, so this is an identity there; for other
        # input sizes it keeps the classifier's 512*7*7 input width valid.
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(p=0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(p=0.5),
            nn.Linear(4096, num_classes),
        )
        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        """Return class logits of shape (N, num_classes) for images ``x``."""
        # N x 3 x 224 x 224
        x = self.features(x)
        # N x 512 x 7 x 7
        x = self.avgpool(x)
        x = torch.flatten(x, start_dim=1)
        # N x 512*7*7
        x = self.classifier(x)
        return x

    def _initialize_weights(self):
        """Apply Xavier-uniform weights and zero biases to conv/linear layers."""
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.Linear)):
                nn.init.xavier_uniform_(m.weight)
                # Layers built with bias=False have no bias tensor to reset.
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
-
def make_features(cfg: list, in_channels: int = 3):
    """Build the convolutional feature extractor described by ``cfg``.

    Args:
        cfg: Layer layout. An int ``v`` adds a 3x3 convolution (padding 1)
            with ``v`` output channels followed by an in-place ReLU; the
            string ``"M"`` adds a 2x2 max-pool with stride 2.
        in_channels: Number of channels of the input images. Defaults to 3.

    Returns:
        An ``nn.Sequential`` containing the layers in order.
    """
    layers = []
    for v in cfg:
        if v == "M":
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        else:
            layers.append(nn.Conv2d(in_channels, v, kernel_size=3, padding=1))
            layers.append(nn.ReLU(True))
            # The next convolution consumes what this one produced.
            in_channels = v
    # nn.Sequential takes the layers as positional arguments.
    return nn.Sequential(*layers)
-
# Layer layouts per model: an int is the number of conv kernels (output
# channels) of a conv layer, 'M' is a max-pool layer.
cfgs = {
    'vgg11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],  # A
    'vgg13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],  # B
    'vgg16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],  # D
    'vgg19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],  # E
}


def vgg(model_name="vgg16", **kwargs):
    """Build a VGG model for one of the layouts in ``cfgs``.

    Args:
        model_name: Key into ``cfgs`` ('vgg11', 'vgg13', 'vgg16' or 'vgg19').
        **kwargs: Forwarded unchanged to the ``VGG`` constructor.

    Returns:
        The constructed ``VGG`` instance.

    Raises:
        ValueError: If ``model_name`` is not a key of ``cfgs``.
    """
    # Raise explicitly rather than assert: asserts are stripped under -O.
    if model_name not in cfgs:
        raise ValueError(
            "model name {!r} not in cfgs dict; expected one of {}".format(
                model_name, sorted(cfgs)))

    return VGG(make_features(cfgs[model_name]), **kwargs)
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。