赞
踩
若需要数据以及源代码加群753035545
当我们进行这样的处理时会产生非常多的参数,我们可以通过以下详细过程进行分析,看看如何利用最少的参数得到相同的结果
这只是一个分类情况,就用了16个参数,之后看最后的结果
使用了16*9个参数才能计算完成,而且这只是一层。我们发现最后的结果一般只与四个参数有关,那么我们可不可以将这四个参数提取出来?
最后通过一个卷积层将数据进行转换
在上图中,输入和卷积核都是张量,卷积运算就是用卷积核分别乘以输入张量中的每个元素,然后输出一个代表每个输入信息的张量,其中卷积核又称为权重过滤器,简称过滤器
第一个6是这样算出来的,那么第二个值是不是就是把卷积核往右边平移一个单位,接着算出来?但是到第一行的四个数时卷积核窗口将在输入矩阵之外,我们就需要填充
把图片尺寸进行扩展,扩展区域补零。设过滤器的大小为 $f$,步幅为 $s$,数据大小为 $n$,则补零的圈数为 $p=(f-1)/2$,卷积后的大小为 $(n+2p-f)/s+1$(不能整除时向下取整)
当数据是单通道时可以使用一个卷积核,但是图像数据多为三通道,就要用三个卷积核(每个通道一个),之后将三个通道的卷积结果加起来。
torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros')
池化层又称下采样。卷积后的图像可以直接用来训练分类器,可是这样将面临巨大的计算量,容易产生过拟合。为了降低训练网络的参数量及模型的过拟合程度,对卷积层的输出进行池化,一般有三种
torch.nn.MaxPool2d(kernel_size, stride=None, padding=0, dilation=1, return_indices=False, ceil_mode=False)
import torch
import torchvision
import torchvision.transforms as transforms
# Preprocessing pipeline:
#   * ToTensor turns a PIL.Image, or an (H, W, C) numpy.ndarray, with values
#     in [0, 255] into a (C, H, W) torch.FloatTensor with values in [0, 1].
#   * Normalize takes the per-channel mean first and the std second; with 0.5
#     for both, the values are rescaled to [-1, 1].
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])


def _cifar10_split(train):
    """Return the (dataset, loader) pair for the train or test CIFAR-10 split."""
    dataset = torchvision.datasets.CIFAR10(
        root='./data', train=train, download=False, transform=transform)
    # num_workers is the number of loader subprocesses; 0 means no
    # multiprocessing.
    loader = torch.utils.data.DataLoader(
        dataset, batch_size=4, shuffle=True, num_workers=2)
    return dataset, loader


trainset, trainloader = _cifar10_split(train=True)
testset, testloader = _cifar10_split(train=False)

classes = ('plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck')
# Notebook echo: evaluating the bare name displays the DataLoader's repr.
trainloader
<torch.utils.data.dataloader.DataLoader at 0x13bf0281898>
import matplotlib.pyplot as plt
import numpy as np

# NOTE: `%matplotlib inline` is an IPython magic, not Python syntax. Run it in
# a notebook cell if inline rendering is required.


def imshow(img):
    """Display a normalised (C, H, W) image tensor with matplotlib.

    Normalisation subtracted the mean 0.5 and then divided by the std 0.5, so
    the inverse is to multiply by 0.5 and add 0.5.
    """
    img = img / 2 + 0.5  # unnormalize
    npimg = img.numpy()
    # Swap the axes from (C, H, W) back to the (H, W, C) layout that
    # plt.imshow expects.
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()


# Grab one batch of training data. The loader shuffles, so it is a random
# sample.
dataiter = iter(trainloader)
images, labels = next(dataiter)

# Show the images as a single grid.
imshow(torchvision.utils.make_grid(images))
# Print the matching class names, one per image in the batch.
print(' '.join('%5s' % classes[label] for label in labels))
horse truck frog truck
# Notebook echo: show the shape of the sampled batch.
images.shape
torch.Size([4, 3, 32, 32])
import torch.nn as nn
import torch.nn.functional as F

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


class CNNNet(nn.Module):
    """Two conv/pool stages followed by two fully connected layers.

    The out_channels of conv1 is a free choice, but conv2's in_channels must
    match it. The in_features of fc1 must equal channels * height * width of
    the last pooled feature map, so it has to be recomputed whenever a kernel
    size, stride or channel count changes.

    Shape walk-through for a 3x32x32 input:
        conv1 (k=5, s=1):  32x32x3  -> 28x28x8    28 = (32 - 5) / 1 + 1
        pool1 (k=2, s=2):  28x28x8  -> 14x14x8    14 = 28 / 2
        conv2 (k=3, s=1):  14x14x8  -> 12x12x16   12 = (14 - 3) / 1 + 1
        pool2 (k=2, s=2):  12x12x16 -> 6x6x16      6 = 12 / 2
        flatten:           6x6x16   -> 576       576 = 6 * 6 * 16
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=8, kernel_size=5, stride=1)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3, stride=1)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(576, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Return raw class scores of shape (batch, 10)."""
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        # 16 = conv2 out_channels, 6 * 6 = spatial size after pool2.
        x = x.view(-1, 16 * 6 * 6)
        x = F.relu(self.fc1(x))
        # No activation on the last layer: the scores must stay unbounded
        # (negative values included) for the loss to separate the classes.
        return self.fc2(x)


net = CNNNet()
net = net.to(device)
# Total number of scalar entries across every parameter tensor of the network.
total_params = sum(p.numel() for p in net.parameters())
print("net have {} paramerters in total".format(total_params))
net have 76922 paramerters in total
import torch.optim as optim
# Learning rate shared by whichever optimizer is selected below.
LR = 0.001
# Cross-entropy over the 10 class scores produced by the network.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=LR, momentum=0.9)
# Alternative optimizer:
# optimizer = optim.Adam(net.parameters(), lr=LR)
# Print the module tree to check each layer's configuration.
print(net)
CNNNet(
(conv1): Conv2d(3, 8, kernel_size=(5, 5), stride=(1, 1))
(pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(conv2): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1))
(pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(fc1): Linear(in_features=576, out_features=128, bias=True)
(fc2): Linear(in_features=128, out_features=10, bias=True)
)
# Take the first four child modules of the model (conv1, pool1, conv2, pool2).
nn.Sequential(*list(net.children())[:4])
Sequential(
(0): Conv2d(3, 8, kernel_size=(5, 5), stride=(1, 1))
(1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(2): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1))
(3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
# Train for one full pass over the training set.
for epoch in range(1):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # Fetch one mini-batch and move it to the model's device.
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        # Clear the gradients accumulated by the previous step.
        optimizer.zero_grad()

        # Forward pass, backward pass, parameter update.
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Report the mean loss of the last 2000 mini-batches. The loop must
        # run to the end of the loader for this report to be reachable.
        running_loss += loss.item()
        if i % 2000 == 1999:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')
Finished Training
# Take one batch from the test loader.
dataiter = iter(testloader)
images, labels = next(dataiter)
# The batch is not moved to `device` yet: imshow() converts it with .numpy().
imshow(torchvision.utils.make_grid(images))
# Print the true class name of every image in the batch.
print('GroundTruth: ', ' '.join('%5s' % classes[label] for label in labels))
GroundTruth: frog frog deer truck
# Move the sample batch to the model's device before the forward pass.
images, labels = images.to(device), labels.to(device)
# Inference only, so no autograd graph is needed.
with torch.no_grad():
    outputs = net(images)
# The index of the highest score along dim 1 is the predicted class.
_, predicted = torch.max(outputs, 1)
print('Predicted: ', ' '.join('%5s' % classes[idx] for idx in predicted))
Predicted: horse horse horse horse
# Overall accuracy: count matching predictions across the whole test loader.
correct = 0
total = 0
with torch.no_grad():
    for images, labels in testloader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = net(images)
        # max over dim 1 returns (values, indices); the indices are the
        # predicted classes.
        predicted = outputs.max(dim=1)[1]
        total += labels.size(0)
        hits = (predicted == labels).sum().item()
        correct += hits

percent = 100 * correct / total
print('Accuracy of the network on the 10000 test images: %d %%' % (percent))
Accuracy of the network on the 10000 test images: 9 %
# Per-class accuracy: one running tally of hits and of samples per class.
class_correct = [0.0] * len(classes)
class_total = [0.0] * len(classes)
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        hits = predicted == labels
        # Walk the batch at its real length, so a short final batch or a
        # different batch_size is handled.
        for label, hit in zip(labels.tolist(), hits.tolist()):
            class_correct[label] += hit
            class_total[label] += 1

for name, n_correct, n_total in zip(classes, class_correct, class_total):
    print('Accuracy of %5s : %2d %%' % (name, 100 * n_correct / n_total))
Accuracy of plane : 0 %
Accuracy of car : 0 %
Accuracy of bird : 0 %
Accuracy of cat : 0 %
Accuracy of deer : 0 %
Accuracy of dog : 51 %
Accuracy of frog : 0 %
Accuracy of horse : 0 %
Accuracy of ship : 0 %
Accuracy of truck : 40 %
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。