Residual networks (ResNets) make it possible to build very deep convolutional networks. In theory, a deeper network can represent more complex features, but simply stacking more layers often causes the training loss to rise; residual connections largely solve this problem.
A basic residual block passes its input both through a small stack of convolutions and through a shortcut connection, then adds the two paths. The residual block used in this post is implemented as follows:
```python
import torch

class Residual(torch.nn.Module):
    def __init__(self, in_channels, out_channels, stride=1):
        super(Residual, self).__init__()
        self.stride = stride
        # "Same" padding: PyTorch does not pad automatically here, so the
        # padding size is computed by hand (3x3 kernel -> padding=1).
        self.before_relu = torch.nn.Sequential(
            torch.nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1),  # stride=2 halves H and W
            torch.nn.BatchNorm2d(out_channels),
            torch.nn.ReLU(inplace=True),
            torch.nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
            torch.nn.BatchNorm2d(out_channels),
        )
        self.relu1 = torch.nn.ReLU(inplace=True)
        # When the input and output channel counts differ, a 1x1 convolution
        # reshapes the shortcut to match.
        if in_channels != out_channels:
            self.conv1x1 = torch.nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride)
        else:
            self.conv1x1 = None

    def forward(self, x):
        out1 = self.before_relu(x)

        if self.conv1x1 is not None:
            x = self.conv1x1(x)

        out = self.relu1(out1 + x)
        return out
```
In the forward pass, the input first goes through convolution layer 1 (3x3 kernel, padding 1): with stride=1 the output H and W equal the input's, while with stride=2 they are halved. Batch normalization then normalizes each channel of that output without changing its size, followed by the ReLU activation; convolution layer 2 again keeps H and W unchanged, and a final batch-norm layer closes the main path. Because the block may change the number of channels, a 1x1 convolution on the shortcut path makes the channel count (and, when stride=2, the spatial size) of the two branches match before they are added.
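Both cases can be checked quickly (a sketch, assuming the `Residual` class above):

```python
# Sketch: verify the shape behaviour of the residual block.
block_same = Residual(64, 64)             # stride=1, channels unchanged
block_down = Residual(64, 128, stride=2)  # stride=2, channels doubled, H and W halved
x = torch.randn(1, 64, 16, 16)
print(block_same(x).shape)  # torch.Size([1, 64, 16, 16])
print(block_down(x).shape)  # torch.Size([1, 128, 8, 8])
```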
The full convolutional network built from these residual blocks (for 64x64x3 input images):
```python
class ResNet(torch.nn.Module):
    def __init__(self, in_channels, num_classes):
        super(ResNet, self).__init__()
        self.conv1 = torch.nn.Sequential(
            torch.nn.Conv2d(in_channels, 64, kernel_size=7, stride=2, padding=3),  # 32x32x64
            torch.nn.BatchNorm2d(64),
            torch.nn.ReLU(inplace=True)
        )

        self.conv2 = torch.nn.Sequential(
            torch.nn.MaxPool2d(kernel_size=3, stride=2, padding=1),  # 16x16x64
            Residual(64, 64),
            Residual(64, 64),
            Residual(64, 64)
        )

        self.conv3 = torch.nn.Sequential(
            Residual(64, 128, stride=2),  # 8x8x128
            Residual(128, 128),
            Residual(128, 128),
            Residual(128, 128),
            Residual(128, 128)
        )

        self.conv4 = torch.nn.Sequential(
            Residual(128, 256, stride=2),  # 4x4x256
            Residual(256, 256),
            Residual(256, 256),
            Residual(256, 256),
            Residual(256, 256),
            Residual(256, 256)
        )

        self.conv5 = torch.nn.Sequential(
            Residual(256, 512, stride=2),  # 2x2x512
            Residual(512, 512),
            Residual(512, 512)
        )

        self.avg_pool = torch.nn.AdaptiveAvgPool2d(1)  # 1x1x512
        self.fc = torch.nn.Linear(512, num_classes)

    def forward(self, x):
        out = self.conv1(x)
        out = self.conv2(out)
        out = self.conv3(out)
        out = self.conv4(out)
        out = self.conv5(out)

        out = self.avg_pool(out)
        out = out.view(out.size(0), -1)  # flatten to (minibatch, C*H*W)

        out = self.fc(out)
        return out
```
torch.nn.AdaptiveAvgPool2d(1) reduces the previous layer's output to 1x1 spatially while keeping its channel count (here 1x1x512), which makes it straightforward to attach the fully connected layer:
```python
m = torch.nn.AdaptiveAvgPool2d(1)
input = torch.randn(1, 64, 8, 9)
output = m(input)
print(output.shape)

# Output: torch.Size([1, 64, 1, 1]) -- the channel count is unchanged;
# the layout is minibatch x channels x height x width.
```
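With these pieces in place, a quick sanity check of the whole network's output shape (a sketch, using the `Residual` and `ResNet` classes defined above):

```python
# Sketch: feed a dummy batch through the network and check the output shape.
net = ResNet(in_channels=3, num_classes=6)
dummy = torch.randn(2, 3, 64, 64)  # two 64x64 RGB images
print(net(dummy).shape)            # expected: torch.Size([2, 6])
```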
The model and training hyperparameters:
```python
RN = ResNet(3, 6)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
RN.to(device)
num = torch.cuda.device_count()  # number of available GPUs (not used below)
epoch_num = 40
learning_rate = 0.01
batch_size = 32
seed = 3
costs = []
optimizer = torch.optim.Adam(RN.parameters(), lr=learning_rate)
loss_func = torch.nn.CrossEntropyLoss()
```
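Note that `seed` is defined above but never applied in this excerpt; to make runs reproducible it would be used roughly like this (a sketch, assuming `numpy` is imported as `np`):

```python
# Sketch: apply the seed (the original snippet defines it but does not use it).
torch.manual_seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(seed)
np.random.seed(seed)
```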
The data come from the hand-sign (SIGNS) dataset used in Andrew Ng's deep learning course.
```python
from torch.utils.data import DataLoader

train_signs_data_path = '/content/drive/MyDrive/Colab Notebooks/吴恩达L2HW3/dataset/train_signs.h5'
train_data = Image_Data(train_signs_data_path)
train_data_loader = DataLoader(train_data, shuffle=True, batch_size=32)

test_signs_data_path = '/content/drive/MyDrive/Colab Notebooks/吴恩达L2HW3/dataset/test_signs.h5'
test_data = Image_Data(test_signs_data_path)
test_data_loader = DataLoader(test_data, shuffle=False, batch_size=32)
```
The training and test sets are read with the following Dataset class:
```python
import h5py
import numpy as np
from torch.utils.data import Dataset

class Image_Data(Dataset):
    def __init__(self, data_path):
        super(Image_Data, self).__init__()
        # Read the HDF5 dataset; the key names differ between the train and
        # test files, so check which keys the file actually contains.
        dataset = h5py.File(data_path, "r")
        if "train_set_x" in dataset:
            data_set_x_orig = np.array(dataset["train_set_x"][:])
            data_set_y_orig = np.array(dataset["train_set_y"][:])
        else:
            data_set_x_orig = np.array(dataset["test_set_x"][:])
            data_set_y_orig = np.array(dataset["test_set_y"][:])

        data_set_x_orig = data_set_x_orig.astype("float32") / 255
        data_set_y_orig = data_set_y_orig.astype("float32")

        self.x_data = torch.from_numpy(data_set_x_orig)
        self.y_data = torch.from_numpy(data_set_y_orig)

        self.len = self.y_data.size(0)

    def __getitem__(self, item):
        return self.x_data[item], self.y_data[item]

    def __len__(self):
        return self.len

    def get_shape(self):
        return self.x_data.size(), self.y_data.size()
```
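The HDF5 images are stored channels-last, which is why the training loop below permutes the axes before the convolutions; the stored layout can be checked with `get_shape` (a sketch, assuming the loaders above; the sizes shown are the usual ones for this dataset):

```python
# Sketch: inspect the raw tensor layout of the loaded training data.
x_shape, y_shape = train_data.get_shape()
print(x_shape)  # e.g. torch.Size([1080, 64, 64, 3]) -- (N, H, W, C), channels last
print(y_shape)  # e.g. torch.Size([1080])
```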
Model training:
```python
import matplotlib.pyplot as plt

RN.train()
for epoch in range(epoch_num):
    cost = 0
    for i, data in enumerate(train_data_loader):
        img_data, img_label = data
        # The images come in as (N, H, W, C); PyTorch convolutions expect
        # (N, C, H, W), so swap the axes.
        img_data = img_data.permute(0, 3, 1, 2)

        img_data = img_data.to(device)
        img_label = img_label.to(device)

        optimizer.zero_grad()
        y_pred = RN(img_data)
        loss = loss_func(y_pred, img_label.long())

        loss.backward()   # backpropagation
        optimizer.step()  # parameter update

        cost = cost + loss.item()
    costs.append(cost / (i + 1))  # mean loss over the epoch

    if epoch % 5 == 0:
        print("epoch=" + str(epoch) + ": " + "loss=" + str(cost / (i + 1)))

plt.plot(costs)
plt.ylabel("cost")
plt.xlabel("epochs")
plt.title("Learning rate = " + str(learning_rate))
plt.show()
```
Output: the training loss falls steadily, reaching small values after relatively few epochs.
```
epoch=0: loss=3.1251472795710846
epoch=5: loss=0.8047693231526543
epoch=10: loss=0.41270597980302925
epoch=15: loss=0.26181215551846165
epoch=20: loss=0.09259872174109607
epoch=25: loss=0.07002952140208114
epoch=30: loss=0.05526945255662534
epoch=35: loss=0.04294046186259948
```
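The test loader built earlier is not used in this excerpt; a minimal evaluation pass over it could look like this (a sketch, reusing `RN`, `test_data_loader`, and `device` from above):

```python
# Sketch: compute classification accuracy on the test set.
RN.eval()
correct, total = 0, 0
with torch.no_grad():
    for img_data, img_label in test_data_loader:
        img_data = img_data.permute(0, 3, 1, 2).to(device)  # (N, H, W, C) -> (N, C, H, W)
        img_label = img_label.to(device).long()
        preds = RN(img_data).argmax(dim=1)
        correct += (preds == img_label).sum().item()
        total += img_label.size(0)
print("test accuracy:", correct / total)
```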