
Building ResNet-50 from Scratch and Training It on FashionMNIST (PyTorch)


Detailed explanations are given in the code comments.

resnet50.py: defines the ResNet-50 network structure.

import torch
import torch.nn as nn
from torch.nn import functional as F
import torchsummary


class Bottleneck(nn.Module):
    """
    __init__
        in_channel:  number of input channels of the residual block
        out_channel: number of channels of the block's first two convolutions
        stride:      convolution stride
        downsample:  assigned in _make_layer; downsamples the shortcut (H/2, W/2)
                     and distinguishes the two kinds of bottleneck blocks
    """
    expansion = 4  # channel expansion factor of the block's third convolution

    def __init__(self, in_channel, out_channel, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=out_channel, kernel_size=1, stride=1,
                               bias=False)  # H, W unchanged. C: in_channel -> out_channel
        self.bn1 = nn.BatchNorm2d(num_features=out_channel)
        self.conv2 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel, kernel_size=3, stride=stride,
                               bias=False, padding=1)  # H/2, W/2 when stride=2. C unchanged
        self.bn2 = nn.BatchNorm2d(num_features=out_channel)
        self.conv3 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel * self.expansion, kernel_size=1,
                               stride=1, bias=False)  # H, W unchanged. C: out_channel -> 4*out_channel
        self.bn3 = nn.BatchNorm2d(num_features=out_channel * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

    def forward(self, x):
        identity = x  # keep the original input as the shortcut branch
        if self.downsample is not None:
            identity = self.downsample(x)  # if downsampling is needed: H/2, W/2 and C -> 4*out_channel (see downsample in ResNet50)
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)
        out = self.conv3(out)
        out = self.bn3(out)
        out += identity  # residual connection
        out = self.relu(out)
        return out


# todo ResNet
class ResNet50(nn.Module):
    """
    __init__
        block:       basic module to stack
        block_num:   number of stacked modules per stage; a list, [3, 4, 6, 3] for ResNet-50
        num_classes: number of classes of the final fully connected layer
    _make_layer
        block:       basic module to stack
        channel:     number of kernels of the first convolution in each stage; 64, 128, 256, 512 for ResNet-50
        block_num:   number of blocks stacked in the current stage
        stride:      default convolution stride
    """

    def __init__(self, block=Bottleneck, block_num=[3, 4, 6, 3], num_classes=1000):
        super(ResNet50, self).__init__()
        self.in_channel = 64  # output channels of conv1
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=self.in_channel, kernel_size=7, stride=2, padding=3,
                               bias=False)  # H/2, W/2. C: 3 -> 64. H' = (H - K + 2P) / S + 1
        self.bn1 = nn.BatchNorm2d(self.in_channel)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)  # H/2, W/2. C unchanged
        self.layer1 = self._make_layer(block=block, channel=64, block_num=block_num[0],
                                       stride=1)  # H, W unchanged. shortcut via downsample, out_channel = 64*4 = 256
        self.layer2 = self._make_layer(block=block, channel=128, block_num=block_num[1],
                                       stride=2)  # H/2, W/2. shortcut via downsample, out_channel = 128*4 = 512
        self.layer3 = self._make_layer(block=block, channel=256, block_num=block_num[2],
                                       stride=2)  # H/2, W/2. shortcut via downsample, out_channel = 256*4 = 1024
        self.layer4 = self._make_layer(block=block, channel=512, block_num=block_num[3],
                                       stride=2)  # H/2, W/2. shortcut via downsample, out_channel = 512*4 = 2048
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # pool each feature map to (1, 1); output dimension = channel count
        self.fc = nn.Linear(in_features=512 * block.expansion, out_features=num_classes)
        for m in self.modules():  # weight initialization
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')  # zero-mean normal; fan_out preserves the magnitude of the backward pass

    def _make_layer(self, block, channel, block_num, stride=1):
        downsample = None  # controls the shortcut branch
        if stride != 1 or self.in_channel != channel * block.expansion:
            # For conv2_x the spatial size H, W stays the same but channels grow x4, so the shortcut also needs x4 channels.
            # For conv3_x/conv4_x/conv5_x the shortcut needs both H, W halved and channels x4.
            downsample = nn.Sequential(
                nn.Conv2d(in_channels=self.in_channel, out_channels=channel * block.expansion, kernel_size=1,
                          stride=stride, bias=False),  # out_channels grows x4; stride halves H, W
                nn.BatchNorm2d(num_features=channel * block.expansion))
        layers = []  # each conv{i}_x stage (i = 2..5) is collected in a layers list
        layers.append(block(in_channel=self.in_channel, out_channel=channel, downsample=downsample,
                            stride=stride))  # first residual block of the stage; only it needs downsample and stride
        self.in_channel = channel * block.expansion  # the next call to _make_layer sees the x4 channel count
        for _ in range(1, block_num):  # stack the remaining block_num - 1 residual blocks
            layers.append(block(in_channel=self.in_channel, out_channel=channel))
        return nn.Sequential(*layers)  # '*' unpacks the list into positional arguments

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return x


if __name__ == '__main__':
    input = torch.randn(1, 3, 224, 224)  # B C H W (conv1 expects 3 input channels)
    print(input.shape)
    resnet50 = ResNet50(num_classes=10)
    output = resnet50.forward(input)
    # print(resnet50)
    # print(output)
    resnet50 = resnet50.cuda()
    # inspect the network structure with torchsummary
    torchsummary.summary(resnet50, (3, 224, 224))
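
To sanity-check the stage-by-stage shapes described in the comments above, here is a minimal sketch (assuming resnet50.py is on the import path) that runs the stem and the four stages by hand and prints each output shape; for a 3x224x224 input the stages should produce 256x56x56, 512x28x28, 1024x14x14 and 2048x7x7 feature maps.

# Minimal shape check for the stage outputs (a sketch, not part of the original files).
import torch
import resnet50

net = resnet50.ResNet50(num_classes=10)
net.eval()
x = torch.randn(1, 3, 224, 224)
with torch.no_grad():
    x = net.maxpool(net.relu(net.bn1(net.conv1(x))))  # stem: [1, 64, 56, 56]
    for name in ('layer1', 'layer2', 'layer3', 'layer4'):
        x = getattr(net, name)(x)
        print(name, tuple(x.shape))
    # expected: layer1 (1, 256, 56, 56), layer2 (1, 512, 28, 28),
    #           layer3 (1, 1024, 14, 14), layer4 (1, 2048, 7, 7)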

train_resnet50.py: the training script.

import time
import torch
import torch.nn.functional as F
import numpy as np
from matplotlib import pyplot as plt
import torchvision
import resnet50


# todo: load a standard dataset
def load_data_fashion_mnist(batch_size, resize=None, root='./Datasets/'):
    """Download the fashion mnist dataset and then load into memory."""
    trans = []
    # optional resize; the default interpolation is BILINEAR
    if resize:
        trans.append(torchvision.transforms.Resize(size=resize))
    trans.append(torchvision.transforms.ToTensor())
    transform = torchvision.transforms.Compose(trans)  # Compose chains the transform steps in trans
    # torchvision.datasets ships popular datasets, model structures and image transforms for quick data loading
    mnist_train = torchvision.datasets.FashionMNIST(root=root, train=True, download=True, transform=transform)
    mnist_test = torchvision.datasets.FashionMNIST(root=root, train=False, download=True, transform=transform)
    """
    torch.utils.data.DataLoader() feeds data and labels; common parameters:
        dataset:     the Dataset instance that provides the data
        batch_size:  number of samples per batch, usually a power of 2 such as 2, 4, 8, 16, 32, 64
        shuffle:     whether to read the data in random order; usually True for training, False for evaluation
        num_workers: number of worker processes loading data; more workers can speed up reading
        drop_last:   whether to drop the last incomplete batch when the dataset size is not divisible by batch_size
    """
    num_workers = 0
    train_iter = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    test_iter = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False, num_workers=num_workers)
    # print(train_iter)
    return train_iter, test_iter


# todo: wrap your own data
# Subclass torch.utils.data.Dataset and override __getitem__() and __len__(); pass in resized tensor data
class MyDataset(torch.utils.data.Dataset):
    # constructor
    def __init__(self, data_tensor, target_tensor):
        self.data_tensor = data_tensor
        self.target_tensor = target_tensor

    # return the dataset size
    def __len__(self):
        return self.data_tensor.size(0)

    # return the sample and label at the given index
    def __getitem__(self, index):
        return self.data_tensor[index], self.target_tensor[index]


# todo: load your own dataset
def load_data_MyDataset(data_tensor, target_tensor, batch_size, train_or_test='train', num_workers=0):
    my_dataset = MyDataset(data_tensor, target_tensor)
    if train_or_test == 'train':
        data_iter = torch.utils.data.DataLoader(my_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    elif train_or_test == 'test':
        data_iter = torch.utils.data.DataLoader(my_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)
    else:
        raise ValueError("check your param: train_or_test!")
    return data_iter


# todo: custom loss function; must subclass nn.Module
class cross_entropy_loss(torch.nn.Module):
    def __init__(self, reduction='mean'):
        super(cross_entropy_loss, self).__init__()
        self.reduction = reduction  # whether to return the mean or the sum of the per-sample losses

    def forward(self, logits, target):
        # logits: [N, C, H, W], target: [N, H, W]
        # loss = sum(-y_i * log(c_i))
        if logits.dim() > 2:
            logits = logits.view(logits.size(0), logits.size(1), -1)  # [N, C, HW]
            logits = logits.transpose(1, 2)  # [N, HW, C]
            logits = logits.contiguous().view(-1, logits.size(2))  # [NHW, C]
        target = target.view(-1, 1)  # [NHW, 1]
        logits = F.log_softmax(logits, 1)
        logits = logits.gather(1, target)  # [NHW, 1]
        loss = -1 * logits
        if self.reduction == 'mean':
            loss = loss.mean()
        elif self.reduction == 'sum':
            loss = loss.sum()
        return loss


# todo: evaluate accuracy on the test set
def evaluate_accuracy(data_iter, net, device=None):
    if device is None and isinstance(net, torch.nn.Module):
        # if no device is given, use the device of net's parameters
        device = list(net.parameters())[0].device
    acc_sum, n = 0.0, 0
    with torch.no_grad():
        for X, y in data_iter:
            # FashionMNIST images are single-channel; replicate them to three channels
            X = np.array(X)
            X = X.transpose((1, 0, 2, 3))  # [N, 1, H, W] -> [1, N, H, W]
            X = np.concatenate((X, X, X), axis=0)  # [3, N, H, W]
            X = X.transpose((1, 0, 2, 3))  # back to [N, 3, H, W]
            X = torch.tensor(X)  # numpy array -> tensor
            if isinstance(net, torch.nn.Module):
                net.eval()  # evaluation mode; this disables dropout
                acc_sum += (net(X.to(device)).argmax(dim=1) == y.to(device)).float().sum().cpu().item()
                net.train()  # back to training mode
            else:
                if 'is_training' in net.__code__.co_varnames:  # if net takes an is_training argument
                    # set is_training to False
                    acc_sum += (net(X, is_training=False).argmax(dim=1) == y).float().sum().item()
                else:
                    acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
            n += y.shape[0]
    return acc_sum / n


# todo: training loop
def train(net, train_iter, test_iter, optimizer, device, num_epochs):
    print("training on : ", device)
    # keep accuracies for plotting
    Train_acc, Test_acc = [0], [0]
    for epoch in range(num_epochs):
        print(f"Epoch {epoch + 1}\n----------------------")
        train_l_sum, train_acc_sum, n, batch_count, start = 0.0, 0.0, 0, 0, time.time()
        for X, y in train_iter:
            # FashionMNIST images are single-channel; replicate them to three channels
            X = np.array(X)
            X = X.transpose((1, 0, 2, 3))  # [N, 1, H, W] -> [1, N, H, W]
            X = np.concatenate((X, X, X), axis=0)  # concatenate along the channel axis -> [3, N, H, W]
            X = X.transpose((1, 0, 2, 3))  # back to [N, 3, H, W]
            X = torch.tensor(X)  # numpy array -> tensor
            # move the data to the GPU
            X = X.to(device)
            y = y.to(device)
            # forward pass
            y_hat = net(X)
            # compute the loss
            l = loss(y_hat, y)
            optimizer.zero_grad()  # clear gradients
            l.backward()  # backpropagation
            optimizer.step()  # gradient descent, parameter update
            # cpu() copies the data from the GPU to host memory; item() returns a Python number rather than a tensor (use cpu().data for a tensor)
            train_l_sum += l.cpu().item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().cpu().item()
            n += y.shape[0]
            batch_count += 1
            # print("train loss : %.4f, train acc : %.3f" % (train_l_sum / batch_count, train_acc_sum / n))
        # print each epoch's result and store it so the accuracy curves can be plotted at the end
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'
              % (epoch + 1, train_l_sum / batch_count, train_acc_sum / n, test_acc, time.time() - start))
        Train_acc.append(train_acc_sum / n)
        Test_acc.append(test_acc)
        if epoch == num_epochs - 1:
            torch.save(net.state_dict(), "./last_model.pth")  # save the weights
    # save the accuracy-vs-epoch figure
    plt.xlabel("Epochs")
    plt.ylabel("Accuracy")
    plt.ylim(0, 1)
    plt.xlim(0, 10)
    plt.plot(np.arange(len(Train_acc)), Train_acc, label='train_acc')
    plt.plot(np.arange(len(Test_acc)), Test_acc, label='test_acc')
    plt.savefig('./acc_result.png')
    print("Done!")


# use the GPU if available
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# ResNet-50; FashionMNIST has 10 classes
net = resnet50.ResNet50(num_classes=10).to(device)
# cross-entropy loss
# loss = torch.nn.CrossEntropyLoss()
loss = cross_entropy_loss()
# batch size
batch_size = 64
# training and test data loaders
train_iter, test_iter = load_data_fashion_mnist(batch_size, resize=96)
# learning rate and number of epochs
lr, num_epochs = 0.0001, 10
# Adam optimizer
optimizer = torch.optim.Adam(net.parameters(), lr=lr)
# start training
train(net, train_iter, test_iter, optimizer, device, num_epochs)
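
Once training finishes, the weights saved to ./last_model.pth can be reloaded for inference. The sketch below follows the conventions of the training script above (resize to 96, single channel replicated to three channels); picking the first test image is just an illustrative choice.

# Minimal inference sketch (assumes last_model.pth was produced by train_resnet50.py above).
import torch
import torchvision
import resnet50

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
net = resnet50.ResNet50(num_classes=10).to(device)
net.load_state_dict(torch.load("./last_model.pth", map_location=device))
net.eval()

transform = torchvision.transforms.Compose([
    torchvision.transforms.Resize(96),   # same resize as during training
    torchvision.transforms.ToTensor(),
])
test_set = torchvision.datasets.FashionMNIST(root='./Datasets/', train=False, download=True, transform=transform)
img, label = test_set[0]                 # img: [1, 96, 96]
X = img.unsqueeze(0).repeat(1, 3, 1, 1)  # replicate the single channel to 3 channels -> [1, 3, 96, 96]
with torch.no_grad():
    pred = net(X.to(device)).argmax(dim=1).item()
print("predicted class:", pred, "ground truth:", label)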

Result figure: the script saves the train/test accuracy curves to ./acc_result.png (figure not reproduced here).
