
PyTorch Deep Learning (18): Network Architectures LeNet and AlexNet

CNN (Convolutional Neural Network)

Typical applications:

Classification: which class an image belongs to
Retrieval: find and group images of the same class
Detection: draw a bounding box around each object and give its class and probability
Segmentation: partition an image into different regions
Self-driving cars

Image Captioning: generate a textual description of an image
Style Transfer: visualize certain image features and apply the style of one image to another, similar image

The prototype: LeCun's LeNet (1998) network architecture.

PyTorch tensor dimension order: [batch, channel, height, width]
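
A quick way to confirm this ordering is to inspect the shape of a random tensor; the snippet below is illustrative only and not part of the tutorial scripts:

import torch

# a batch of 8 RGB images, each 32x32: [batch, channel, height, width]
x = torch.rand(8, 3, 32, 32)
print(x.shape)     # torch.Size([8, 3, 32, 32])
print(x.shape[1])  # 3 channels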

(1)LeNet

import torch
import torch.nn as nn
import torch.nn.functional as F


class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, 5)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 32, 5)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(32*5*5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = F.relu(self.conv1(x))   # input(3, 32, 32)  output(16, 28, 28)
        x = self.pool1(x)           # output(16, 14, 14)
        x = F.relu(self.conv2(x))   # output(32, 10, 10)
        x = self.pool2(x)           # output(32, 5, 5)
        x = x.view(-1, 32*5*5)      # output(32*5*5)
        x = F.relu(self.fc1(x))     # output(120)
        x = F.relu(self.fc2(x))     # output(84)
        x = self.fc3(x)             # output(10)
        return x


# quick test; guarded so it only runs when this file is executed directly,
# not when the training script imports LeNet
if __name__ == '__main__':
    input1 = torch.rand([32, 3, 32, 32])
    model = LeNet()
    print(model)
    output = model(input1)
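
The output sizes in the forward() comments follow the usual convolution/pooling size rule: output = (W - F + 2P) / S + 1, with input size W, kernel size F, padding P and stride S (PyTorch floors the division). A minimal sketch that reproduces LeNet's 32 -> 28 -> 14 -> 10 -> 5 chain (illustrative only, not part of the original code):

def conv_out(w, f, p=0, s=1):
    # output size of a conv/pool layer: floor((W - F + 2P) / S) + 1
    return (w - f + 2 * p) // s + 1

w = 32
w = conv_out(w, 5)        # conv1, 5x5 kernel        -> 28
w = conv_out(w, 2, s=2)   # pool1, 2x2 with stride 2 -> 14
w = conv_out(w, 5)        # conv2                    -> 10
w = conv_out(w, 2, s=2)   # pool2                    -> 5
print(w)                  # 5, hence the nn.Linear(32*5*5, 120) input size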

(2)Train Phase

import torch
import torchvision
import torch.nn as nn
from LeNet import LeNet
import torch.optim as optim
import torchvision.transforms as transforms
import numpy as np
import torch.utils.data
import matplotlib.pyplot as plt

transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

# 50,000 training images (set download=True on the first run)
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=False, transform=transform)
# 10,000 test images
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=False, transform=transform)

# load the data
trainLoader = torch.utils.data.DataLoader(trainset, batch_size=36, shuffle=True, num_workers=0)
testLoader = torch.utils.data.DataLoader(testset, batch_size=10000, shuffle=True, num_workers=0)

# iterator over the test set (a single batch holds all 10,000 images)
test_data_iter = iter(testLoader)
test_image, test_label = next(test_data_iter)

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

net = LeNet()
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

# official image display code
# def imshow(img):
#     img = img / 2 + 0.5     # unnormalize
#     npimg = img.numpy()
#     plt.imshow(np.transpose(npimg, (1, 2, 0)))
#     plt.show()
#
# # print labels
# print(' '.join('%5s' % classes[test_label[j]] for j in range(4)))
# # show images
# imshow(torchvision.utils.make_grid(test_image))

for epoch in range(5):
    running_loss = 0.0
    for step, data in enumerate(trainLoader, start=0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        # zero the parameter gradients
        optimizer.zero_grad()
        # forward + backward + optimize
        outputs = net(inputs)
        loss = loss_function(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if step % 500 == 499:    # print every 500 mini-batches
            with torch.no_grad():
                outputs = net(test_image)  # [batch, 10]
                predict_y = torch.max(outputs, dim=1)[1]
                accuracy = (predict_y == test_label).sum().item() / test_label.size(0)
                print('[%d, %5d] train_loss: %.3f  test_accuracy: %.3f' %
                      (epoch + 1, step + 1, running_loss / 500, accuracy))
                running_loss = 0.0

print('Finished Training')

save_path = './Lenet.pth'
torch.save(net.state_dict(), save_path)

# training on the GPU
import torchvision
import torch
from LeNet import LeNet
from torchvision import transforms
import torch.utils.data
import matplotlib.pyplot as plt
import numpy as np
import torch.optim as opt

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

net = LeNet()
net.to(device)
loss_function = torch.nn.CrossEntropyLoss()
optimizer = opt.Adam(net.parameters(), lr=0.001)


def main():
    print('---%s' % device)
    train_data = torchvision.datasets.CIFAR10(root='../dataset/cifar_data', train=True, transform=transform, download=False)
    test_data = torchvision.datasets.CIFAR10(root='../dataset/cifar_data', train=False, transform=transform, download=False)
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=32, shuffle=True, num_workers=0)
    test_loader = torch.utils.data.DataLoader(test_data, batch_size=10000, shuffle=True, num_workers=0)

    test_iter = iter(test_loader)
    test_images, test_labels = next(test_iter)

    for epoch in range(5):
        running_loss = 0.0
        for step, data in enumerate(train_loader, start=0):
            inputs, labels = data
            optimizer.zero_grad()
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = net(inputs)
            loss = loss_function(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            if step % 500 == 499:
                with torch.no_grad():
                    test_images = test_images.to(device)
                    test_labels = test_labels.to(device)
                    outputs = net(test_images)
                    predict_y = torch.max(outputs, dim=1)[1]
                    accuracy = (predict_y == test_labels).sum().item() / test_labels.size(0)
                    print('%d %3d summary loss=%.3f accuracy=%.3f' % (epoch + 1, step + 1, running_loss, accuracy))

    save_pth()  # save the trained weights


def imshow(img):
    img = img / 2 + 0.5     # unnormalize
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()


def save_pth():
    save_path = './Lenet.pth'
    torch.save(net.state_dict(), save_path)


if __name__ == '__main__':
    main()

(3)Test Phase

import torch
import torchvision.transforms as transforms
from PIL import Image
from LeNet import LeNet

transform = transforms.Compose(
    [transforms.Resize((32, 32)),
     transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

net = LeNet()
net.load_state_dict(torch.load('Lenet.pth'))

im = Image.open('image name')    # path to the image to classify; add .convert('RGB') if it has an alpha channel
im = transform(im)               # [C, H, W]
im = torch.unsqueeze(im, dim=0)  # [N, C, H, W]

with torch.no_grad():
    outputs = net(im)
    predict = torch.max(outputs, dim=1)[1].item()  # index of the most likely class
    probs = torch.softmax(outputs, dim=1)          # class probabilities, shape [1, 10]
print(classes[predict])

AlexNet

AlexNet won the ILSVRC 2012 (ImageNet Large Scale Visual Recognition Challenge) competition, raising classification accuracy from the roughly 70%+ of traditional methods to 80%+. It was designed by Alex Krizhevsky, a student of Geoffrey Hinton.

ILSVRC data used for the classification task:

  • ILSVRC 2012
  • Training set: 1,281,167 labeled images
  • Validation set: 50,000 labeled images
  • Test set: 100,000 unlabeled images

Highlights:

  1. First used GPUs to accelerate network training
  2. Used the ReLU activation function instead of the traditional Sigmoid and Tanh activations
  3. Used LRN (Local Response Normalization); a minimal sketch follows this list
  4. Applied Dropout to the first two fully connected layers, randomly deactivating neurons to reduce overfitting
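
The ReLU, LRN, and Dropout pieces all map onto standard PyTorch modules. The snippet below is only an illustrative sketch (the Conv2d and LRN hyper-parameters are placeholders chosen to match the Conv1 row of the table below, not code from this tutorial); note that the AlexNet implementation later in this article omits LRN, as many modern reimplementations do.

import torch
import torch.nn as nn

# ReLU activation followed by Local Response Normalization after a conv layer
block = nn.Sequential(
    nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=2),
    nn.ReLU(inplace=True),
    nn.LocalResponseNorm(size=5, alpha=1e-4, beta=0.75, k=2.0),
)

# Dropout randomly zeroes activations during training to reduce overfitting
drop = nn.Dropout(p=0.5)

x = torch.rand(1, 3, 224, 224)
print(block(x).shape)            # torch.Size([1, 96, 55, 55])
print(drop(torch.rand(4, 10)))   # roughly half of the entries are zeroed in training mode
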
AlexNet layer parameters:

layer_name  kernel_size  kernel_num  padding  stride
Conv1       11           96          [1, 2]   4
Maxpool1    3            None        0        2
Conv2       5            256         [2, 2]   1
Maxpool2    3            None        0        2
Conv3       3            384         [1, 1]   1
Conv4       3            384         [1, 1]   1
Conv5       3            256         [1, 1]   1
Maxpool3    3            None        0        2
FC1         2048         None        None     None
FC2         2048         None        None     None
FC3         1000         None        None     None

For the FC rows the kernel_size column holds the number of output nodes. The implementation below keeps only half of the convolution kernel numbers listed here (48, 128, 192, 192, 128), since the original AlexNet split its feature maps across two GPUs and one half is sufficient for this tutorial.
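
In the padding column, [1, 2] is read as 1 pixel of zero padding on the top/left and 2 on the bottom/right, which is what yields the 55x55 output of Conv1. nn.Conv2d only accepts symmetric padding, so the implementation below simply uses padding=2 for Conv1 and lets the floor in the size formula absorb the difference. If you want to reproduce the asymmetric padding exactly, nn.ZeroPad2d is one option; the following is only an illustrative sketch, not part of the tutorial code:

import torch
import torch.nn as nn

# nn.ZeroPad2d takes (left, right, top, bottom): 1 pixel left/top, 2 pixels right/bottom
conv1 = nn.Sequential(
    nn.ZeroPad2d((1, 2, 1, 2)),
    nn.Conv2d(3, 96, kernel_size=11, stride=4),  # padding already applied explicitly
)

x = torch.rand(1, 3, 224, 224)
print(conv1(x).shape)  # torch.Size([1, 96, 55, 55]) -> (224 + 1 + 2 - 11) / 4 + 1 = 55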

Model

import torch.nn as nn
import torch


class AlexNet(nn.Module):
    def __init__(self, num_classes=1000, init_weights=False):
        super(AlexNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 48, kernel_size=11, stride=4, padding=2),  # input[3, 224, 224]  output[48, 55, 55]
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),                  # output[48, 27, 27]
            nn.Conv2d(48, 128, kernel_size=5, padding=2),           # output[128, 27, 27]
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),                  # output[128, 13, 13]
            nn.Conv2d(128, 192, kernel_size=3, padding=1),          # output[192, 13, 13]
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 192, kernel_size=3, padding=1),          # output[192, 13, 13]
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 128, kernel_size=3, padding=1),          # output[128, 13, 13]
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),                  # output[128, 6, 6]
        )
        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(128 * 6 * 6, 2048),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(2048, 2048),
            nn.ReLU(inplace=True),
            nn.Linear(2048, num_classes),
        )
        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, start_dim=1)
        x = self.classifier(x)
        return x

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)
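
A quick way to sanity-check the layer comments above is to forward a random tensor through the model. This sketch is only for verification and is not part of the training scripts; it assumes the class above is saved as model.py, as the Train script below expects.

import torch
from model import AlexNet

net = AlexNet(num_classes=5, init_weights=True)
x = torch.rand(2, 3, 224, 224)   # a dummy batch of two 224x224 RGB images
print(net.features(x).shape)     # torch.Size([2, 128, 6, 6])
print(net(x).shape)              # torch.Size([2, 5])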

Train

import os
import sys
import json

import torch
import torch.nn as nn
from torchvision import transforms, datasets, utils
import matplotlib.pyplot as plt
import numpy as np
import torch.optim as optim
from tqdm import tqdm

from model import AlexNet


def main():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),
        "val": transforms.Compose([transforms.Resize((224, 224)),  # cannot be 224, must be (224, 224)
                                   transforms.ToTensor(),
                                   transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])}

    data_root = os.path.abspath(os.path.join(os.getcwd(), ""))  # get data root path
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    assert os.path.exists(image_path), "{} path does not exist.".format(image_path)
    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)

    # {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}
    flower_list = train_dataset.class_to_idx
    cla_dict = dict((val, key) for key, val in flower_list.items())
    # write dict into json file
    json_str = json.dumps(cla_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    batch_size = 32
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size, shuffle=True,
                                               num_workers=nw)

    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=4, shuffle=False,
                                                  num_workers=nw)

    print("using {} images for training, {} images for validation.".format(train_num,
                                                                           val_num))
    # test_data_iter = iter(validate_loader)
    # test_image, test_label = next(test_data_iter)
    #
    # def imshow(img):
    #     img = img / 2 + 0.5  # unnormalize
    #     npimg = img.numpy()
    #     plt.imshow(np.transpose(npimg, (1, 2, 0)))
    #     plt.show()
    #
    # print(' '.join('%5s' % cla_dict[test_label[j].item()] for j in range(4)))
    # imshow(utils.make_grid(test_image))

    net = AlexNet(num_classes=5, init_weights=True)
    net.to(device)
    loss_function = nn.CrossEntropyLoss()
    # pata = list(net.parameters())
    optimizer = optim.Adam(net.parameters(), lr=0.0002)

    epochs = 10
    save_path = './AlexNet.pth'
    best_acc = 0.0
    train_steps = len(train_loader)
    for epoch in range(epochs):
        # train
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader, file=sys.stdout)
        for step, data in enumerate(train_bar):
            images, labels = data
            optimizer.zero_grad()
            outputs = net(images.to(device))
            loss = loss_function(outputs, labels.to(device))
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
                                                                     epochs,
                                                                     loss)

        # validate
        net.eval()
        acc = 0.0  # accumulate the number of correct predictions per epoch
        with torch.no_grad():
            val_bar = tqdm(validate_loader, file=sys.stdout)
            for val_data in val_bar:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

        val_accurate = acc / val_num
        print('[epoch %d] train_loss: %.3f  val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))

        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

    print('Finished Training')


if __name__ == '__main__':
    main()

Predict

import os
import json

import torch
from PIL import Image
from torchvision import transforms
import matplotlib.pyplot as plt

from model import AlexNet


def main():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    data_transform = transforms.Compose(
        [transforms.Resize((224, 224)),
         transforms.ToTensor(),
         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    # load image
    img_path = "../tulip.jpg"
    assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path)
    img = Image.open(img_path)
    plt.imshow(img)
    # [N, C, H, W]
    img = data_transform(img)
    # expand batch dimension
    img = torch.unsqueeze(img, dim=0)

    # read class_indict
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path)
    with open(json_path, "r") as json_file:
        class_indict = json.load(json_file)

    # create model
    model = AlexNet(num_classes=5).to(device)

    # load model weights
    weights_path = "./AlexNet.pth"
    assert os.path.exists(weights_path), "file: '{}' does not exist.".format(weights_path)
    model.load_state_dict(torch.load(weights_path))

    model.eval()
    with torch.no_grad():
        # predict class
        output = torch.squeeze(model(img.to(device))).cpu()
        predict = torch.softmax(output, dim=0)
        predict_cla = torch.argmax(predict).numpy()

    print_res = "class: {}   prob: {:.3}".format(class_indict[str(predict_cla)],
                                                 predict[predict_cla].numpy())
    plt.title(print_res)
    for i in range(len(predict)):
        print("class: {:10}   prob: {:.3}".format(class_indict[str(i)],
                                                  predict[i].numpy()))
    plt.show()


if __name__ == '__main__':
    main()
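
If you only want the few most likely classes instead of looping over all of them, torch.topk can be applied to the softmax output. A minimal sketch, assuming it is called from inside main() above where predict and class_indict are available (the helper name is hypothetical, not part of the original script):

import torch

def print_topk(predict, class_indict, k=3):
    # k largest probabilities and the indices of their classes
    values, indices = torch.topk(predict, k)
    for prob, idx in zip(values.tolist(), indices.tolist()):
        print("class: {:10}   prob: {:.3f}".format(class_indict[str(idx)], prob))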
