当前位置:   article > 正文

22- Pytorch实现天气分类 (Pytorch系列) (项目二十二)_四种天气数据集

四种天气数据集

项目要点

  • 4种天气数据的分类:   cloudy,  rain,  shine,  sunrise.
  • all_img_path = glob.glob(r'G:\01-project\08-深度学习\day 56 迁移学习\dataset/*.jpg')     # 指定文件夹   # import glob
  • 获取随机数列: index = np.random.permutation(len(all_img_path))
  • 建立数组和索引的关联: idx_to_species = dict((i, c) for i, c in enumerate(species))
  • transform = transforms.Compose([transforms.Resize((96, 96)), transforms.ToTensor()])    # 转换为tensor  # 定义transform
  • 数据由numpy转换为tensor: torch.from_numpy(np.array(label)).long()
  • 判断图片的通道数: if np.array(img).shape[-1] == 3
  • 打开文件夹图片: img = Image.open(all_img_path[0])
  • 数据转换为ndarray: np.asarray(img).shape
  • train_d1 = torch.utils.data.DataLoader(train_ds, batch_size = 16, shuffle = True, collate_fn = MyDataset.collate_fn, drop_last = True)      # 定义dataloader  # 最后一批数据直接不用

定义模型:

  • 添加卷积层: self.conv1 = nn.Conv2d(3, 32, 3)
  • 添加激活层: x = self.pool(F.relu(self.conv1(x)))
  • 添加BN层: self.bn1 = nn.BatchNorm2d(32)     # x = self.bn1(x)
  • 添加Flatten层: x = nn.Flatten()(x)     # 用来将输入“压平”,即把多维的输入一维化,# 常用在从 卷积层到全连接层的过渡。
  • 添加全连接层: self.fc1 = nn.Linear(64 * 10 * 10, 1024)
  • 添加激活层: x = F.relu(self.fc1(x))
  • 添加dropout: self.dropout = nn.Dropout()    # 防止过拟合
  • 添加输出层: self.fc3 = nn.Linear(256, 4)   
    • x = self.fc3(x)
  • 定义程序运行位置: device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
  • 定义优化器: optimizer = optim.Adam(model.parameters(), lr=0.001)
  • 定义loss: loss_fn = nn.CrossEntropyLoss()
  • 定义梯度下降:
  1. for x, y in train_loader:
  2. x, y = x.to(device), y.to(device)
  3. y_pred = model(x)
  4. loss = loss_fn(y_pred, y)
  5. optimizer.zero_grad()
  6. loss.backward()
  7. optimizer.step()
  8. with torch.no_grad():
  9. y_pred = torch.argmax(y_pred, dim=1)
  10. correct += (y_pred == y).sum().item()
  11. total += y.size(0)
  12. running_loss += loss.item()


一 自定义数据集分类

 4种天气数据的分类: cloudy,  rain,  shine,  sunrise.

1.1 导包

  1. import torch
  2. import numpy as np
  3. from torchvision import transforms
  4. import glob
  5. from PIL import Image
  6. import torch.nn.functional as F
  7. import torch.optim as optim
  8. import matplotlib.pyplot as plt

1.2 数据导入

  • 指定文件夹
all_img_path = glob.glob(r'G:\01-project\08-深度学习\day 56 迁移学习\dataset/*.jpg')
  • 打乱顺序
  1. # permutation 排列组合
  2. # 借助ndarray的索引取值的方法, 打乱数据
  3. index = np.random.permutation(len(all_img_path))
  4. index # array([ 175, 1027, 530, ..., 4, 831, 65])
  1. species = ['cloudy', 'rain', 'shine', 'sunrise']
  2. # 建立类别和索引之间的映射关系
  3. idx_to_species = dict((i, c) for i, c in enumerate(species))
  4. # {0: 'cloudy', 1: 'rain', 2: 'shine', 3: 'sunrise'}
  5. # 生成所有图片的label
  6. all_labels = []
  7. for img in all_img_path:
  8. for i,c in enumerate(species):
  9. if c in img:
  10. all_labels.append(i)
  •  数据格式转换
  1. all_labels = np.array(all_labels, dtype=np.int64)[index]
  2. all_labels # array([0, 3, 2, ..., 0, 3, 0], dtype=int64)
  1. all_img_path = np.array(all_img_path)[index]
  2. all_img_path

1.3 数据拆分

  1. # 手动划分一下训练数据和测试数据
  2. split = int(len(all_img_path) * 0.8) # int 只取整数部分
  3. train_imgs = all_img_path[:split]
  4. train_labels = all_labels[:split]
  5. test_imgs = all_img_path[split:]
  6. test_labels = all_labels[split:]
  • 定义 transform
  1. # 定义transform
  2. transform = transforms.Compose([transforms.Resize((96, 96)),
  3. transforms.ToTensor()]) # 转换为tensor

1.4 数据处理

  1. class MyDataset(torch.utils.data.Dataset):
  2. def __init__(self, img_paths, labels, transform): # 接受初始化数据
  3. self.imgs = img_paths
  4. self.labels = labels
  5. self.transforms = transform
  6. def __getitem__(self, index): # 取上面的数据
  7. # 根据index获取item
  8. img_path = self.imgs[index]
  9. label = self.labels[index]
  10. # 通过PIL的Image读取图片
  11. img = Image.open(img_path)
  12. if np.array(img).shape[-1] == 3:
  13. data = self.transforms(img)
  14. return data, torch.from_numpy(np.array(label)).long()
  15. else:
  16. # 否则为有问题的图片
  17. print(img_path)
  18. # print(np.array(img).shape)
  19. # print(np.array(img))
  20. return self.__getitem__(index+1)
  21. def __len__(self): # 调用数据时, 返回长度
  22. return len(self.imgs) # 返回个数
  23. # 重写collate_fn
  24. @staticmethod
  25. def collate_fn(batch):
  26. # batch是列表, 长度是batch_size
  27. # 列表的每个元素是一个元组(x, y)
  28. # [(x1, y1), (x2, y2).......]
  29. # collate_fn 的作用, 把所有的x,y分别放到一起, x在一起, y在一起.
  30. # 把batch中返回值为空的部分过滤掉
  31. batch = [sample for sample in batch if sample is not None]
  32. # 简单方法, 直接调用默认的collate方法
  33. # from torch.utils.data.dataloader import default_collate
  34. # return default_collate(batch)
  35. # 方式二
  36. imgs, labels = zip(*batch)
  37. return torch.stack(imgs, 0), torch.stack(labels, 0)
  38. dataset = MyDataset(all_img_path, all_labels, transform)
  39. len(dataset) # 1122
  1. train_ds = MyDataset(train_imgs, train_labels, transform)
  2. test_ds = MyDataset(test_imgs, test_labels, transform)
  3. # dataloader
  4. train_d1 = torch.utils.data.DataLoader(train_ds, batch_size = 16,
  5. shuffle = True,
  6. collate_fn=MyDataset.collate_fn,
  7. drop_last = True) # 最后一批数据直接不用
  8. test_d1 = torch.utils.data.DataLoader(test_ds, batch_size = 16 * 2,
  9. collate_fn=MyDataset.collate_fn, drop_last = True)
  10. for x, y in train_d1:
  11. print(x.shape,y.shape)

  1. imgs, labels = next(iter(train_d1))
  2. imgs.shape # torch.Size([16, 3, 96, 96])
  3. labels # tensor([3, 3, 2, 3, 2, 3, 3, 0, 1, 1, 0, 3, 2, 0, 1, 1])

1.5 定义模型

  1. # 定义模型 # 添加BN层
  2. import torch.nn as nn
  3. class Net(nn.Module):
  4. def __init__(self):
  5. super().__init__()
  6. self.conv1 = nn.Conv2d(3, 32, 3) # 卷积 # (96, 96, 3) --> (32, 94, 94)
  7. # 主要做标准化处理
  8. self.bn1 = nn.BatchNorm2d(32)
  9. self.pool = nn.MaxPool2d(2, 2) # 池化 # (32, 47, 47)
  10. self.conv2 = nn.Conv2d(32, 32, 3) # (32, 45, 45) --> pooling --> (32, 22, 22)
  11. self.bn2 = nn.BatchNorm2d(32)
  12. self.conv3 = nn.Conv2d(32, 64, 3) # (64, 22, 22) --> pooling --> (64, 10, 10)
  13. self.bn3 = nn.BatchNorm2d(64)
  14. self.dropout = nn.Dropout() # 防止过拟合 #
  15. # batch, channel, height, width, 64
  16. # 全连接层
  17. self.fc1 = nn.Linear(64 * 10 * 10, 1024)
  18. self.bn_fc1 = nn.BatchNorm1d(1024)
  19. self.fc2 = nn.Linear(1024, 256)
  20. self.bn_fc2 = nn.BatchNorm1d(256)
  21. self.fc3 = nn.Linear(256, 4)
  22. def forward(self, x):
  23. x = self.pool(F.relu(self.conv1(x)))
  24. x = self.bn1(x)
  25. x = self.pool(F.relu(self.conv2(x)))
  26. x = self.bn2(x)
  27. x = self.pool(F.relu(self.conv3(x)))
  28. x = self.bn3(x)
  29. # x.view(-1, 64 * 10 * 10)
  30. # flatten , Flatten层用来将输入“压平”,即把多维的输入一维化,
  31. # 常用在从卷积层到全连接层的过渡。
  32. x = nn.Flatten()(x)
  33. x = F.relu(self.fc1(x))
  34. x = self.bn_fc1(x)
  35. x = self.dropout(x)
  36. x = F.relu(self.fc2(x))
  37. x = self.bn_fc2(x)
  38. x = self.dropout(x)
  39. x = self.fc3(x)
  40. return x
  1. device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
  2. device # device(type='cpu')
  3. # 生成模型
  4. model = Net()
  5. # 把模型拷贝到GPU
  6. if torch.cuda.is_available():
  7. model.to(device)

1.6 定义训练

  1. optimizer = optim.Adam(model.parameters(), lr=0.001)
  2. loss_fn = nn.CrossEntropyLoss()
  3. # 定义训练过程
  4. def fit(epoch, model, train_loader, test_loader):
  5. correct = 0
  6. total = 0
  7. running_loss = 0
  8. for x, y in train_loader:
  9. x, y = x.to(device), y.to(device)
  10. y_pred = model(x)
  11. loss = loss_fn(y_pred, y)
  12. optimizer.zero_grad()
  13. loss.backward()
  14. optimizer.step()
  15. with torch.no_grad():
  16. y_pred = torch.argmax(y_pred, dim=1)
  17. correct += (y_pred == y).sum().item()
  18. total += y.size(0)
  19. running_loss += loss.item()
  20. epoch_loss = running_loss / len(train_loader.dataset)
  21. epoch_acc = correct / total
  22. # 测试过程
  23. test_correct = 0
  24. test_total = 0
  25. test_running_loss = 0
  26. with torch.no_grad():
  27. for x, y in test_loader:
  28. x, y = x.to(device), y.to(device)
  29. y_pred = model(x)
  30. loss = loss_fn(y_pred, y)
  31. y_pred = torch.argmax(y_pred, dim=1)
  32. test_correct += (y_pred == y).sum().item()
  33. test_total += y.size(0)
  34. test_running_loss += loss.item()
  35. test_epoch_loss = test_running_loss / len(test_loader.dataset)
  36. test_epoch_acc = test_correct /test_total
  37. print('epoch', epoch,
  38. 'loss', round(epoch_loss, 3),
  39. 'accuracy', round(epoch_acc, 3),
  40. 'test_loss', round(test_epoch_loss, 3),
  41. 'test_accuracy', round(test_epoch_acc, 3))
  42. return epoch_loss, epoch_acc, test_epoch_loss, test_epoch_acc
  • 指定训练
  1. # 指定训练次数
  2. epochs = 10
  3. train_loss = []
  4. train_acc = []
  5. test_loss = []
  6. test_acc = []
  7. for epoch in range(epochs):
  8. epoch_loss, epoch_acc, test_epoch_loss, test_epoch_acc = fit(epoch, model,
  9. train_d1, test_d1)
  10. train_loss.append(epoch_loss)
  11. train_acc.append(epoch_acc)
  12. test_loss.append(epoch_loss)
  13. test_acc.append(epoch_acc)

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/我家自动化/article/detail/581948
推荐阅读
相关标签
  

闽ICP备14008679号