赞
踩
记录一下自己学习的过程,内容主要来自于B站的一位up主,在此非常感谢他无私的奉献精神。看到他的视频请一键三连!
对于准备划分的数据我们要求文件夹有以下文件格式。
|--your_dataset
|--class1
|--class2
|--class3
划分后的文件呈现以下文件夹格式。
|--data
|--train
|--class1
|--class2
|--class3
|--val
|--class1
|--class2
|--class3
|--test
|--class1
|--class2
|--class3
import os from shutil import copy import random # 如果file不存在,创建file def mkfile(file): if not os.path.exists(file): os.makedirs(file) # 获取data文件夹下所有除.txt文件以外所有文件夹名(即需要分类的类名) # os.listdir():用于返回指定的文件夹包含的文件或文件夹的名字的列表 file_path = r'your_dataset_path' # 你的数据集路径 pet_class = [cla for cla in os.listdir(file_path) if ".txt" not in cla] # 创建训练集train文件夹,并由类名在其目录下创建子目录 mkfile('data/train') for cla in pet_class: mkfile('data/train/' + cla) # 创建验证集val文件夹,并由类名在其目录下创建子目录 mkfile('data/val') for cla in pet_class: mkfile('data/val/' + cla) # 创建测试集test文件夹,并由类名在其目录下创建子目录 mkfile('data/test') for cla in pet_class: mkfile('data/test/' + cla) # 划分比例,训练集 : 验证集 : 测试集 = 6 : 2 : 6 train_rate = 0.6 val_rate = 0.2 test_rate = 0.2 for cla in pet_class: cla_path = file_path + '/' + cla + '/' images = os.listdir(cla_path) num = len(images) # 计算每个子集的数量 train_num = int(num * train_rate) val_num = int(num * val_rate) test_num = int(num * test_rate) # 随机抽取图像名称 all_images = random.sample(images, k=num) train_images = all_images[:train_num] val_images = all_images[train_num:(train_num + val_num)] test_images = all_images[(train_num + val_num):] # 复制图像到相应的子集文件夹 for image in train_images: image_path = cla_path + image new_path = 'data/train/' + cla copy(image_path, new_path) for image in val_images: image_path = cla_path + image new_path = 'data/val/' + cla copy(image_path, new_path) for image in test_images: image_path = cla_path + image new_path = 'data/test/' + cla copy(image_path, new_path) print("\r[{}] processing done".format(cla)) print("\nData splitting done!")
AlexNet是2012年ILSVRC2012(ImageNet Large Scale Visual Recognition Challenge)竞赛的冠军网络,分类准确率由传统的 70%+提升到 80%+。它是由Hinton和他的学生Alex Krizhevsky设计的。也是在那年之后,深度学习开始迅速发展。
过拟合问题
Dropout方法通过网络在正向传播过程中随机失活部分神经元,减少网络训练参数,从而解决过拟合问题。
由于当时实验设备资源受限,原始AlexNet网络是在两块gpu上进行训练的。AlexNet网络主要分为11层。分别是Conv1, Maxpool1, Conv2, Maxpool2, Conv3, Conv4, Conv5, Maxpool3, FC, FC, FC 每一层的具体数据如下:
import torch import torch.nn as nn class AlexNet(nn.Module): def __init__(self, num_classes=1000, init_weights=False): super(AlexNet, self).__init__() self.features = nn.Sequential( # 这里的padding操作原论文是padding为(1,2) nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=2), nn.ReLU(inplace=True), nn.MaxPool2d(kernel_size=3, stride=2), # 这里的padding操作原论文是pdadding为(2,2) nn.Conv2d(96, 256, kernel_size=5, stride=1, padding=2), nn.ReLU(inplace=True), nn.MaxPool2d(kernel_size=3, stride=2), # 原来padding为(1,1) nn.Conv2d(256, 384, kernel_size=3, stride=1, padding=1), nn.ReLU(inplace=True), # 原来padding为(1,1) nn.Conv2d(384, 384, kernel_size=3, stride=1, padding=1), nn.ReLU(inplace=True), # 原来padding为(1,1) nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1), nn.ReLU(inplace=True), nn.MaxPool2d(kernel_size=3, stride=2), ) self.classifier = nn.Sequential( nn.Dropout(p=0.5), nn.Linear(6 * 6 * 256, 2048), nn.ReLU(inplace=True), nn.Linear(2048, 2048), nn.ReLU(inplace=True), nn.Linear(2048, num_classes), ) if init_weights: self._initialize_weights() def forward(self, x): x = self.features(x) x = torch.flatten(x, start_dim=1) x = self.classifier(x) return x # 初始化方法,pytorch内置的就有初始化,不需要我们单独去设置 def _initialize_weights(self): for m in self.modules(): if isinstance(m, nn.Conv2d): nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') if m.bias is not None: nn.init.constant_(m.bias, 0) elif isinstance(m, nn.Linear): nn.init.normal_(m.weight, 0, 0.01) nn.init.constant_(m.bias, 0)
import os
import sys
import json
import torch
import torch.nn as nn
from torchvision import transforms, datasets, utils
import matplotlib.pyplot as plt
import numpy as np
import torch.optim as optim
from tqdm import tqdm
from model import AlexNet


def main():
    """Train AlexNet on the flower dataset and keep the best checkpoint.

    Expects data_set/flower_data/{train,val} two levels above the working
    directory; writes class_indices.json and ./AlexNet.pth as side effects.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),
        # Resize must get the (224, 224) tuple: a bare 224 would only scale
        # the shorter edge and keep the aspect ratio.
        "val": transforms.Compose([transforms.Resize((224, 224)),
                                   transforms.ToTensor(),
                                   transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])}

    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    assert os.path.exists(image_path), "{} path does not exist.".format(image_path)
    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)

    # e.g. {'daisy': 0, 'dandelion': 1, 'roses': 2, 'sunflower': 3, 'tulips': 4}
    flower_list = train_dataset.class_to_idx
    cla_dict = dict((val, key) for key, val in flower_list.items())
    # Persist the index -> class-name mapping for the prediction script.
    json_str = json.dumps(cla_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    batch_size = 32
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size, shuffle=True,
                                               num_workers=nw)

    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=4, shuffle=False,
                                                  num_workers=nw)
    # BUG FIX: in the original source this format string was split across a
    # raw newline inside the literal, which is a SyntaxError.
    print("using {} images for training, {} images for validation.".format(train_num,
                                                                           val_num))

    net = AlexNet(num_classes=5, init_weights=True)
    net.to(device)
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=0.0002)

    epochs = 10
    save_path = './AlexNet.pth'
    best_acc = 0.0
    train_steps = len(train_loader)
    for epoch in range(epochs):
        # train
        net.train()  # enables Dropout in the classifier head
        running_loss = 0.0
        train_bar = tqdm(train_loader, file=sys.stdout)
        for step, data in enumerate(train_bar):
            images, labels = data
            optimizer.zero_grad()
            outputs = net(images.to(device))
            loss = loss_function(outputs, labels.to(device))
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
                                                                     epochs,
                                                                     loss)

        # validate
        net.eval()  # disables Dropout for deterministic evaluation
        acc = 0.0  # accumulate accurate number / epoch
        with torch.no_grad():
            val_bar = tqdm(validate_loader, file=sys.stdout)
            for val_data in val_bar:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

        val_accurate = acc / val_num
        print('[epoch %d] train_loss: %.3f val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))

        # Keep only the weights of the best validation epoch.
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

    print('Finished Training')


if __name__ == '__main__':
    main()
记录一下自己的一些小疑问。
经过torchvision.datasets.ImageFolder
操作之后的数据会有以下三个属性。
下面我们通过一个小实验来验证上面三个属性。
input:
from torchvision import datasets
image = datasets.ImageFolder(root=r"E:\Dateset\flower_photos")
print('属性1{}'.format(image.classes))
print('属性2{}'.format(image.class_to_idx))
print('属性3{}'.format(image.imgs))
output(部分)
属性1 ['daisy', 'dandelion', 'flower_photos', 'roses', 'sunflowers', 'tulips']
属性2 {'daisy': 0, 'dandelion': 1, 'flower_photos': 2, 'roses': 3, 'sunflowers': 4, 'tulips': 5}
属性3 [('E:\\Dateset\\flower_photos\\daisy\\100080576_f52e8ee070_n.jpg', 0), ('E:\\Dateset\\flower_photos\\daisy\\10140303196_b88d3d6cec.jpg', 0), ('E:\\Dateset\\flower_photos\\daisy\\10172379554_b296050f82_n.jpg', 0), ('E:\\Dateset\\flower_photos\\daisy\\10172567486_2748826a8b.jpg', 0), ('E:\\Dateset\\flower_photos\\daisy\\10172636503_21bededa75_n.jpg', 0), ('E:\\Dateset\\flower_photos\\daisy\\102841525_bd6628ae3c.jpg', 0)]
import os.path import matplotlib.pyplot as plt import torch from torchvision import transforms from PIL import Image import json from model import AlexNet def main(): device = torch.device("cuda:0" if torch.cuda.is_available() else"cpu") data_transform = transforms.Compose( [transforms.Resize((224,224)), transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])]) # 加载预测图片 img_path = '' assert os.path.exists(img_path), "file: {} does not exist".format(img_path) img = Image.open(img_path) plt.imshow(img_path) img = data_transform(img) # 添加一个batch维度 img = torch.unsqueeze(img, dim=0) # read class_indict json_path = './class_indices.json' assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path) with open(json_path, "r") as f: class_indict = json.load(f) model = AlexNet(num_classes=5).to(device) # load model weights weights_path = "./AlexNet.pth" assert os.path.exists(weights_path), "file: '{}' dose not exist.".format(weights_path) model.load_state_dict(torch.load(weights_path)) model.eval() with torch.no_grad(): # predict class output = torch.squeeze(model(img.to(device))).cpu() predict = torch.softmax(output, dim=0) predict_cla = torch.argmax(predict).numpy() print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_cla)], predict[predict_cla].numpy()) plt.title(print_res) for i in range(len(predict)): print("class: {:10} prob: {:.3}".format(class_indict[str(i)], predict[i].numpy())) plt.show() if __name__ == '__main__': main()
torch.squeeze()
torch.squeeze(input, dim=None)
,该函数总共有两个参数,维度dim
可以传入int
整型获取tuple
元组类型。对于不指定dim
的输出将会删除所有输入大小为1的维度。
>>> x = torch.zeros(2, 1, 2, 1, 2)
>>> x.size()
torch.Size([2, 1, 2, 1, 2])
>>> y = torch.squeeze(x)
>>> y.size()
torch.Size([2, 2, 2])
>>> y = torch.squeeze(x, 0)
>>> y.size()
torch.Size([2, 1, 2, 1, 2])
>>> y = torch.squeeze(x, 1)
>>> y.size()
torch.Size([2, 2, 1, 2])
>>> y = torch.squeeze(x, (1, 2, 3))
>>> y.size()
torch.Size([2, 2, 2])
torch.unsqueeze
看一下具体的例子就可以明白。
a = torch.randn(2,3)
>>>a.shape
torch.Size([2, 3])
>>>b = torch.unsqueeze(a, dim=0)
torch.Size([1, 2, 3])
>>>c = torch.unsqueeze(a, dim=1)
torch.Size([2, 1, 3])
>>> d = torch.unsqueeze(a, dim=2)
torch.Size([2, 3, 1])
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。