赞
踩
关于训练集的介绍和数据划分可以参照上一个博客:
https://blog.csdn.net/qq_41685265/article/details/104895273
- class DogCat(data.Dataset):
-
- def __init__(self, root, transforms=None, train=True, test=False):
- """
- 主要目标: 获取所有图片的地址,并根据训练,验证,测试划分数据
- """
- self.test = test
- imgs = [os.path.join(root, img) for img in os.listdir(root)]
-
-
- # test1: data/test1/8973.jpg
- # train: data/train/cat.10004.jpg
- if self.test:
- imgs = sorted(imgs, key=lambda x: int(x.split('.')[-2].split('/')[-1]))
- else:
- imgs = sorted(imgs, key=lambda x: int(x.split('.')[-2]))
-
- imgs_num = len(imgs)
-
- if self.test:
- self.imgs = imgs
- elif train:
- self.imgs = imgs[:int(0.7 * imgs_num)]
- else:
- self.imgs = imgs[int(0.7 * imgs_num):]
-
- if transforms is None:
- normalize = T.Normalize(mean=[0.485, 0.456, 0.406],
- std=[0.229, 0.224, 0.225])
-
- if self.test or not train:
- self.transforms = T.Compose([
- T.Resize(224),
- T.CenterCrop(224),
- T.ToTensor(),
- normalize
- ])
- else:
- self.transforms = T.Compose([
- T.Resize(256),
- T.CenterCrop(224),
- T.RandomHorizontalFlip(),
- T.ToTensor(),
- normalize
- ])
-
- def __getitem__(self, index):
- """
- 一次返回一张图片的数据
- """
- img_path = self.imgs[index]
- if self.test:
- label = int(self.imgs[index].split('.')[-2].split('/')[-1])
- else:
- label = 1 if 'dog' in img_path.split('/')[-1] else 0
- data = Image.open(img_path)
- data = self.transforms(data)
- return data, label
-
- def __len__(self):
- return len(self.imgs)
有一些参数没有用得上
- class DefaultConfig(object):
- env = 'default' # visdom 环境
- vis_port = 8097 # visdom 端口
- model = 'ResNet34' # 使用的模型,名字必须与models/__init__.py中的名字一致
-
- train_data_root = '/media/cyq/CU/Ubuntu system files/dogs-vs-cats/train/' # 训练集存放路径
- test_data_root = '/media/cyq/CU/Ubuntu system files/dogs-vs-cats/test/' # 测试集存放路径
- load_model_path = None # 加载预训练的模型的路径,为None代表不加载
-
- batch_size = 48 # batch size
- use_gpu = True # user GPU or not
- num_workers = 4 # how many workers for loading data
- print_freq = 20 # print info every N batch
-
- debug_file = '/tmp/debug' # if os.path.exists(debug_file): enter ipdb
- result_file = 'result.csv'
-
- max_epoch = 10
- lr = 0.001 # initial learning rate
- lr_decay = 0.5 # when val_loss increase, lr = lr*lr_decay
- weight_decay = 0e-5 # 损失函数
-
-
-
- opt = DefaultConfig()
- class ResNet34(nn.Module):
- """
- 实现主module:ResNet34
- ResNet34包含多个layer,每个layer又包含多个Residual block
- 用子module来实现Residual block,用_make_layer函数来实现layer
- """
-
- def __init__(self, num_classes=2):
- super(ResNet34, self).__init__()
- self.model_name = 'resnet34'
-
- # 前几层: 图像转换
- resnet34 = models.resnet34(pretrained=True)
- self.resnet = nn.Sequential(*list(resnet34.children())[:-1])
-
- self.fc = nn.Linear(in_features=512, out_features=num_classes)
-
-
- def forward(self, x):
-
- x = self.resnet(x)
- x = x.view(x.size(0), -1)
- x = self.fc(x)
- return x
-
-
-
-
- device = "cuda" if opt.use_gpu==True else "cpu"
-
- net = ResNet34()
- net.to(device)
- def val(model,dataloader):
- """
- 计算模型在验证集上的准确率等信息
- """
- model.eval()
- confusion_matrix = meter.ConfusionMeter(2)
- for ii, (val_input, label) in tqdm(enumerate(dataloader)):
- val_input = val_input.to(device)
- with torch.no_grad():
- score = model(val_input)
- confusion_matrix.add(score.detach().squeeze(), label.type(t.LongTensor))
-
- model.train()
- cm_value = confusion_matrix.value()
- accuracy = 100. * (cm_value[0][0] + cm_value[1][1]) / (cm_value.sum())
- return confusion_matrix, accuracy
-
-
-
- def train():
-
- # step1: configure model
- # net
-
- # step2: data
- train_data = DogCat(opt.train_data_root, train=True)
- # print(len(train_data))
- val_data = DogCat(opt.train_data_root, train=False)
- train_dataloader = DataLoader(train_data, opt.batch_size,
- shuffle=True, num_workers=opt.num_workers)
- val_dataloader = DataLoader(val_data, opt.batch_size,
- shuffle=False, num_workers=opt.num_workers)
-
- # step3: criterion and optimizer
- criterion = nn.CrossEntropyLoss()
- optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, net.parameters()), lr=opt.lr, momentum=0.9)
- lr = opt.lr
-
- # step4: meters
- loss_meter = meter.AverageValueMeter()
- confusion_matrix = meter.ConfusionMeter(2)
- previous_loss = 1e10
-
- # train
- for epoch in range(opt.max_epoch):
- # print(epoch)
-
- loss_meter.reset()
- confusion_matrix.reset()
-
- for input, label in train_dataloader:
-
- # train model
- input = input.to(device)
- target = label.to(device)
-
- optimizer.zero_grad()
- score = net(input)
- loss = criterion(score, target)
- loss.backward()
- optimizer.step()
-
- # meters update and visualize
- loss_meter.add(loss.item())
- # detach 一下更安全保险
- confusion_matrix.add(score.detach(), target.detach())
-
-
-
-
-
-
-
- # validate and visualize
- val_cm, val_accuracy = val(net, val_dataloader)
-
- # vis.plot('val_accuracy', val_accuracy)
- print("epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}".format(
- epoch=epoch, loss=loss_meter.value()[0], val_cm=str(val_cm.value()), train_cm=str(confusion_matrix.value()),
- lr=lr))
- cm_train = confusion_matrix.value()
- train_accuracy = 100. * (cm_train[0][0] + cm_train[1][1]) / (cm_train.sum())
- print("train-ACC: %.3f"%train_accuracy)
- print("val-ACC: %.3f"%val_accuracy)
-
-
-
- previous_loss = loss_meter.value()[0]
-
-
由于使用了预训练的模型,因此网络在迭代到第二代时,验证集正确率已经超过了99%,在5-10代时,训练集精度已经在99.5%左右,下边从测试集中选出几个检验:
模型的输出结果为:
- 1,4.009683834738098e-06
- 2,1.8512298538553296e-06
- 3,1.0709993148338981e-05
- 4,1.2812791283067781e-05
- 5,0.9999998807907104
- 6,0.999983549118042
- 7,0.9999879598617554
- 8,0.9999997615814209
- 9,0.9999997615814209
- 10,0.9999998807907104
- 11,0.9999719858169556
- 12,4.395487849251367e-06
- 13,0.9999877214431763
- 14,0.999725878238678
- 15,0.9999864101409912
- 16,0.9999921321868896
0代表猫,1代表狗(与代码中 `label = 1 if 'dog' ... else 0` 一致),很显然上边的数据的分类效果是很好的
- # coding:utf8
- import os
- from PIL import Image
- from torch.utils import data
- import numpy as np
- from torchvision import transforms as T
- import torch as t
- import torch.nn as nn
- from torchvision import datasets, models, transforms
- from torch.utils.data import DataLoader
- from torchnet import meter
- from tqdm import tqdm
- import torch
- import csv
-
-
class DogCat(data.Dataset):
    """Dogs-vs-Cats dataset.

    Collects every image path under ``root`` and splits it into
    train / validation / test subsets:

      * ``test=True``  -> all images, sorted by numeric id
      * ``train=True`` -> first 70% of the sorted labeled images
      * ``train=False``-> remaining 30% (validation)
    """

    def __init__(self, root, transforms=None, train=True, test=False):
        """
        Gather all image paths and split them for train/val/test.

        Args:
            root: directory containing the images.
            transforms: optional transform applied to each PIL image;
                when None a default pipeline is built (resize/crop/
                normalize, plus a random horizontal flip for training).
            train: select the training split (ignored when test=True).
            test: treat ``root`` as the unlabeled test set.
        """
        self.test = test
        imgs = [os.path.join(root, img) for img in os.listdir(root)]

        # File-name layouts:
        #   test:  data/test1/8973.jpg       -> id is the bare stem
        #   train: data/train/cat.10004.jpg  -> id is the middle token
        # os.path.basename/splitext keep this portable across OS path
        # separators (the old code split on '/').
        if self.test:
            imgs = sorted(
                imgs, key=lambda x: int(os.path.splitext(os.path.basename(x))[0]))
        else:
            imgs = sorted(
                imgs, key=lambda x: int(os.path.basename(x).split('.')[-2]))

        imgs_num = len(imgs)

        if self.test:
            self.imgs = imgs
        elif train:
            self.imgs = imgs[:int(0.7 * imgs_num)]   # first 70% -> training
        else:
            self.imgs = imgs[int(0.7 * imgs_num):]   # last 30% -> validation

        if transforms is None:
            # ImageNet statistics, matching the pretrained backbone.
            normalize = T.Normalize(mean=[0.485, 0.456, 0.406],
                                    std=[0.229, 0.224, 0.225])

            if self.test or not train:
                # Deterministic pipeline for evaluation.
                self.transforms = T.Compose([
                    T.Resize(224),
                    T.CenterCrop(224),
                    T.ToTensor(),
                    normalize
                ])
            else:
                # Light augmentation for training.
                self.transforms = T.Compose([
                    T.Resize(256),
                    T.CenterCrop(224),
                    T.RandomHorizontalFlip(),
                    T.ToTensor(),
                    normalize
                ])
        else:
            # BUG FIX: a caller-supplied transform used to be silently
            # dropped, leaving self.transforms unset and raising
            # AttributeError in __getitem__.
            self.transforms = transforms

    def __getitem__(self, index):
        """Return one (image_tensor, label) pair.

        For the test split the "label" is the numeric image id; otherwise
        it is 1 for dog and 0 for cat, parsed from the file name.
        """
        img_path = self.imgs[index]
        name = os.path.basename(img_path)
        if self.test:
            label = int(os.path.splitext(name)[0])
        else:
            label = 1 if 'dog' in name else 0
        # convert('RGB') guards against grayscale/palette/RGBA files,
        # which would otherwise break the 3-channel Normalize.
        img = Image.open(img_path).convert('RGB')
        return self.transforms(img), label

    def __len__(self):
        return len(self.imgs)
-
-
class DefaultConfig(object):
    """Experiment configuration: data paths, training hyper-parameters, misc."""
    env = 'default'  # visdom environment name
    vis_port = 8097  # visdom port
    model = 'ResNet34'  # model to use; must match a name exposed in models/__init__.py

    train_data_root = '/media/cyq/CU/Ubuntu system files/dogs-vs-cats/train/'  # training-set directory
    test_data_root = '/media/cyq/CU/Ubuntu system files/dogs-vs-cats/test/'  # test-set directory
    load_model_path = None  # path of a pretrained checkpoint to load; None = do not load

    batch_size = 48  # batch size
    use_gpu = True  # use GPU or not
    num_workers = 4  # how many workers for loading data
    print_freq = 20  # print info every N batches

    debug_file = '/tmp/debug'  # if os.path.exists(debug_file): enter ipdb
    result_file = 'result.csv'

    max_epoch = 10
    lr = 0.001  # initial learning rate
    lr_decay = 0.5  # when val_loss increases, lr = lr * lr_decay
    weight_decay = 0e-5  # L2 regularization strength (original comment mislabeled this as a loss function)



# Single shared config instance used by the rest of the script.
opt = DefaultConfig()
-
-
-
-
-
class ResNet34(nn.Module):
    """ResNet-34 classifier built on torchvision's pretrained backbone.

    Everything from torchvision's ``resnet34`` except its final fully
    connected layer is reused as a feature extractor; a fresh linear
    layer maps the 512-d features to ``num_classes`` logits.
    """

    def __init__(self, num_classes=2):
        super(ResNet34, self).__init__()
        self.model_name = 'resnet34'

        # Pretrained backbone with the original classification head removed.
        backbone = models.resnet34(pretrained=True)
        self.resnet = nn.Sequential(*list(backbone.children())[:-1])

        # New head for this 2-class task.
        self.fc = nn.Linear(in_features=512, out_features=num_classes)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes)."""
        features = self.resnet(x)
        # Collapse all trailing dimensions so the linear layer sees (batch, 512).
        features = features.view(features.size(0), -1)
        return self.fc(features)
-
-
-
-
# Select the compute device. Guard with torch.cuda.is_available() so the
# script still runs on CPU-only machines even when use_gpu is left True
# (the old `opt.use_gpu==True` check would crash there).
device = "cuda" if opt.use_gpu and torch.cuda.is_available() else "cpu"

# Module-level model instance shared by train() and val().
net = ResNet34()
net.to(device)
-
-
def val(model, dataloader):
    """Evaluate ``model`` on ``dataloader``.

    Args:
        model: the network to evaluate (temporarily put into eval mode).
        dataloader: yields (input_batch, label_batch) pairs.

    Returns:
        (confusion_matrix, accuracy): a 2-class torchnet ConfusionMeter
        and the overall accuracy in percent.

    Note: the model is restored to train() mode before returning, since
    this is called from inside the training loop.
    """
    model.eval()
    confusion_matrix = meter.ConfusionMeter(2)
    # One no_grad context around the whole loop instead of per batch;
    # tqdm gets the dataloader directly so it can show a total.
    with torch.no_grad():
        for val_input, label in tqdm(dataloader):
            val_input = val_input.to(device)
            score = model(val_input)
            confusion_matrix.add(score.detach().squeeze(), label.long())

    model.train()
    cm_value = confusion_matrix.value()
    # Accuracy = (sum of diagonal) / total, as a percentage.
    accuracy = 100. * (cm_value[0][0] + cm_value[1][1]) / (cm_value.sum())
    return confusion_matrix, accuracy
-
-
-
def train():
    """Train the module-level ``net`` on the dogs-vs-cats data.

    Runs opt.max_epoch epochs, validating after each one, and decays the
    learning rate by opt.lr_decay whenever the epoch loss stops improving.
    """
    # step1: model — uses the module-level ``net`` already placed on ``device``

    # step2: data
    train_data = DogCat(opt.train_data_root, train=True)
    val_data = DogCat(opt.train_data_root, train=False)
    train_dataloader = DataLoader(train_data, opt.batch_size,
                                  shuffle=True, num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data, opt.batch_size,
                                shuffle=False, num_workers=opt.num_workers)

    # step3: criterion and optimizer
    criterion = nn.CrossEntropyLoss()
    # FIX: opt.weight_decay was configured but never passed to the
    # optimizer (its value is 0.0, so this is behavior-preserving today
    # while honoring the config).
    optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, net.parameters()),
                                lr=opt.lr, momentum=0.9,
                                weight_decay=opt.weight_decay)
    lr = opt.lr

    # step4: meters
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e10

    # train
    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()

        # 'inputs' (not 'input') avoids shadowing the builtin.
        for inputs, label in train_dataloader:
            inputs = inputs.to(device)
            target = label.to(device)

            optimizer.zero_grad()
            score = net(inputs)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()

            # meters update
            loss_meter.add(loss.item())
            # detach so the meter never keeps the autograd graph alive
            confusion_matrix.add(score.detach(), target.detach())

        # validate and report
        val_cm, val_accuracy = val(net, val_dataloader)

        print("epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}".format(
            epoch=epoch, loss=loss_meter.value()[0], val_cm=str(val_cm.value()), train_cm=str(confusion_matrix.value()),
            lr=lr))
        cm_train = confusion_matrix.value()
        train_accuracy = 100. * (cm_train[0][0] + cm_train[1][1]) / (cm_train.sum())
        print("train-ACC: %.3f" % train_accuracy)
        print("val-ACC: %.3f" % val_accuracy)

        # BUG FIX: lr_decay and previous_loss existed but were dead code —
        # the decay documented in DefaultConfig was never applied. Decay
        # the learning rate when the mean epoch loss stopped improving.
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        previous_loss = loss_meter.value()[0]
-
-
- train()
- torch.save(net, 'model.pkl')
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。