赞
踩
背景:
深度学习对数据量的需求明显高于传统机器学习方法。当真实数据量较少,难以保证网络收敛时,需要通过数据增强方法生成更多可用于训练的图片。数据增强通过对图像进行随机裁剪,翻转等变换,提升输入图像的丰富度;但它本质上只是在原图上做“线性变化”,无法产生原图中没有的图像。在这种条件下,GAN(Generative Adversarial Networks)能够在有限数据条件下生成更加丰富的数据资源,提升网络训练的有效性。
DCGAN:使用卷积神经网络替代GAN 中的多层感知机(MLP)并对网络做微调处理,显著提升了图像生成的质量。
论文链接:Unsupervised Representation Learning with Deep Convolutional Generative Adversarial Networks
DCGAN设计技巧
基于Pytorch的DCGAN数据增强方法:
生成器(Generator):
class NetG(nn.Module):
    """DCGAN generator: maps a latent vector to a 3 x 96 x 96 image.

    Args:
        ngf: Base number of generator feature maps; the hidden layers use
            ngf*8, ngf*4, ngf*2 and ngf channels in that order.
        nz: Number of channels of the latent input.

    The forward input is expected to have shape (N, nz, 1, 1); the output
    has shape (N, 3, 96, 96) with values in [-1, 1] because of the final
    Tanh.
    """

    def __init__(self, ngf, nz):
        super().__init__()
        # layer1: nz x 1 x 1 noise -> (ngf*8) x 4 x 4
        self.layer1 = nn.Sequential(
            nn.ConvTranspose2d(nz, ngf * 8, kernel_size=4, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(ngf * 8),
            nn.ReLU(inplace=True),
        )
        # layer2: -> (ngf*4) x 8 x 8
        self.layer2 = nn.Sequential(
            nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 4),
            nn.ReLU(inplace=True),
        )
        # layer3: -> (ngf*2) x 16 x 16
        self.layer3 = nn.Sequential(
            nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 2),
            nn.ReLU(inplace=True),
        )
        # layer4: -> ngf x 32 x 32
        self.layer4 = nn.Sequential(
            nn.ConvTranspose2d(ngf * 2, ngf, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf),
            nn.ReLU(inplace=True),
        )
        # layer5: kernel 5 / stride 3 / padding 1 upsamples 32 -> 96,
        # giving the final 3 x 96 x 96 image; Tanh bounds it to [-1, 1].
        self.layer5 = nn.Sequential(
            nn.ConvTranspose2d(ngf, 3, 5, 3, 1, bias=False),
            nn.Tanh(),
        )

    def forward(self, x):
        """Run the five transposed-convolution stages in order."""
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)
        return out
判别器(Discriminator):(生成器的逆过程)
# Discriminator network D
class NetD(nn.Module):
    """DCGAN discriminator: scores a 3 x 96 x 96 image as real or fake.

    Args:
        ndf: Base number of discriminator feature maps; the hidden layers
            use ndf, ndf*2, ndf*4 and ndf*8 channels in that order.

    The forward input is expected to have shape (N, 3, 96, 96). The output
    has shape (N, 1, 1, 1) with values in [0, 1] (Sigmoid); callers that
    feed it to a loss with targets of shape (N,) must flatten it first.
    """

    def __init__(self, ndf):
        super().__init__()
        # layer1: 3 x 96 x 96 -> ndf x 32 x 32 (kernel 5 / stride 3 / padding 1)
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, ndf, kernel_size=5, stride=3, padding=1, bias=False),
            nn.BatchNorm2d(ndf),
            nn.LeakyReLU(0.2, inplace=True),
        )
        # layer2: -> (ndf*2) x 16 x 16
        self.layer2 = nn.Sequential(
            nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 2),
            nn.LeakyReLU(0.2, inplace=True),
        )
        # layer3: -> (ndf*4) x 8 x 8
        self.layer3 = nn.Sequential(
            nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 4),
            nn.LeakyReLU(0.2, inplace=True),
        )
        # layer4: -> (ndf*8) x 4 x 4
        self.layer4 = nn.Sequential(
            nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 8),
            nn.LeakyReLU(0.2, inplace=True),
        )
        # layer5: 4 x 4 kernel collapses the map to one probability per image
        self.layer5 = nn.Sequential(
            nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Run the five convolution stages in order."""
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)
        return out
损失函数(Loss function):
# Binary cross-entropy on the discriminator's probability output, with one
# Adam optimizer per network.
criterion = nn.BCELoss()
optimizerG = torch.optim.Adam(netG.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))
optimizerD = torch.optim.Adam(netD.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))

# One pass over the data per epoch; each batch updates D first, then G.
for epoch in range(1, opt.epoch + 1):
    for i, (imgs, _) in enumerate(dataloader):
        imgs = imgs.to(device)
        batch_size = imgs.size(0)
        # Targets are built on the device with the batch's own size instead
        # of mutating a shared buffer through `.data`.
        real_targets = torch.full((batch_size,), float(real_label), device=device)
        fake_targets = torch.full((batch_size,), float(fake_label), device=device)

        # ---- Train D with G held fixed ----
        optimizerD.zero_grad()
        # Push D towards scoring real images as 1. netD returns
        # (N, 1, 1, 1); BCELoss needs input and target shapes to match,
        # so the output is flattened to (N,).
        output = netD(imgs).view(-1)
        errD_real = criterion(output, real_targets)
        errD_real.backward()
        # Push D towards scoring generated images as 0.
        noise = torch.randn(batch_size, opt.nz, 1, 1, device=device)
        fake = netG(noise)  # generated images
        # detach() keeps D's gradients from flowing into G during D's update.
        output = netD(fake.detach()).view(-1)
        errD_fake = criterion(output, fake_targets)
        errD_fake.backward()
        errD = errD_fake + errD_real
        optimizerD.step()

        # ---- Train G with D held fixed ----
        optimizerG.zero_grad()
        # Push G towards images that D scores as 1.
        output = netD(fake).view(-1)
        errG = criterion(output, real_targets)
        errG.backward()
        optimizerG.step()
模型: model.py
- import torch.nn as nn
# Generator network G
class NetG(nn.Module):
    """DCGAN generator: maps a latent vector to a 3 x 96 x 96 image.

    Args:
        ngf: Base number of generator feature maps; the hidden layers use
            ngf*8, ngf*4, ngf*2 and ngf channels in that order.
        nz: Number of channels of the latent input.

    The forward input is expected to have shape (N, nz, 1, 1); the output
    has shape (N, 3, 96, 96) with values in [-1, 1] because of the final
    Tanh.
    """

    def __init__(self, ngf, nz):
        super().__init__()
        # layer1: nz x 1 x 1 noise -> (ngf*8) x 4 x 4
        self.layer1 = nn.Sequential(
            nn.ConvTranspose2d(nz, ngf * 8, kernel_size=4, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(ngf * 8),
            nn.ReLU(inplace=True),
        )
        # layer2: -> (ngf*4) x 8 x 8
        self.layer2 = nn.Sequential(
            nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 4),
            nn.ReLU(inplace=True),
        )
        # layer3: -> (ngf*2) x 16 x 16
        self.layer3 = nn.Sequential(
            nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 2),
            nn.ReLU(inplace=True),
        )
        # layer4: -> ngf x 32 x 32
        self.layer4 = nn.Sequential(
            nn.ConvTranspose2d(ngf * 2, ngf, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf),
            nn.ReLU(inplace=True),
        )
        # layer5: kernel 5 / stride 3 / padding 1 upsamples 32 -> 96,
        # giving the final 3 x 96 x 96 image; Tanh bounds it to [-1, 1].
        self.layer5 = nn.Sequential(
            nn.ConvTranspose2d(ngf, 3, 5, 3, 1, bias=False),
            nn.Tanh(),
        )

    def forward(self, x):
        """Run the five transposed-convolution stages in order."""
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)
        return out
-
-
# Discriminator network D
class NetD(nn.Module):
    """DCGAN discriminator: scores a 3 x 96 x 96 image as real or fake.

    Args:
        ndf: Base number of discriminator feature maps; the hidden layers
            use ndf, ndf*2, ndf*4 and ndf*8 channels in that order.

    The forward input is expected to have shape (N, 3, 96, 96). The output
    has shape (N, 1, 1, 1) with values in [0, 1] (Sigmoid); callers that
    feed it to a loss with targets of shape (N,) must flatten it first.
    """

    def __init__(self, ndf):
        super().__init__()
        # layer1: 3 x 96 x 96 -> ndf x 32 x 32 (kernel 5 / stride 3 / padding 1)
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, ndf, kernel_size=5, stride=3, padding=1, bias=False),
            nn.BatchNorm2d(ndf),
            nn.LeakyReLU(0.2, inplace=True),
        )
        # layer2: -> (ndf*2) x 16 x 16
        self.layer2 = nn.Sequential(
            nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 2),
            nn.LeakyReLU(0.2, inplace=True),
        )
        # layer3: -> (ndf*4) x 8 x 8
        self.layer3 = nn.Sequential(
            nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 4),
            nn.LeakyReLU(0.2, inplace=True),
        )
        # layer4: -> (ndf*8) x 4 x 4
        self.layer4 = nn.Sequential(
            nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 8),
            nn.LeakyReLU(0.2, inplace=True),
        )
        # layer5: 4 x 4 kernel collapses the map to one probability per image
        self.layer5 = nn.Sequential(
            nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Run the five convolution stages in order."""
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)
        return out
-
训练:train.py
- import os
- import argparse
- import torch
- import torchvision
- import torchvision.utils as vutils
- from torchvision import transforms
- from torchvision.transforms import ToPILImage
- import torch.nn as nn
- from random import randint
- from model import NetD, NetG
- from PIL import Image
- from utils import tensor_to_PIL
-
# Command-line options for the DCGAN training script.
parser = argparse.ArgumentParser()
parser.add_argument('--batchSize', type=int, default=32)
parser.add_argument('--imageSize', type=int, default=96)
parser.add_argument('--nz', type=int, default=100, help='size of the latent z vector')
parser.add_argument('--ngf', type=int, default=64)
parser.add_argument('--ndf', type=int, default=64)
parser.add_argument('--epoch', type=int, default=40000, help='number of epochs to train for')
parser.add_argument('--lr', type=float, default=0.0002, help='learning rate, default=0.0002')
parser.add_argument('--beta1', type=float, default=0.5, help='beta1 for adam. default=0.5')
parser.add_argument('--data_path', default='data/', help='folder to train data')
parser.add_argument('--outf', default='imgs/', help='folder to output images and model checkpoints')
opt = parser.parse_args()

# Pick the compute device. GPU index 3 is preferred (as in the original
# setup) but only when that many GPUs exist; otherwise use GPU 0 or the CPU.
if torch.cuda.is_available():
    gpu_index = 3 if torch.cuda.device_count() > 3 else 0
    device = torch.device("cuda:%d" % gpu_index)
else:
    device = torch.device("cpu")

# Image loading and preprocessing.
# Resize scales the shorter edge to imageSize and CenterCrop makes the result
# square, so every sample has the same size. Normalize maps pixel values to
# [-1, 1], matching the generator's Tanh output range.
# The pipeline is bound to `preprocess` so it does not shadow the imported
# `transforms` module.
preprocess = torchvision.transforms.Compose([
    torchvision.transforms.Resize(opt.imageSize),
    torchvision.transforms.CenterCrop(opt.imageSize),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

dataset = torchvision.datasets.ImageFolder(opt.data_path, transform=preprocess)

dataloader = torch.utils.data.DataLoader(
    dataset=dataset,
    batch_size=opt.batchSize,
    shuffle=True,
    drop_last=True,  # every batch has exactly opt.batchSize samples
)

netG = NetG(opt.ngf, opt.nz).to(device)
netD = NetD(opt.ndf).to(device)

criterion = nn.BCELoss()
optimizerG = torch.optim.Adam(netG.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))
optimizerD = torch.optim.Adam(netD.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))

real_label = 1
fake_label = 0
# Target tensors are created once on the device; drop_last=True above keeps
# the batch size constant, so they can be reused for every batch.
real_targets = torch.full((opt.batchSize,), float(real_label), device=device)
fake_targets = torch.full((opt.batchSize,), float(fake_label), device=device)

save_path = './imgs/epoch{:s}'
j = 0  # index of the next sample-image directory
for epoch in range(1, opt.epoch + 1):
    for i, (imgs, _) in enumerate(dataloader):
        # ---- Train D with G held fixed ----
        optimizerD.zero_grad()
        # Push D towards scoring real images as 1. netD returns
        # (N, 1, 1, 1); BCELoss needs input and target shapes to match,
        # so the output is flattened to (N,).
        imgs = imgs.to(device)
        output = netD(imgs).view(-1)
        errD_real = criterion(output, real_targets)
        errD_real.backward()
        # Push D towards scoring generated images as 0.
        noise = torch.randn(opt.batchSize, opt.nz, 1, 1, device=device)
        fake = netG(noise)  # generated images
        # detach() keeps D's gradients from flowing into G during D's update.
        output = netD(fake.detach()).view(-1)
        errD_fake = criterion(output, fake_targets)
        errD_fake.backward()
        errD = errD_fake + errD_real
        optimizerD.step()

        # ---- Train G with D held fixed ----
        optimizerG.zero_grad()
        # Push G towards images that D scores as 1.
        output = netD(fake).view(-1)
        errG = criterion(output, real_targets)
        errG.backward()
        optimizerG.step()

        print('[%d/%d][%d/%d] Loss_D: %.3f Loss_G %.3f'
              % (epoch, opt.epoch, i, len(dataloader), errD.item(), errG.item()))

    # Every 1000 epochs, dump the last generated batch as individual PNGs and
    # checkpoint both networks.
    if epoch % 1000 == 0:
        sample_dir = save_path.format(str(j))
        # makedirs(exist_ok=True) also creates missing parents and tolerates
        # directories left over from a previous run.
        os.makedirs(sample_dir, exist_ok=True)
        os.makedirs(opt.outf, exist_ok=True)
        for idx, sample in enumerate(fake.detach()):
            vutils.save_image(sample,
                              '%s/%d.png' % (sample_dir, idx),
                              normalize=True)
        torch.save(netG.state_dict(), '%s/netG_%03d.pth' % (opt.outf, epoch))
        torch.save(netD.state_dict(), '%s/netD_%03d.pth' % (opt.outf, epoch))
        j = j + 1
参考博客:Pytorch版DCGAN图像生成技术
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。