An autoencoder (AutoEncoder) consists of two parts: an encoder (Encoder) and a decoder (Decoder). Either part can be any model; in practice, neural networks are usually used for both.
An autoencoder works as a data-compression method. The idea: the input is passed through the encoder to produce a code; that code is then fed to the decoder, and we check how well the decoder's output reconstructs the original data. The reconstruction error between the decoder output and the original input is therefore the optimization objective.
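In symbols: with encoder $f$ and decoder $g$, training minimizes the reconstruction error between the input $x$ and the reconstruction $g(f(x))$. With the mean squared error used later in this post, the objective is

$$\mathcal{L}(x) = \lVert x - g(f(x)) \rVert^2$$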
Below we take the MNIST dataset as an example and build a convolutional autoencoder with PyTorch 1.0.
1. Import the required libraries
```python
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision.utils import save_image
```
2. Define the hyperparameters and choose whether to use GPU acceleration
```python
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
batch_size = 512
```
3. Load the MNIST dataset and rescale the pixel values to the range -1 to 1. This makes the input symmetric about zero, which helps training converge (it also matches the Tanh at the end of the decoder, whose output lies in the same range).
```python
# Normalize pixel values to [-1, 1]
data_tf = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize([0.5], [0.5])]
)

train_dataset = datasets.MNIST(root='./data', train=True, transform=data_tf, download=True)
train_data = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
```
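As a quick sanity check (not in the original post), you can pull one batch and confirm the shape and value range:

```python
img, label = next(iter(train_data))
print(img.shape)                            # torch.Size([512, 1, 28, 28])
print(img.min().item(), img.max().item())   # approximately -1.0 and 1.0
```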
4. Define the convolutional autoencoder
```python
class AutoEncoder(nn.Module):
    def __init__(self):
        super(AutoEncoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(1, 16, 3, stride=3, padding=1),   # b,16,10,10
            nn.ReLU(True),
            nn.MaxPool2d(2, stride=2),                   # b,16,5,5
            nn.Conv2d(16, 8, 3, stride=2, padding=1),    # b,8,3,3
            nn.ReLU(True),
            nn.MaxPool2d(2, stride=1)                    # b,8,2,2
        )
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(8, 16, 3, stride=2),              # b,16,5,5
            nn.ReLU(True),
            nn.ConvTranspose2d(16, 8, 5, stride=3, padding=1),   # b,8,15,15
            nn.ReLU(True),
            nn.ConvTranspose2d(8, 1, 2, stride=2, padding=1),    # b,1,28,28
            nn.Tanh()
        )

    def forward(self, x):
        encode = self.encoder(x)
        decode = self.decoder(encode)
        return encode, decode
```
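Before moving on, it can help to confirm the shape comments by pushing a dummy batch through an instance of the network (a quick check added here, not part of the original post; `net` is a throwaway instance):

```python
net = AutoEncoder()
x = torch.randn(2, 1, 28, 28)   # a dummy batch of two MNIST-sized images
encode, decode = net(x)
print(encode.shape)             # torch.Size([2, 8, 2, 2]) -> 8*2*2 = 32 numbers per image
print(decode.shape)             # torch.Size([2, 1, 28, 28]) -> back to the input size
```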
The encoder uses `nn.Conv2d()`, whose signature is:

```python
torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True)
```

For each spatial dimension of the input, the output size is:

$$H_{out} = \left\lfloor \frac{H_{in} + 2 \cdot \text{padding} - \text{dilation} \cdot (\text{kernel\_size} - 1) - 1}{\text{stride}} + 1 \right\rfloor$$
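This same rule applies to `nn.MaxPool2d`. A small throwaway helper (ours, not from the original) reproduces the shape annotations in the encoder from the formula alone:

```python
import math

def conv_out(h_in, kernel_size, stride=1, padding=0, dilation=1):
    # Output size formula shared by nn.Conv2d and nn.MaxPool2d
    return math.floor((h_in + 2 * padding - dilation * (kernel_size - 1) - 1) / stride) + 1

h = conv_out(28, 3, stride=3, padding=1)   # Conv2d(1, 16, ...)   -> 10
h = conv_out(h, 2, stride=2)               # MaxPool2d(2, 2)      -> 5
h = conv_out(h, 3, stride=2, padding=1)    # Conv2d(16, 8, ...)   -> 3
h = conv_out(h, 2, stride=1)               # MaxPool2d(2, 1)      -> 2
print(h)  # 2
```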
The decoder uses `nn.ConvTranspose2d()`, which can be viewed as the reverse of a convolution. Its signature is:

```python
torch.nn.ConvTranspose2d(in_channels, out_channels, kernel_size, stride=1, padding=0, output_padding=0, groups=1, bias=True, dilation=1)
```

For each spatial dimension of the input, the output size is:

$$H_{out} = (H_{in} - 1) \cdot \text{stride} - 2 \cdot \text{padding} + \text{dilation} \cdot (\text{kernel\_size} - 1) + \text{output\_padding} + 1$$
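The same kind of helper (again ours, for illustration) checks the decoder's shape annotations, expanding the 2x2 code back to a 28x28 image:

```python
def deconv_out(h_in, kernel_size, stride=1, padding=0, output_padding=0, dilation=1):
    # Output size formula for nn.ConvTranspose2d
    return (h_in - 1) * stride - 2 * padding + dilation * (kernel_size - 1) + output_padding + 1

h = deconv_out(2, 3, stride=2)              # ConvTranspose2d(8, 16, ...) -> 5
h = deconv_out(h, 5, stride=3, padding=1)   # ConvTranspose2d(16, 8, ...) -> 15
h = deconv_out(h, 2, stride=2, padding=1)   # ConvTranspose2d(8, 1, ...)  -> 28
print(h)  # 28
```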
5. Instantiate the model, and define the loss function and optimizer
```python
model = AutoEncoder().to(device)

# Loss function and optimizer
loss_fn = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-5)
```
6. Train, and save the images produced by the decoder. The helper `to_img` (also shown in the full listing below) maps the decoder's Tanh output from [-1, 1] back to [0, 1] for saving.
```python
def to_img(x):
    x = 0.5 * (x + 1.)   # map [-1, 1] back to [0, 1]
    x = x.clamp(0, 1)
    x = x.view(x.shape[0], 1, 28, 28)
    return x


for t in range(40):
    for data in train_data:
        img, _ = data                 # the labels are not needed for reconstruction
        img = img.to(device)
        _, output = model(img)
        loss = loss_fn(output, img)   # MSELoss already averages, so no extra division

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if (t + 1) % 5 == 0:  # every 5 epochs, save the decoded images and the originals
        print('epoch: {}, Loss: {:.4f}'.format(t + 1, loss.item()))
        pic = to_img(output.cpu().data)
        if not os.path.exists('./conv_autoencoder'):
            os.mkdir('./conv_autoencoder')
        save_image(pic, './conv_autoencoder/decode_image_{}.png'.format(t + 1))
        save_image(img, './conv_autoencoder/raw_image_{}.png'.format(t + 1))
```
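Once trained, the encoder alone provides the compressed representation, which is the point of using an autoencoder for compression. A minimal sketch (not in the original post) that flattens each image's 8x2x2 feature map into a 32-dimensional code:

```python
model.eval()
with torch.no_grad():
    img, _ = next(iter(train_data))
    code, _ = model(img.to(device))
    code = code.view(code.shape[0], -1)   # each 1x28x28 image -> a 32-dim vector
print(code.shape)  # torch.Size([512, 32]) for a full batch
```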
Comparison of results (decoded images on the left, original images on the right):
The complete code:
```python
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision.utils import save_image

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
batch_size = 512

# Normalize pixel values to [-1, 1]
data_tf = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize([0.5], [0.5])]
)

train_dataset = datasets.MNIST(root='./data', train=True, transform=data_tf, download=True)
train_data = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)


def to_img(x):
    x = 0.5 * (x + 1.)   # map [-1, 1] back to [0, 1]
    x = x.clamp(0, 1)
    x = x.view(x.shape[0], 1, 28, 28)
    return x


class AutoEncoder(nn.Module):
    def __init__(self):
        super(AutoEncoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(1, 16, 3, stride=3, padding=1),   # b,16,10,10
            nn.ReLU(True),
            nn.MaxPool2d(2, stride=2),                   # b,16,5,5
            nn.Conv2d(16, 8, 3, stride=2, padding=1),    # b,8,3,3
            nn.ReLU(True),
            nn.MaxPool2d(2, stride=1)                    # b,8,2,2
        )
        self.decoder = nn.Sequential(
            # An alternative decoder with the same output size:
            # nn.ConvTranspose2d(8, 8, 3, stride=2, padding=1),    # b,8,3,3
            # nn.ReLU(True),
            # nn.ConvTranspose2d(8, 16, 4, stride=4, padding=1),   # b,16,10,10
            # nn.ReLU(True),
            # nn.ConvTranspose2d(16, 1, 3, stride=3, padding=1),   # b,1,28,28
            # nn.Tanh()
            nn.ConvTranspose2d(8, 16, 3, stride=2),              # b,16,5,5
            nn.ReLU(True),
            nn.ConvTranspose2d(16, 8, 5, stride=3, padding=1),   # b,8,15,15
            nn.ReLU(True),
            nn.ConvTranspose2d(8, 1, 2, stride=2, padding=1),    # b,1,28,28
            nn.Tanh()
        )

    def forward(self, x):
        encode = self.encoder(x)
        decode = self.decoder(encode)
        return encode, decode


model = AutoEncoder().to(device)

# Loss function and optimizer
loss_fn = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-5)

for t in range(40):
    for data in train_data:
        img, _ = data                 # the labels are not needed for reconstruction
        img = img.to(device)
        _, output = model(img)
        loss = loss_fn(output, img)   # MSELoss already averages, so no extra division

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if (t + 1) % 5 == 0:  # every 5 epochs, save the decoded images and the originals
        print('epoch: {}, Loss: {:.4f}'.format(t + 1, loss.item()))
        pic = to_img(output.cpu().data)
        if not os.path.exists('./conv_autoencoder'):
            os.mkdir('./conv_autoencoder')
        save_image(pic, './conv_autoencoder/decode_image_{}.png'.format(t + 1))
        save_image(img, './conv_autoencoder/raw_image_{}.png'.format(t + 1))
```