当前位置:   article > 正文

Encoder+Decoder+LSTM 预测图像帧_读取每帧图像预测

读取每帧图像预测

Git代码地址:https://github.com/wdf19961118/LSTM 

问题描述:

在这里基于卷积循环神经网络,做一个图像序列的预测。输入连续的16张图像帧,图像大小(3,128,128)。利用卷积网络对每连续的16张图像进行Encoder特征提取,然后将提取的特征序列输入到循环神经网络(LSTM)中,之后通过Decoder反卷积成原图像大小的张量(3,128,128),也可以当做根据前16帧生成了第17帧图像,原序列第17帧图像作为label,计算loss。

数据预处理:

1、想得到一个txt文本,里面每一行记录连续的17帧图像的路径

2、如何生成我们想要的txt路径文件 ?

数据集存储方式:

1)文件名按数字顺序:0,1,2。。。

2)每个文件夹下面都是一个视频的分解帧,命名方式如下:

3、代码:

  1. import os
  2. #
  3. dir='/home/lab226/wdf/imgsrc'
  4. fp = open('./img_path.txt','w+')
  5. imgfile_list = os.listdir('/home/lab226/wdf/imgsrc')
  6. #对文件夹列表按文件名的数字顺序排序
  7. imgfile_list.sort(key= lambda x:int(x[:]))
  8. #print(img_list)
  9. seqsize =17
  10. for imgfile in imgfile_list:
  11. filepath = os.path.join(dir,imgfile)
  12. img_list = os.listdir(filepath)
  13. #这个排序比较重要,因为我们要顺序取,但是文件的存储方式并不是按照我们理解的数字顺序存储
  14. img_list.sort(key=lambda x: int(x[:-4]))
  15. #滑窗取序列,步长为8
  16. for i in range(0, len(img_list)-seqsize, 8):
  17. for j in range(i,i+seqsize):
  18. img = img_list[j]
  19. path = os.path.join(filepath, img)
  20. if j == i+seqsize-1:
  21. fp.write(path+'\n')
  22. else:
  23. fp.write(path+' ')
  24. fp.close()

 

 数据加载:

我写了自己的SeqDataset,改写了Dataset类中的__getitem__()函数,使得每次迭代返回连续的16张图像和第17张标签图像。详细代码如下:

  1. class SeqDataset(Dataset):
  2. def __init__(self, txt, transform=None, target_transform=None, loader=default_loader):
  3. fh = open(txt, 'r')
  4. imgseqs = []
  5. for line in fh:
  6. line = line.strip('\n')
  7. line = line.rstrip()
  8. imgseqs.append(line)
  9. self.num_samples = len(imgseqs)
  10. self.imgseqs = imgseqs
  11. self.transform = transform
  12. self.target_transform = target_transform
  13. self.loader = loader
  14. def __getitem__(self, index):
  15. current_index = np.random.choice(range(0, self.num_samples))
  16. imgs_path = self.imgseqs[current_index].split()
  17. current_imgs = []
  18. current_imgs_path = imgs_path[:len(imgs_path)-1]
  19. current_label_path = imgs_path[len(imgs_path)-1]
  20. current_label = self.loader(current_label_path)
  21. for frame in current_imgs_path:
  22. img = self.loader(frame)
  23. if self.transform is not None:
  24. img = self.transform(img)
  25. current_imgs.append(img)
  26. current_label = self.transform(current_label)
  27. #print(current_label.shape)
  28. batch_cur_imgs = np.stack(current_imgs, axis=0)
  29. return batch_cur_imgs, current_label
# Input frames are converted to tensors in [0, 1]; no resizing is applied,
# so the index file must point at 128x128 images (the model assumes that
# size — TODO confirm against the dataset on disk).
transform_list = [
    transforms.ToTensor()
]
data_transforms = transforms.Compose( transform_list )
# One sample = 16 input frames + 1 label frame, per line of img_path.txt.
train_data = SeqDataset(txt='./img_path.txt',transform=data_transforms)
# NOTE(review): num_workers=20 is aggressive — verify the host has the cores.
train_loader = DataLoader(train_data, shuffle=True, num_workers=20,batch_size=BATCH_SIZE)

 

模型介绍:

由Encoder+LSTM和Decoder这两部分组成 

具体代码:

  1. class EncoderMUG2d_LSTM(nn.Module):
  2. def __init__(self, input_nc=3, encode_dim=1024, lstm_hidden_size=1024, seq_len=SEQ_SIZE, num_lstm_layers=1, bidirectional=False):
  3. super(EncoderMUG2d_LSTM, self).__init__()
  4. self.seq_len = seq_len
  5. self.num_directions = 2 if bidirectional else 1
  6. self.num_lstm_layers = num_lstm_layers
  7. self.lstm_hidden_size = lstm_hidden_size
  8. #3*128*128
  9. self.encoder = nn.Sequential(
  10. nn.Conv2d(input_nc, 32, 4,2,1), # 32*64*64
  11. nn.BatchNorm2d(32),
  12. nn.LeakyReLU(0.2, inplace=True),
  13. #32*63*63
  14. nn.Conv2d(32, 64, 4, 2, 1), # 64*32*32
  15. nn.BatchNorm2d(64),
  16. nn.LeakyReLU(0.2, inplace=True),
  17. #64*31*31
  18. nn.Conv2d(64, 128, 4, 2, 1), # 128*16*16
  19. nn.BatchNorm2d(128),
  20. nn.LeakyReLU(0.2, inplace=True),
  21. nn.Conv2d(128, 256, 4, 2, 1), # 256*8*8
  22. nn.BatchNorm2d(256),
  23. nn.LeakyReLU(0.2, inplace=True),
  24. nn.Conv2d(256, 512, 4, 2, 1), # 512*4*4
  25. nn.BatchNorm2d(512),
  26. nn.LeakyReLU(0.2, inplace=True),
  27. nn.Conv2d(512, 512, 4, 2, 1), # 512*2*2
  28. nn.BatchNorm2d(512),
  29. nn.LeakyReLU(0.2, inplace=True),
  30. nn.Conv2d(512, 1024, 4, 2, 1), # 1024*1*1
  31. nn.BatchNorm2d(1024),
  32. nn.LeakyReLU(0.2, inplace=True),
  33. )
  34. self.fc = nn.Linear(1024, encode_dim)
  35. self.lstm = nn.LSTM(encode_dim, encode_dim, batch_first=True)
  36. def init_hidden(self, x):
  37. batch_size = x.size(0)
  38. h = x.data.new(
  39. self.num_directions * self.num_lstm_layers, batch_size, self.lstm_hidden_size).zero_()
  40. c = x.data.new(
  41. self.num_directions * self.num_lstm_layers, batch_size, self.lstm_hidden_size).zero_()
  42. return Variable(h), Variable(c)
  43. def forward(self, x):
  44. #x.shape [batchsize,seqsize,3,128,128]
  45. B = x.size(0)
  46. x = x.view(B * SEQ_SIZE, 3, 128, 128) #x.shape[batchsize*seqsize,3,128,128]
  47. # [batchsize*seqsize, 3, 128, 128] -> [batchsize*seqsize, 1024,1,1]
  48. x = self.encoder(x)
  49. #[batchsize * seqsize, 1024, 1, 1]-> [batchsize*seqsize, 1024]
  50. x = x.view(-1, 1024)
  51. # [batchsize * seqsize, 1024]
  52. x = self.fc(x)
  53. # [batchsize , seqsize ,1024]
  54. x = x.view(-1, SEQ_SIZE, x.size(1))
  55. h0, c0 = self.init_hidden(x)
  56. output, (hn,cn) = self.lstm(x,(h0,c0))
  57. return hn
  58. class DecoderMUG2d(nn.Module):
  59. def __init__(self, output_nc=3, encode_dim=1024): #output size: 64x64
  60. super(DecoderMUG2d, self).__init__()
  61. self.project = nn.Sequential(
  62. nn.Linear(encode_dim, 1024*1*1),
  63. nn.ReLU(inplace=True)
  64. )
  65. self.decoder = nn.Sequential(
  66. nn.ConvTranspose2d(1024, 512, 4), # 512*4*4
  67. nn.BatchNorm2d(512),
  68. nn.ReLU(True),
  69. nn.ConvTranspose2d(512, 256, 4, stride=2), # 256*10*10
  70. nn.BatchNorm2d(256),
  71. nn.ReLU(True),
  72. nn.ConvTranspose2d(256, 128, 4), # 128*13*13
  73. nn.BatchNorm2d(128),
  74. nn.ReLU(True),
  75. nn.ConvTranspose2d(128, 64, 4,stride=2), # 64*28*28
  76. nn.BatchNorm2d(64),
  77. nn.ReLU(True),
  78. nn.ConvTranspose2d(64, 32, 4), # 32*31*31
  79. nn.BatchNorm2d(32),
  80. nn.ReLU(True),
  81. nn.ConvTranspose2d(32, 16, 4,stride=2), # 16*64*64
  82. nn.BatchNorm2d(16),
  83. nn.ReLU(True),
  84. nn.ConvTranspose2d(16, output_nc, 4, stride=2, padding=1), # 3*128*128
  85. nn.Sigmoid(),
  86. )
  87. def forward(self, x):
  88. x = self.project(x)
  89. x = x.view(-1, 1024, 1, 1)
  90. decode = self.decoder(x)
  91. return decode
  92. class net(nn.Module):
  93. def __init__(self):
  94. super(net,self).__init__()
  95. self.n1 = EncoderMUG2d_LSTM()
  96. self.n2 = DecoderMUG2d()
  97. def forward(self, x):
  98. output = self.n1(x)
  99. output = self.n2(output) #B*3*128*128
  100. return output

 

 

训练过程: 

if __name__ == '__main__':
    # Train the encoder-LSTM-decoder for 10 epochs with MSE reconstruction
    # loss; periodically dump decoded vs. ground-truth frames to disk.
    model = net()
    if torch.cuda.is_available():
        model.cuda()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    loss_func = nn.MSELoss()
    # NOTE(review): this eager fetch is unused — the loop below rebinds
    # inputs/label; it only spins up the DataLoader workers, if anything.
    inputs, label = next(iter(train_loader))
    for epoch in range(10):
        print('epoch {}'.format(epoch + 1))
        train_loss = 0.
        train_acc = 0.
        #count = 1
        for batch_x, batch_y in train_loader:
            inputs, label = Variable(batch_x).cuda(), Variable(batch_y).cuda()
            output = model(inputs)
            # NOTE(review): MSELoss already averages over all elements;
            # dividing by the batch size again scales the loss down further —
            # confirm this is intentional.
            loss = loss_func(output, label)/label.shape[0]
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            print('epoch: {}, Loss: {:.4f}'.format(epoch + 1, loss.data.cpu().numpy()))
        # Every 5 epochs, save the decoded images and the original images
        # (from the last batch of the epoch).
        if (epoch + 1) % 5 == 0:
            pic = to_img(output.cpu().data)
            img = to_img(label.cpu().data)
            if not os.path.exists('./conv_autoencoder'):
                os.mkdir('./conv_autoencoder')
            save_image(pic, './conv_autoencoder/decode_image_{}.png'.format(epoch + 1))
            save_image(img, './conv_autoencoder/raw_image_{}.png'.format(epoch + 1))
        #count = count +1
    # Persist the trained weights once training finishes.
    torch.save(model.state_dict(), PATH_SAVE)

 

 

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/不正经/article/detail/219238
推荐阅读
相关标签
  

闽ICP备14008679号