The LoFTR model comes from the CVPR 2021 paper LoFTR: Detector-Free Local Feature Matching with Transformers. It performs image registration in PyTorch and, unlike the superpoint+superglue pipeline,
is an end-to-end matching method. A project related to the official LoFTR repository is loftr2onnx; overall, loftr2onnx is more convenient to use and gives better results, but the ONNX model it exports is broken and cannot be used.
Project: https://github.com/zju3dv/LoFTR
Project 2: https://github.com/oooooha/loftr2onnx
Online demo: https://huggingface.co/spaces/kornia/Kornia-LoFTR
LoFTR paper walkthrough video: https://www.bilibili.com/video/BV1Ce4y1Z7Gq/
Visit https://huggingface.co/spaces/kornia/Kornia-LoFTR to try the model in the browser.
The point matching result looks like the following; compared with the sp+sg approach, the matched point pairs are visibly much closer to parallel.
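If you just want a quick local sanity check before setting anything up, the Kornia port of LoFTR (the same model behind the demo above) can be called directly. A minimal sketch, assuming kornia is installed and using random tensors in place of real grayscale images of shape [1, 1, H, W] with values in [0, 1]:

import torch
import kornia.feature as KF

# Run the Kornia port of LoFTR on dummy grayscale inputs.
matcher = KF.LoFTR(pretrained='indoor')   # 'outdoor' weights are also available
img0 = torch.rand(1, 1, 384, 384)         # replace with real [1,1,H,W] tensors in [0,1]
img1 = torch.rand(1, 1, 384, 384)
with torch.no_grad():
    out = matcher({'image0': img0, 'image1': img1})
print(out['keypoints0'].shape, out['keypoints1'].shape, out['confidence'].shape)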
Open https://github.com/zju3dv/LoFTR and download the code.
Install the dependencies: unzip the code, open a terminal in the project directory, and run pip install -r .\requirements.txt
Download the weights from https://drive.google.com/drive/folders/1DOcOPZb3-5cWxLqn256AhwUVjBPifhuf. If you need to train, the training and test data can be downloaded there as well.
Unzip the weight archive and place it under the project root directory.
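A quick sanity check (a sketch, assuming the archive was extracted to weights/ under the project root) that the checkpoint is readable and contains the expected 'state_dict' entry:

import torch

# Load the checkpoint on CPU and confirm it contains a 'state_dict' entry.
ckpt = torch.load("weights/indoor_ds_new.ckpt", map_location='cpu')
print(list(ckpt.keys()))                     # should include 'state_dict'
print(len(ckpt['state_dict']), "tensors in state_dict")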
Save the code from section 3 of https://hpg123.blog.csdn.net/article/details/124824892 (the part titled "read_img_as_tensor function in superpoint") as imgutils.py.
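The real helper is the one at the link above; purely to illustrate the expected call signature, a hypothetical stand-in might look like the sketch below: it reads the image, resizes it, and returns a normalized grayscale tensor of shape [1, 1, H, W] on the GPU plus the resized image for later visualization. The actual function at the link may differ in details.

import cv2
import numpy as np
import torch

def read_img_as_tensor(path, size=(384, 384), device='cuda'):
    # Hypothetical stand-in for the helper from the linked post.
    # Returns (tensor [1, 1, H, W] scaled to [0, 1] on `device`, resized image).
    img = cv2.imread(path)                          # BGR uint8
    img = cv2.resize(img, size)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)    # LoFTR consumes grayscale input
    t = torch.from_numpy(gray)[None, None].float() / 255.0
    return t.to(device), img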
from src.loftr import LoFTR, default_cfg
import torch
from imgutils import *
import time

# Initialize LoFTR
matcher = LoFTR(config=default_cfg)
matcher.load_state_dict(torch.load("weights/indoor_ds_new.ckpt")['state_dict'])
matcher = matcher.eval().cuda()

p1 = r'C:\Users\hpg\Pictures\t1.jpg'
p2 = r'C:\Users\hpg\Pictures\t2.jpg'
t1, im1 = read_img_as_tensor(p1, (384, 384))
t2, im2 = read_img_as_tensor(p2, (384, 384))
batch = {'image0': t1, 'image1': t2}

# Inference
with torch.no_grad():
    matcher(batch)
    #
    t0 = time.time()
    times = 10
    for i in range(times):
        matcher(batch)
    rt1 = time.time() - t0
    rt1 = rt1 / times
    mkpts0 = batch['mkpts0_f'].cpu().numpy()
    mkpts1 = batch['mkpts1_f'].cpu().numpy()
    mconf = batch['mconf'].cpu().numpy()
print(f'运行时间:{rt1:.4f}', mkpts0.shape, mkpts1.shape, mconf)
The run looks like the following: one image pair takes about 0.19 s (laptop, GTX 1060); on a desktop RTX 3060 this should drop to roughly 0.05 s per pair.
运行时间:0.1933 (32, 2) (32, 2) [0.22855578 0.21740437 0.34927088 0.28389925 0.27157754 0.26966828
0.22636016 0.22058277 0.20475665 0.20878278 0.22838292 0.25448585
0.27047077 0.34403533 0.22612476 0.2044811 0.26239234 0.32797554
0.2263804 0.26544347 0.3401669 0.39336586 0.3473139 0.28230694
0.23061718 0.23949552 0.46178365 0.3540019 0.5322925 0.27200237
0.26731068 0.39827508]
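Note that CUDA kernels launch asynchronously, so a plain wall-clock loop like the one above can misreport GPU time. If the numbers matter, a synchronized variant along these lines (a sketch, not the benchmark used here) is more reliable:

import time
import torch

def benchmark(matcher, batch, times=10):
    # Synchronize the GPU before and after the loop so time.time() measures finished work.
    with torch.no_grad():
        matcher(dict(batch))             # warm-up run
        torch.cuda.synchronize()
        t0 = time.time()
        for _ in range(times):
            matcher(dict(batch))
        torch.cuda.synchronize()
    return (time.time() - t0) / times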
Image registration can also be done with the https://github.com/oooooha/loftr2onnx project.
The code is as follows:
from loftr_wrapper import LoFTRWrapper as LoFTR
import torch
from imgutils import *
import time

# Initialize LoFTR
matcher = LoFTR()
matcher.load_state_dict(torch.load("weights/indoor_ds_new.ckpt")['state_dict'])
matcher = matcher.eval().cuda()

p1 = r'C:\Users\hpg\Pictures\t1.jpg'
p2 = r'C:\Users\hpg\Pictures\t2.jpg'
t1, im1 = read_img_as_tensor(p1, (384, 384))
t2, im2 = read_img_as_tensor(p2, (384, 384))

# Inference
with torch.no_grad():
    result = matcher(t1, t2)
    #
    t0 = time.time()
    times = 10
    for i in range(times):
        result = matcher(t1, t2)
    rt1 = time.time() - t0
    rt1 = rt1 / times
    mkpts0 = result['keypoints0'].cpu().numpy()
    mkpts1 = result['keypoints1'].cpu().numpy()
    mconf = result['confidence'].cpu().numpy()
print(f'运行时间:{rt1:.4f}', mkpts0.shape, mkpts1.shape, mconf)
The output is shown below; note that it differs from the output of the official LoFTR project.
运行时间:0.1925 (212, 2) (212, 2) [0.4566688 0.53420454 0.5319168 0.5320238 0.46744433 0.4068214 0.5363396 0.45674214 0.60001785 0.6576139 0.53006035 0.59590924 0.5725811 0.5505655 0.44364485 0.40315574 0.4293331 0.5060973 0.6550978 0.52451503 0.553644 0.63088214 0.6906601 0.61668074 0.4543735 0.4138872 0.4332955 0.47855106 0.60533136 0.6735143 0.7912271 0.7220486 0.75414115 0.75669855 0.60389113 0.40305066 0.71130437 0.6583284 0.5403245 0.5433615 0.40149704 0.6673844 0.4093839 0.5410701 0.51509964 0.42121148 0.68238974 0.55247396 0.5116625 0.8369319 0.53321654 0.5323315 0.5779519 0.64705926 0.43591025 0.40134645 0.4599252 0.46620858 0.6388375 0.8354758 0.515318 0.6521981 0.54744494 0.64528877 0.7466613 0.6359517 0.58179545 0.4587202 0.4856584 0.42029297 0.43322447 0.43220758 0.6896481 0.79645556 0.5817581 0.75245494 0.5786756 0.7251559 0.814531 0.49031648 0.46484298 0.54241467 0.5943087 0.7245115 0.6457875 0.8097793 0.7199513 0.49220178 0.5443373 0.4086104 0.5046131 0.7193697 0.6752727 0.41796637 0.5513792 0.7087418 0.7779165 0.75016826 0.68525094 0.58962977 0.6315668 0.4913085 0.56355244 0.41288543 0.52281946 0.42782715 0.43921712 0.5216018 0.5566503 0.78442967 0.6013023 0.42023212 0.43102428 0.61564064 0.40717542 0.49634054 0.45509326 0.4511342 0.41775596 0.55897176 0.56803375 0.6018254 0.71239305 0.44001386 0.43651453 0.6947733 0.8648205 0.4988858 0.40208712 0.71607304 0.9030141 0.5543826 0.49472648 0.5359598 0.74733096 0.6617334 0.7066015 0.725677 0.43446922 0.5126569 0.52367914 0.45096788 0.4248741 0.43285275 0.723374 0.86523044 0.65740126 0.427191 0.4776224 0.4801826 0.4530296 0.4275035 0.527438 0.52301216 0.58992577 0.41727343 0.48609605 0.7365703 0.6339512 0.6379226 0.4489899 0.41325048 0.5010124 0.49238032 0.57079905 0.62783945 0.5092921 0.5726387 0.60590863 0.44714844 0.6284152 0.40801758 0.40126294 0.4221419 0.52245826 0.70989937 0.49206337 0.553483 0.4956581 0.4180697 0.6228596 0.6543849 0.7747963 0.61180156 0.60290194 0.5421194 0.6149054 0.48783877 0.40048426 0.47044232 0.40145218 0.42359856 0.68902797 0.44713116 0.84827214 0.48961237 0.6137104 0.7752426 0.7184252 0.71058017 0.47483382 0.7151901 0.78853625 0.66988254 0.7502565 0.42592585 0.49173304 0.4657402 0.59592575 0.42850277 0.4645101 0.5070625 ]
1. The main reason is that the loftr2onnx project adjusts LoFTR's forward pass through loftr_wrapper, shown below.
#!/usr/bin/env python
import copy
import os
import sys
from typing import Any, Dict

import torch
from einops.einops import rearrange

_CURRENT_DIR = os.path.dirname(os.path.realpath(__file__))
sys.path.append(os.path.join(_CURRENT_DIR, "LoFTR"))

from loftr import LoFTR, default_cfg

DEFAULT_CFG = copy.deepcopy(default_cfg)
DEFAULT_CFG["coarse"]["temp_bug_fix"] = True


class LoFTRWrapper(LoFTR):
    def __init__(
        self,
        config: Dict[str, Any] = DEFAULT_CFG,
    ):
        LoFTR.__init__(self, config)

    def forward(
        self,
        image0: torch.Tensor,
        image1: torch.Tensor,
    ) -> Dict[str, torch.Tensor]:
        data = {
            "image0": image0,
            "image1": image1,
        }
        del image0, image1

        data.update(
            {
                "bs": data["image0"].size(0),
                "hw0_i": data["image0"].shape[2:],
                "hw1_i": data["image1"].shape[2:],
            }
        )

        if data["hw0_i"] == data["hw1_i"]:  # faster & better BN convergence
            feats_c, feats_f = self.backbone(
                torch.cat([data["image0"], data["image1"]], dim=0)
            )
            (feat_c0, feat_c1), (feat_f0, feat_f1) = feats_c.split(
                data["bs"]
            ), feats_f.split(data["bs"])
        else:  # handle different input shapes
            (feat_c0, feat_f0), (feat_c1, feat_f1) = self.backbone(
                data["image0"]
            ), self.backbone(data["image1"])

        data.update(
            {
                "hw0_c": feat_c0.shape[2:],
                "hw1_c": feat_c1.shape[2:],
                "hw0_f": feat_f0.shape[2:],
                "hw1_f": feat_f1.shape[2:],
            }
        )

        # 2. coarse-level loftr module
        # add featmap with positional encoding, then flatten it to sequence [N, HW, C]
        feat_c0 = rearrange(self.pos_encoding(feat_c0), "n c h w -> n (h w) c")
        feat_c1 = rearrange(self.pos_encoding(feat_c1), "n c h w -> n (h w) c")

        mask_c0 = mask_c1 = None  # mask is useful in training
        if "mask0" in data:
            mask_c0, mask_c1 = data["mask0"].flatten(-2), data["mask1"].flatten(-2)
        feat_c0, feat_c1 = self.loftr_coarse(feat_c0, feat_c1, mask_c0, mask_c1)

        # 3. match coarse-level
        self.coarse_matching(feat_c0, feat_c1, data, mask_c0=mask_c0, mask_c1=mask_c1)

        # 4. fine-level refinement
        feat_f0_unfold, feat_f1_unfold = self.fine_preprocess(
            feat_f0, feat_f1, feat_c0, feat_c1, data
        )
        if feat_f0_unfold.size(0) != 0:  # at least one coarse level predicted
            feat_f0_unfold, feat_f1_unfold = self.loftr_fine(
                feat_f0_unfold, feat_f1_unfold
            )

        # 5. match fine-level
        self.fine_matching(feat_f0_unfold, feat_f1_unfold, data)

        rename_keys: Dict[str, str] = {
            "mkpts0_f": "keypoints0",
            "mkpts1_f": "keypoints1",
            "mconf": "confidence",
        }
        out: Dict[str, torch.Tensor] = {}
        for k, v in rename_keys.items():
            _d = data[k]
            if isinstance(_d, torch.Tensor):
                out[v] = _d
            else:
                raise TypeError(
                    f"Expected torch.Tensor for item `{k}`. Gotcha {type(_d)}"
                )
        del data

        return out
2. The cfg may also differ.
The default configuration in loftr2onnx lives in loftr\utils\cvpr_ds_config.py, shown below. Note that _CN.MATCH_COARSE.THR and _CN.MATCH_COARSE.BORDER_RM have been modified and differ from the upstream default values.
from yacs.config import CfgNode as CN


def lower_config(yacs_cfg):
    if not isinstance(yacs_cfg, CN):
        return yacs_cfg
    return {k.lower(): lower_config(v) for k, v in yacs_cfg.items()}


_CN = CN()
_CN.BACKBONE_TYPE = 'ResNetFPN'
_CN.RESOLUTION = (8, 2)  # options: [(8, 2), (16, 4)]
_CN.FINE_WINDOW_SIZE = 5  # window_size in fine_level, must be odd
_CN.FINE_CONCAT_COARSE_FEAT = True

# 1. LoFTR-backbone (local feature CNN) config
_CN.RESNETFPN = CN()
_CN.RESNETFPN.INITIAL_DIM = 128
_CN.RESNETFPN.BLOCK_DIMS = [128, 196, 256]  # s1, s2, s3

# 2. LoFTR-coarse module config
_CN.COARSE = CN()
_CN.COARSE.D_MODEL = 256
_CN.COARSE.D_FFN = 256
_CN.COARSE.NHEAD = 8
_CN.COARSE.LAYER_NAMES = ['self', 'cross'] * 4
_CN.COARSE.ATTENTION = 'linear'  # options: ['linear', 'full']
_CN.COARSE.TEMP_BUG_FIX = False

# 3. Coarse-Matching config
_CN.MATCH_COARSE = CN()
_CN.MATCH_COARSE.THR = 0.4  # thresh default=0.2
_CN.MATCH_COARSE.BORDER_RM = 4  # border default=2
_CN.MATCH_COARSE.MATCH_TYPE = 'dual_softmax'  # options: ['dual_softmax, 'sinkhorn']
_CN.MATCH_COARSE.DSMAX_TEMPERATURE = 0.1
_CN.MATCH_COARSE.SKH_ITERS = 3
_CN.MATCH_COARSE.SKH_INIT_BIN_SCORE = 1.0
_CN.MATCH_COARSE.SKH_PREFILTER = True
_CN.MATCH_COARSE.TRAIN_COARSE_PERCENT = 0.4  # training tricks: save GPU memory
_CN.MATCH_COARSE.TRAIN_PAD_NUM_GT_MIN = 200  # training tricks: avoid DDP deadlock

# 4. LoFTR-fine module config
_CN.FINE = CN()
_CN.FINE.D_MODEL = 128
_CN.FINE.D_FFN = 128
_CN.FINE.NHEAD = 8
_CN.FINE.LAYER_NAMES = ['self', 'cross'] * 1
_CN.FINE.ATTENTION = 'linear'

default_cfg = lower_config(_CN)
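If you want the official repository to behave like loftr2onnx, the same two thresholds (and the temp_bug_fix flag that loftr_wrapper flips) can be overridden before building the matcher. A sketch, assuming default_cfg is the lowered config dict imported from src.loftr as in the earlier examples:

import copy
import torch
from src.loftr import LoFTR, default_cfg

cfg = copy.deepcopy(default_cfg)
cfg['match_coarse']['thr'] = 0.4        # loftr2onnx value; upstream default is 0.2
cfg['match_coarse']['border_rm'] = 4    # loftr2onnx value; upstream default is 2
cfg['coarse']['temp_bug_fix'] = True    # also set by loftr_wrapper

matcher = LoFTR(config=cfg)
matcher.load_state_dict(torch.load("weights/indoor_ds_new.ckpt")['state_dict'])
matcher = matcher.eval().cuda()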
The imgutils used here differs slightly from the one described earlier: open https://blog.csdn.net/a486259/article/details/124824892 and save the code from section 1 as imgutils.py.
from src.loftr import LoFTR, default_cfg
import torch
from imgutils import *
import time

# Initialize LoFTR
matcher = LoFTR(config=default_cfg)
matcher.load_state_dict(torch.load("weights/indoor_ds_new.ckpt")['state_dict'])
matcher = matcher.eval().cuda()

p1 = r'C:\Users\hpg\Pictures\t1.jpg'
p2 = r'C:\Users\hpg\Pictures\t2.jpg'
t1, im1 = read_img_as_tensor_gray(p1, (384, 384))
t2, im2 = read_img_as_tensor_gray(p2, (384, 384))
batch = {'image0': t1, 'image1': t2}

# Inference
with torch.no_grad():
    t0 = time.time()
    times = 1
    for i in range(times):
        matcher(batch)
    rt1 = time.time() - t0
    rt1 = rt1 / times
    # keep the outputs as tensors: the drawing loop and getGoodMatchPoint below
    # rely on tensor methods (.to(), .size(), .item())
    mkpts0 = batch['mkpts0_f']
    mkpts1 = batch['mkpts1_f']
    confidence = batch['mconf']
print(f'运行时间:{rt1:.4f}', mkpts0.shape, mkpts1.shape)

import cv2 as cv
pt_num = mkpts0.shape[0]
im_dst, im_res = im1, im2
img = np.zeros((max(im_dst.shape[0], im_res.shape[0]), im_dst.shape[1]+im_res.shape[1]+10, 3))
img[:, :im_res.shape[0], ] = im_dst
img[:, -im_res.shape[0]:] = im_res
img = img.astype(np.uint8)
match_threshold = 0.6
for i in range(0, pt_num):
    if (confidence[i] > match_threshold):
        pt0 = mkpts0[i].to('cpu').numpy().astype(np.int32)
        pt1 = mkpts1[i].to('cpu').numpy().astype(np.int32)
        #cv.circle(img, (pt0[0], pt0[1]), 1, (0, 0, 255), 2)
        #cv.circle(img, (pt1[0], pt1[1]+650), (0, 0, 255), 2)
        cv.line(img, tuple(pt0.tolist()), (pt1[0]+im_res.shape[0], pt1[1]), (0, 255, 0), 1)
myimshow(img, size=12)

import cv2
def getGoodMatchPoint(mkpts0, mkpts1, confidence, match_threshold: float = 0.5):
    n = min(mkpts0.size(0), mkpts1.size(0))
    srcImage1_matchedKPs, srcImage2_matchedKPs = [], []
    if (match_threshold > 1 or match_threshold < 0):
        print("match_threshold error!")
    for i in range(n):
        kp0 = mkpts0[i]
        kp1 = mkpts1[i]
        pt0 = (kp0[0].item(), kp0[1].item())
        pt1 = (kp1[0].item(), kp1[1].item())
        c = confidence[i].item()
        if (c > match_threshold):
            srcImage1_matchedKPs.append(pt0)
            srcImage2_matchedKPs.append(pt1)
    return np.array(srcImage1_matchedKPs), np.array(srcImage2_matchedKPs)

pts_src, pts_dst = getGoodMatchPoint(mkpts0, mkpts1, confidence)
h1, status = cv2.findHomography(pts_src, pts_dst, cv.RANSAC, 8)
im_out1 = cv2.warpPerspective(im_dst, h1, (im_dst.shape[1], im_dst.shape[0]))
im_out2 = cv2.warpPerspective(im_res, h1, (im_dst.shape[1], im_dst.shape[0]), 16)
# here im_res and im_out1 are strictly aligned with each other
myimshowsCL([im_dst, im_out1, im_res, im_out2], rows=2, cols=2, size=6)
Running this code raises an error: too few points are matched, so the transformation matrix cannot be estimated and the overlapping region cannot be extracted; see the sketch below.
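cv2.findHomography needs at least four point pairs, and with the confidence cutoff in getGoodMatchPoint very few matches survive here. A defensive call could check the count first; a sketch reusing getGoodMatchPoint and the keypoints from the listing above:

import cv2

pts_src, pts_dst = getGoodMatchPoint(mkpts0, mkpts1, confidence, match_threshold=0.5)
if len(pts_src) < 4:
    # Not enough correspondences to estimate a homography; lower the threshold or skip.
    raise RuntimeError(f"only {len(pts_src)} confident matches, need at least 4")
h1, status = cv2.findHomography(pts_src, pts_dst, cv2.RANSAC, 8)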
As above, the imgutils used here is the one from section 1 of https://blog.csdn.net/a486259/article/details/124824892, saved as imgutils.py.
The code is as follows:
from loftr_wrapper import LoFTRWrapper as LoFTR
import torch
from imgutils import *
import time

# Initialize LoFTR
matcher = LoFTR()
matcher.load_state_dict(torch.load("weights/indoor_ds_new.ckpt")['state_dict'])
matcher = matcher.eval().cuda()

p1 = r'C:\Users\hpg\Pictures\t1.jpg'
p2 = r'C:\Users\hpg\Pictures\t2.jpg'
t1, im1 = read_img_as_tensor_gray(p1, (384, 384))
t2, im2 = read_img_as_tensor_gray(p2, (384, 384))

# Inference
with torch.no_grad():
    #result=matcher(t1,t2)
    #
    t0 = time.time()
    times = 1
    for i in range(times):
        result = matcher(t1, t2)
    rt1 = time.time() - t0
    rt1 = rt1 / times
    mkpts0 = result['keypoints0']  # .cpu().numpy()
    mkpts1 = result['keypoints1']  # .cpu().numpy()
    confidence = result['confidence']  # .cpu().numpy()
print(f'运行时间:{rt1:.4f}', mkpts0.shape, mkpts1.shape, confidence)

import cv2 as cv
pt_num = mkpts0.shape[0]
im_dst, im_res = im1, im2
img = np.zeros((max(im_dst.shape[0], im_res.shape[0]), im_dst.shape[1]+im_res.shape[1]+10, 3))
img[:, :im_res.shape[0], ] = im_dst
img[:, -im_res.shape[0]:] = im_res
img = img.astype(np.uint8)
match_threshold = 0.01
for i in range(0, pt_num):
    if (confidence[i] > match_threshold):
        pt0 = mkpts0[i].to('cpu').numpy().astype(np.int32)
        pt1 = mkpts1[i].to('cpu').numpy().astype(np.int32)
        #cv.circle(img, (pt0[0], pt0[1]), 1, (0, 0, 255), 2)
        #cv.circle(img, (pt1[0], pt1[1]+650), (0, 0, 255), 2)
        cv.line(img, tuple(pt0.tolist()), (pt1[0]+im_res.shape[0], pt1[1]), (0, 255, 0), 1)
myimshow(img, size=12)

import cv2
def getGoodMatchPoint(mkpts0, mkpts1, confidence, match_threshold: float = 0.5):
    n = min(mkpts0.size(0), mkpts1.size(0))
    srcImage1_matchedKPs, srcImage2_matchedKPs = [], []
    if (match_threshold > 1 or match_threshold < 0):
        print("match_threshold error!")
    for i in range(n):
        kp0 = mkpts0[i]
        kp1 = mkpts1[i]
        pt0 = (kp0[0].item(), kp0[1].item())
        pt1 = (kp1[0].item(), kp1[1].item())
        c = confidence[i].item()
        if (c > match_threshold):
            srcImage1_matchedKPs.append(pt0)
            srcImage2_matchedKPs.append(pt1)
    return np.array(srcImage1_matchedKPs), np.array(srcImage2_matchedKPs)

pts_src, pts_dst = getGoodMatchPoint(mkpts0, mkpts1, confidence)
h1, status = cv2.findHomography(pts_src, pts_dst, cv.RANSAC, 4)
# im_dst=im_dst.astype(np.float32)/255
# im_res=im_res.astype(np.float32)/255
print(im_dst.shape, im_dst.dtype, im_dst.max(), im_res.shape, im_res.dtype, im_res.max(), h1)
im_out1 = cv2.warpPerspective(im_dst, h1, (im_dst.shape[1], im_dst.shape[0]))
im_out2 = cv2.warpPerspective(im_res, h1, (im_dst.shape[1], im_dst.shape[0]), 16)
# here im_res and im_out1 are strictly aligned with each other
myimshowsCL([im_dst, im_out1, im_res, im_out2], rows=2, cols=2, size=6)
The extracted point correspondences are shown below.
The extracted overlapping region: