"""Real-time human pose estimation on a webcam stream with YOLOv7-w6-pose.

Frames are read from the default camera, letterboxed to 960 px, run through
the pose model, and shown with the detected skeletons drawn on top.
Press ``q`` in the preview window to quit.
"""
import time
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
import torch
import cv2
from torchvision import transforms
import numpy as np
from utils.datasets import letterbox
from utils.general import non_max_suppression_kpt
from utils.plots import output_to_keypoint, plot_skeleton_kpts

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Half precision is only used on the GPU; the CPU path stays in float32.
use_half = device.type != "cpu"

# map_location lets a checkpoint that was saved on a GPU load on a CPU-only host.
checkpoint = torch.load('yolov7-w6-pose.pt', map_location=device)
model = checkpoint['model']
model = model.half().to(device) if use_half else model.float().to(device)
_ = model.eval()

# Read frames from the camera
print(device)
url = 0
cap = cv2.VideoCapture(url)
to_tensor = transforms.ToTensor()

try:
    # Create the preview window once instead of on every frame.
    cv2.namedWindow("ning", cv2.WINDOW_NORMAL)
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # Camera unplugged or stream ended: stop instead of passing None on.
            break

        # OpenCV delivers BGR frames; convert to RGB for the model so that the
        # RGB->BGR conversion before display restores the true colours.
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame = letterbox(frame, 960, stride=64, auto=True)[0]
        image = to_tensor(frame).unsqueeze(0).to(device)
        image = image.half() if use_half else image.float()

        # Pose estimation
        t1 = time.time()
        with torch.no_grad():
            output, _ = model(image)
            output = non_max_suppression_kpt(output, 0.25, 0.65,
                                             nc=model.yaml['nc'],
                                             nkpt=model.yaml['nkpt'],
                                             kpt_label=True)
            output = output_to_keypoint(output)

        # Back to an HWC uint8 BGR image that OpenCV can draw on and display.
        nimg = image[0].permute(1, 2, 0) * 255
        nimg = nimg.cpu().numpy().astype(np.uint8)
        nimg = cv2.cvtColor(nimg, cv2.COLOR_RGB2BGR)
        for idx in range(output.shape[0]):
            # Columns from index 7 onwards are passed on as the keypoint data.
            plot_skeleton_kpts(nimg, output[idx, 7:].T, 3)

        # Show the annotated frame
        cv2.imshow("ning", nimg)
        t2 = time.time()
        # NOTE: the measured span also covers NMS, drawing and display.
        print(f'Done. ({(1E3 * (t2 - t1)):.1f}ms) Inference')
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even after an error.
    cap.release()
    cv2.destroyAllWindows()

# Single-image variant (disabled), kept for reference:
# image = cv2.imread('bus.jpg')
# image = letterbox(image, 960, stride=64, auto=True)[0]
# image_ = image.copy()
# image = transforms.ToTensor()(image)
# image = torch.tensor(np.array([image.numpy()]))
# image = image.to(device)
# image = image.half()
#
# output, _ = model(image)
#
# output = non_max_suppression_kpt(output, 0.25, 0.65, nc=model.yaml['nc'], nkpt=model.yaml['nkpt'], kpt_label=True)
# with torch.no_grad():
#     output = output_to_keypoint(output)
# nimg = image[0].permute(1, 2, 0) * 255
# nimg = nimg.cpu().numpy().astype(np.uint8)
# nimg = cv2.cvtColor(nimg, cv2.COLOR_RGB2BGR)
# for idx in range(output.shape[0]):
#     plot_skeleton_kpts(nimg, output[idx, 7:].T, 3)
# plt.figure(figsize=(8, 8))
# plt.axis('off')
# plt.imshow(nimg)
# plt.show()
# cv2.imshow('nimg',nimg)
# cv2.waitKey(100000)

分割的部分依赖于 facebook 的 detectron2,而 detectron2 要求 torch 版本不低于 1.8。由于之前我一直用的都是 1.7.1,因此这里需要创建一个新的环境:
# Create a new virtual environment
conda create -n pytorch1.8 python=3.8
conda activate pytorch1.8
# Install torch 1.8.2
pip install torch==1.8.2 torchvision==0.9.2 torchaudio==0.8.2 --extra-index-url https://download.pytorch.org/whl/lts/1.8/cu111
# Edit requirements.txt first: comment out the torch and torchvision entries
# so that the versions installed above are not replaced
pip install -r requirements.txt
# Install detectron2 from source (pip replaces the deprecated "setup.py install";
# the editable install requires the cloned directory to stay in place)
git clone https://github.com/facebookresearch/detectron2
python -m pip install -e detectron2
然后下载实例分割的模型 https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7-mask.pt ,并将模型放入源码根目录中
分割的示例代码在 tools/instance.ipynb,可以在 jupyter notebook 中直接运行
如果需要将 ipynb 文件转成 python 文件,就执行
jupyter nbconvert --to python tools/instance.ipynb
将生成的 tools/instance.py 拷贝到源码根目录下,然后修改文件的最后显示部分为保存结果图片
"""Instance segmentation of a single image with YOLOv7-mask.

Loads ``yolov7-mask.pt``, segments ``inference/images/horses.jpg`` and writes
the image with translucent masks and bounding boxes to
``instance_result.jpg``.
"""
import matplotlib.pyplot as plt
import torch
import cv2
import yaml
from torchvision import transforms
import numpy as np
from utils.datasets import letterbox
from utils.general import non_max_suppression_mask_conf
from detectron2.modeling.poolers import ROIPooler
from detectron2.structures import Boxes
from detectron2.utils.memory import retry_if_cuda_oom
from detectron2.layers import paste_masks_in_image

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Half precision is only used on the GPU; the CPU fallback stays in float32.
use_half = device.type != "cpu"

with open('data/hyp.scratch.mask.yaml') as f:
    hyp = yaml.load(f, Loader=yaml.FullLoader)
mask_resolution = hyp['mask_resolution']

# map_location lets a checkpoint that was saved on a GPU load on a CPU-only host.
checkpoint = torch.load('yolov7-mask.pt', map_location=device)
model = checkpoint['model']
model = model.half().to(device) if use_half else model.float().to(device)
_ = model.eval()

image_path = 'inference/images/horses.jpg'  # 504x378 image
image = cv2.imread(image_path)
if image is None:
    # cv2.imread signals a missing/unreadable file by returning None.
    raise FileNotFoundError(image_path)

# cv2.imread returns BGR; convert to RGB for the model so that the RGB->BGR
# conversion before cv2.imwrite restores the true colours.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
image = letterbox(image, 640, stride=64, auto=True)[0]
image = transforms.ToTensor()(image).unsqueeze(0).to(device)
image = image.half() if use_half else image.float()
height, width = image.shape[2:]

names = model.names  # only needed by the optional label drawing below
pooler = ROIPooler(output_size=mask_resolution, scales=(model.pooler_scale,),
                   sampling_ratio=1, pooler_type='ROIAlignV2', canonical_level=2)

# Inference only: no autograd graph is needed.
with torch.no_grad():
    output = model(image)
    inf_out, attn, bases, sem_output = (output['test'], output['attn'],
                                        output['bases'], output['sem'])
    bases = torch.cat([bases, sem_output], dim=1)
    output, output_mask, _, _, _ = non_max_suppression_mask_conf(
        inf_out, attn, bases, pooler, hyp,
        conf_thres=0.25, iou_thres=0.65, merge=False, mask_iou=None)

# Back to an HWC uint8 BGR image that OpenCV can draw on and save.
nimg = image[0].permute(1, 2, 0) * 255
nimg = nimg.cpu().numpy().astype(np.uint8)
pnimg = cv2.cvtColor(nimg, cv2.COLOR_RGB2BGR)

pred, pred_masks = output[0], output_mask[0]
# Guard against an image without detections (presumably reported as None or
# an empty tensor by the NMS helper - verify against utils.general).
if pred is not None and len(pred):
    bboxes = Boxes(pred[:, :4])
    original_pred_masks = pred_masks.view(-1, mask_resolution, mask_resolution)
    pred_masks = retry_if_cuda_oom(paste_masks_in_image)(
        original_pred_masks, bboxes, (height, width), threshold=0.5)
    pred_masks_np = pred_masks.detach().cpu().numpy()
    pred_cls = pred[:, 5].detach().cpu().numpy()
    pred_conf = pred[:, 4].detach().cpu().numpy()
    # np.int was removed in NumPy 1.24; the builtin int is the replacement.
    nbboxes = bboxes.tensor.detach().cpu().numpy().astype(int)

    for one_mask, bbox, cls, conf in zip(pred_masks_np, nbboxes, pred_cls, pred_conf):
        if conf < 0.25:
            continue
        color = [np.random.randint(255), np.random.randint(255), np.random.randint(255)]
        # Blend the instance colour 50/50 into the pixels covered by the mask.
        pnimg[one_mask] = pnimg[one_mask] * 0.5 + np.array(color, dtype=np.uint8) * 0.5
        x1, y1, x2, y2 = (int(v) for v in bbox)
        pnimg = cv2.rectangle(pnimg, (x1, y1), (x2, y2), color, 2)
        # Optional class label (disabled):
        # label = '%s %.3f' % (names[int(cls)], conf)
        # t_size = cv2.getTextSize(label, 0, fontScale=0.5, thickness=1)[0]
        # c2 = x1 + t_size[0], y1 - t_size[1] - 3
        # pnimg = cv2.rectangle(pnimg, (x1, y1), c2, color, -1, cv2.LINE_AA)  # filled
        # pnimg = cv2.putText(pnimg, label, (x1, y1 - 2), 0, 0.5, [255, 255, 255], thickness=1, lineType=cv2.LINE_AA)

cv2.imwrite("instance_result.jpg", pnimg)

"""Instance segmentation of a video file with YOLOv7-mask.

Reads ``vehicle_test.mp4`` frame by frame, overlays translucent instance
masks and bounding boxes, shows a live preview and writes the annotated
frames to ``result_instance.mp4``. Press ``q`` in the preview to stop early.
"""
import torch
import cv2
import yaml
from torchvision import transforms
import numpy as np
from utils.datasets import letterbox
from utils.general import non_max_suppression_mask_conf
from detectron2.modeling.poolers import ROIPooler
from detectron2.structures import Boxes
from detectron2.utils.memory import retry_if_cuda_oom
from detectron2.layers import paste_masks_in_image

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Half precision is only used on the GPU; the CPU fallback stays in float32.
use_half = device.type != "cpu"

with open('data/hyp.scratch.mask.yaml') as f:
    hyp = yaml.load(f, Loader=yaml.FullLoader)
mask_resolution = hyp['mask_resolution']

# map_location lets a checkpoint that was saved on a GPU load on a CPU-only host.
checkpoint = torch.load('yolov7-mask.pt', map_location=device)
model = checkpoint['model']
model = model.half().to(device) if use_half else model.float().to(device)
_ = model.eval()

# Loop-invariant helpers, built once rather than for every frame.
to_tensor = transforms.ToTensor()
pooler = ROIPooler(output_size=mask_resolution, scales=(model.pooler_scale,),
                   sampling_ratio=1, pooler_type='ROIAlignV2', canonical_level=2)


def annotate_frame(frame_bgr, target_size):
    """Return a letterboxed BGR copy of *frame_bgr* with masks and boxes drawn.

    *target_size* is the size argument forwarded to ``letterbox``.
    """
    rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
    rgb = letterbox(rgb, target_size, stride=64, auto=True)[0]
    image = to_tensor(rgb).unsqueeze(0).to(device)
    image = image.half() if use_half else image.float()
    height, width = image.shape[2:]

    with torch.no_grad():
        output = model(image)
        inf_out, attn, bases, sem_output = (output['test'], output['attn'],
                                            output['bases'], output['sem'])
        bases = torch.cat([bases, sem_output], dim=1)
        output, output_mask, _, _, _ = non_max_suppression_mask_conf(
            inf_out, attn, bases, pooler, hyp,
            conf_thres=0.25, iou_thres=0.65, merge=False, mask_iou=None)

    # Back to an HWC uint8 BGR image that OpenCV can draw on, show and encode.
    nimg = image[0].permute(1, 2, 0) * 255
    nimg = nimg.cpu().numpy().astype(np.uint8)
    pnimg = cv2.cvtColor(nimg, cv2.COLOR_RGB2BGR)

    pred, pred_masks = output[0], output_mask[0]
    # Frames without detections are passed through unannotated (presumably the
    # NMS helper reports them as None or an empty tensor - verify).
    if pred is None or len(pred) == 0:
        return pnimg

    bboxes = Boxes(pred[:, :4])
    original_pred_masks = pred_masks.view(-1, mask_resolution, mask_resolution)
    pred_masks = retry_if_cuda_oom(paste_masks_in_image)(
        original_pred_masks, bboxes, (height, width), threshold=0.5)
    pred_masks_np = pred_masks.detach().cpu().numpy()
    pred_conf = pred[:, 4].detach().cpu().numpy()
    # np.int was removed in NumPy 1.24; the builtin int is the replacement.
    nbboxes = bboxes.tensor.detach().cpu().numpy().astype(int)

    for one_mask, bbox, conf in zip(pred_masks_np, nbboxes, pred_conf):
        if conf < 0.25:
            continue
        color = [np.random.randint(255), np.random.randint(255), np.random.randint(255)]
        # Blend the instance colour 50/50 into the pixels covered by the mask.
        pnimg[one_mask] = pnimg[one_mask] * 0.5 + np.array(color, dtype=np.uint8) * 0.5
        x1, y1, x2, y2 = (int(v) for v in bbox)
        pnimg = cv2.rectangle(pnimg, (x1, y1), (x2, y2), color, 2)
    return pnimg


cap = cv2.VideoCapture('vehicle_test.mp4')
if not cap.isOpened():
    print('open failed.')
    raise SystemExit(-1)

# Source resolution and frame rate
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
fps = cap.get(cv2.CAP_PROP_FPS)
if not fps > 0:
    # Some containers do not report a frame rate; fall back to 30 fps.
    fps = 30

# The first frame determines the letterboxed output size. It is kept and
# processed below instead of being silently dropped from the result.
flag, frame = cap.read()
if not flag:
    print('read failed.')
    cap.release()
    raise SystemExit(-1)
resize_height, resize_width = letterbox(frame, frame_width, stride=64, auto=True)[0].shape[:2]

# Writer for the result video
out = cv2.VideoWriter("result_instance.mp4",
                      cv2.VideoWriter_fourcc(*'mp4v'), fps,
                      (resize_width, resize_height))

try:
    while flag:
        pnimg = annotate_frame(frame, frame_width)
        cv2.imshow('YOLOv7 mask', pnimg)
        out.write(pnimg)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        flag, frame = cap.read()
finally:
    # Releasing the writer finalises the output file; skipping it can leave
    # result_instance.mp4 unplayable.
    cap.release()
    out.release()
    cv2.destroyAllWindows()

