赞
踩
这是一篇基于YOLOv5的对象检测代码的介绍。该代码是由Python编写的,用于管理和操作YOLOv5模型。这个类库的主要功能是提供一个方便的接口用于加载训练好的模型,处理输入的图像,并进行推理。此外,它还可以将检测结果绘制到原始图像上,以便于进行可视化。
首先,我们需要导入一些必要的库,包括OpenCV,Torch,以及YOLOv5的相关模块。这些库用于图像处理,深度学习模型操作,以及一些工具函数。
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import glob
import os
import sys
import time
from pathlib import Path
from time import sleep

FILE = Path(__file__).resolve()
ROOT = FILE.parents[0]  # YOLOv5 root directory
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))  # add ROOT to PATH
ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative

import cv2
import numpy as np
import torch
import torch.backends.cudnn as cudnn

from models.common import DetectMultiBackend
from utils.augmentations import letterbox
from utils.general import check_img_size, non_max_suppression, scale_coords
from utils.torch_utils import select_device
在 Yolov5Manager 类的初始化函数中,我们设置了模型的参数,包括权重文件的路径、标签名、图像大小、置信度阈值、IOU 阈值、设备类型,以及是否使用半精度计算。然后,我们调用 YOLOv5 原版的 DetectMultiBackend 函数来加载模型,并根据所选择的设备(CPU 或 GPU)以及计算精度来配置模型。
class Yolov5Manager(object):
    """Thin convenience wrapper around a YOLOv5 DetectMultiBackend model.

    Loads trained weights, preprocesses OpenCV images, runs inference with
    NMS, and offers helpers to draw results and stream from camera/video.
    """

    def __init__(self, weights=r'', names=None, imgsz=(640, 640), conf_thres=0.3,
                 half=True, iou_thres=0.2,
                 device='0',
                 dnn=False, data=None):
        """Load the model and freeze the inference settings.

        Args:
            weights: path to a YOLOv5 weights file (.pt/.onnx/...).
            names: optional list of class labels; falls back to the labels
                stored inside the model when None or empty.
            imgsz: requested inference size (h, w); adjusted to the stride.
            conf_thres: confidence threshold for NMS.
            half: request FP16 inference (only honored on CUDA backends).
            iou_thres: IoU threshold for NMS.
            device: device string for select_device ('0', 'cpu', ...).
            dnn: use the OpenCV DNN backend for ONNX models.
            data: optional dataset yaml forwarded to DetectMultiBackend.
        """
        # BUGFIX: default for `names` was a mutable list ([]); use None so a
        # single list object is not shared across instances.
        self.names = names
        self.half = half
        self.conf_thres = conf_thres
        self.iou_thres = iou_thres
        self.device = select_device(device)
        self.model = DetectMultiBackend(weights, device=self.device, dnn=dnn, data=data)
        # backend flags: for a .pt checkpoint, pt=True and the rest are False
        self.stride, pt, jit, onnx, engine = (self.model.stride, self.model.pt,
                                              self.model.jit, self.model.onnx,
                                              self.model.engine)
        if self.names is None or len(self.names) == 0:
            self.names = self.model.names  # use labels baked into the model

        self.imgsz = check_img_size(imgsz, s=self.stride)  # make size a stride multiple
        self.auto = True  # letterbox with minimal rectangular padding
        # FP16 is supported on a limited set of backends and requires CUDA
        self.half &= (pt or jit or onnx or engine) and self.device.type != 'cpu'
        if pt or jit:
            self.model.model.half() if self.half else self.model.model.float()
        cudnn.benchmark = True  # speed up inference for constant image sizes
        # BUGFIX: warm up with the stride-checked self.imgsz (the original used
        # the raw `imgsz` argument, which may differ after check_img_size).
        self.model.warmup(imgsz=(1, 3, *self.imgsz), half=self.half)
我们还定义了一个内部函数 __draw_image,它接受一个 OpenCV 格式的图像、一个表示检测框位置的列表,以及一些可选参数,如标签、线条宽度和颜色。这个函数会在图像上绘制检测框和标签。
- def __draw_image(self, opencv_img, box, label='', line_width=None, box_color=(255, 255, 255),
- txt_box_color=(200, 200, 200),
- txt_color=(0, 0, 255)):
- lw = line_width or max(round(sum(opencv_img.shape) / 2 * 0.005), 2) # line width
- p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3]))
-
- cv2.rectangle(opencv_img, p1, p2, box_color, thickness=lw, lineType=cv2.LINE_AA)
- if label:
- tf = max(lw - 1, 1) # font thickness
- w, h = cv2.getTextSize(label, 0, fontScale=lw / 4, thickness=tf)[0] # text width, height
- outside = p1[1] - h - 1 >= 0 # label fits outside bo
- p2 = p1[0] + w, p1[1] - h - 3 if outside else p1[1] + h + 3
- cv2.rectangle(opencv_img, p1, p2, txt_box_color, -1, cv2.LINE_AA) # filled 背景
- label = label.split(',')[0]
- cv2.putText(opencv_img, label, (p1[0], p1[1] - 2 if outside else p1[1] + h + 2), 0, lw / 4, txt_color,
- thickness=tf, lineType=cv2.LINE_AA)
- return opencv_img
此外,inference_image 函数接受一个 OpenCV 格式的图像,将其预处理为模型可以接受的格式后进行推理,最后调用 non_max_suppression 函数进行非极大值抑制,并返回检测结果。
- def inference_image(self, opencv_img):
- img = letterbox(opencv_img, self.imgsz, stride=self.stride, auto=self.auto)[0]
- img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB
- img = np.ascontiguousarray(img)
- img = torch.from_numpy(img).to(self.device)
- img = img.half() if self.half else img.float() # uint8 to fp16/32
- img /= 255 # 0 - 255 to 0.0 - 1.0
- if len(img.shape) == 3:
- img = img[None] # expand for batch dim
- pred = self.model(img, augment=False, visualize=False)
- pred = non_max_suppression(pred, self.conf_thres, self.iou_thres, None, False, max_det=100)
- result_list = []
- # Process predictions
- for i, det in enumerate(pred): # per image
- if len(det):
- # Rescale boxes from img_size to im0 size
- det[:, :4] = scale_coords(img.shape[2:], det[:, :4], opencv_img.shape).round()
- for *xyxy, conf, cls in reversed(det):
- result_list.append(
- [self.names[int(cls)], round(float(conf), 2), int(xyxy[0]), int(xyxy[1]), int(xyxy[2]),
- int(xyxy[3])])
- return result_list
我们还提供了一些实用的函数,如 start_camera、start_video 和 start_video_and_save。这些函数可以分别从摄像头或视频文件中读取图像,进行推理和绘图,其中 start_video_and_save 还会把结果保存为新的视频文件。
- @torch.no_grad()
- def start_camera(self, camera_index=0):
- cap = cv2.VideoCapture(camera_index)
- while True:
- ret, frame = cap.read()
- if not ret:
- break
- result_list = self.inference_image(frame)
- frame = self.draw_image(result_list, frame)
- cv2.imshow('frame', frame)
- if cv2.waitKey(1) & 0xFF == ord('q'):
- break
- cap.release()
- cv2.destroyAllWindows()
- @torch.no_grad()
- def start_video(self, video_file):
- cap = cv2.VideoCapture(video_file)
- while cap.isOpened():
- ret, frame = cap.read()
- if not ret:
- print('ret is False')
- break
- result_list = self.inference_image(frame)
- frame = self.draw_image(result_list, frame)
- cv2.imshow('frame', frame)
- if cv2.waitKey(1) & 0xFF == ord('q'):
- break
- cap.release()
- cv2.destroyAllWindows()
- @torch.no_grad()
- def start_video_and_save(self, video_file, save_file, show=True):
- cap = cv2.VideoCapture(video_file)
- # 获取视频帧速率 FPS
- frame_fps = int(cap.get(cv2.CAP_PROP_FPS))
- # 获取视频帧宽度和高度
- frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
- frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
- print("video fps={},width={},height={}".format(frame_fps, frame_width, frame_height))
- fourcc = cv2.VideoWriter_fourcc(*'XVID')
- out = cv2.VideoWriter(save_file, fourcc, frame_fps, (frame_width, frame_height))
- count = 0
- while cap.isOpened():
- ret, frame = cap.read()
- if not ret:
- print("read over or error!")
- break
- result_list = self.inference_image(frame)
- frame = self.draw_image(result_list, frame)
- out.write(frame)
- if show:
- cv2.imshow("result", frame)
- if cv2.waitKey(1) & 0xFF == ord('q'):
- break
- out.release()
- cap.release()
- cv2.destroyAllWindows()
load_labels 函数可以从一个文本文件中加载标签名,每行一个。
- @classmethod
- def load_labels(cls, name_file):
- with open(name_file, 'r') as f:
- lines = f.read().rstrip('\n').split('\n')
- return lines
最后,在主程序中,我们实例化了一个Yolov5Manager对象,并使用它来进行一些实际的检测任务。例如,我们可以从摄像头中读取图像,并实时进行检测和绘图。我们也可以从视频文件中读取图像,进行检测和绘图,并将结果保存为一个新的视频文件。
if __name__ == '__main__':
    # Smoke test: load a model, run one image through it and print results.
    infer = Yolov5Manager(weights=r'yolov5s.pt', conf_thres=0.3, half=True,
                          iou_thres=0.2, device='0')
    beg = time.time()
    img = cv2.imread(r'cccccc.png')
    result_list = infer.inference_image(img)
    # BUGFIX: the original used time.time() without importing `time`
    # (NameError) and called infer.imshow(), a method that does not exist;
    # the timing that `beg` recorded was also never reported.
    print('inference took {:.3f}s'.format(time.time() - beg))
    print(result_list)
这个代码库提供了一个非常方便的接口,使得我们可以轻松地使用YOLOv5模型进行对象检测。我们可以通过修改和扩展这个代码库来满足我们的特定需求。
在此,我想推荐大家加入我们的YOLO目标检测交流学习群。群号是732818397。在这个群里,我们可以一起学习和探讨关于YOLO目标检测的各种问题和挑战。无论你是初学者还是有经验的专业人士,我们都欢迎你的加入。希望我们能在学习和交流的过程中共同进步,共同提高。期待在群里遇见你。
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。