赞
踩
YOLOv8的ONNX模型推理是指使用ONNX（Open Neural Network Exchange）格式的YOLOv8模型来进行目标检测的推理过程。ONNX是一种跨平台的深度学习模型格式，支持多种框架之间的模型转换和运行，使得模型能够在不同的硬件和软件平台上高效执行。
使用Ultralytics的YOLO库加载一个YOLOv8的PyTorch模型，并将其导出为ONNX格式：
from ultralytics import YOLO

# Load the pretrained YOLOv8n detection weights. The inference script in this
# article loads "yolov8n.onnx" and decodes detection-style output (box + class
# scores), so the detection checkpoint is exported rather than the pose one.
model = YOLO("yolov8n.pt")
model.export(format="onnx")  # export the model to onnx format
-
准备coco128.yaml文件，用于存放类别名称（代码会读取其中的names字段）。
代码如下:
onnx_inference
- import os
- import time
- import random
- from tool import *
-
-
def main(mode=1, model_path="yolov8n.onnx"):
    """Run one of the demo pipelines with a YOLOv8 ONNX model.

    Args:
        mode: Pipeline selector: 1 = images from ``./images``,
            2 = webcam stream, 3 = video file.
        model_path: Path of the ONNX model passed to ``init_detect_model``.
    """
    modes = {
        1: process_images,
        2: webcam_detection,
        3: video_processing,
    }

    # Validate the mode first so an invalid choice does not pay for loading
    # the model.
    handler = modes.get(mode)
    if handler is None:
        print("Invalid mode. Please choose from 1, 2, or 3.")
        return

    session, model_inputs, input_width, input_height = init_detect_model(model_path)
    handler(session, model_inputs, input_width, input_height)
-
-
def process_images(session, model_inputs, input_width, input_height):
    """Run detection on every file in ``./images``, in random order.

    Each annotated result is written to ``output_image.jpg`` (overwritten on
    every iteration) and shown in a window until a key is pressed.

    Args:
        session: Inference session returned by ``init_detect_model``.
        model_inputs: Model input descriptors returned by ``init_detect_model``.
        input_width: Width the model expects.
        input_height: Height the model expects.
    """
    image_dir = './images'
    image_list = os.listdir(image_dir)
    random.shuffle(image_list)
    for image_item in image_list:
        path = os.path.join(image_dir, image_item)
        im0 = cv2.imread(path)
        # cv2.imread returns None for sub-directories and non-image files;
        # skip them instead of crashing inside the detector.
        if im0 is None:
            print(f"Warning: Could not read image: {path}")
            continue
        result_image = detect_object(im0, session, model_inputs, input_width, input_height)
        cv2.imwrite("output_image.jpg", result_image)
        cv2.imshow('Output', result_image)
        cv2.waitKey(0)
    # Close the preview window, as the webcam and video modes do.
    cv2.destroyAllWindows()
-
-
def webcam_detection(session, model_inputs, input_width, input_height):
    """Run detection on frames from camera 0 until 'q' is pressed.

    The average FPS since the start is drawn on every displayed frame.

    Args:
        session: Inference session returned by ``init_detect_model``.
        model_inputs: Model input descriptors returned by ``init_detect_model``.
        input_width: Width the model expects.
        input_height: Height the model expects.
    """
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("Error: Could not open camera.")
        return
    frame_count, start_time = 0, time.time()
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                print("Error: Could not read frame.")
                break
            output_image = detect_object(frame, session, model_inputs, input_width, input_height)
            frame_count += 1
            elapsed_time = time.time() - start_time
            # Same zero-elapsed guard as video_processing.
            fps = frame_count / elapsed_time if elapsed_time > 0 else 0
            cv2.putText(output_image, f"FPS: {fps:.2f}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1,
                        (255, 255, 255), 2, cv2.LINE_AA)
            cv2.imshow("Video", output_image)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the camera and windows even if detection raises.
        cap.release()
        cv2.destroyAllWindows()
-
-
def video_processing(session, model_inputs, input_width, input_height):
    """Run detection on ``kun1.mp4`` and write the result to ``kun_det1.mp4``.

    Annotated frames are also displayed; pressing 'q' stops early. The average
    processing FPS is printed after every frame.

    Args:
        session: Inference session returned by ``init_detect_model``.
        model_inputs: Model input descriptors returned by ``init_detect_model``.
        input_width: Width the model expects.
        input_height: Height the model expects.
    """
    input_video_path = 'kun1.mp4'
    output_video_path = 'kun_det1.mp4'
    cap = cv2.VideoCapture(input_video_path)
    if not cap.isOpened():
        print("Error: Could not open video.")
        return
    out = None
    try:
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # Frame rate of the source file; kept separate from the measured
        # processing rate below.
        source_fps = cap.get(cv2.CAP_PROP_FPS)
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_video_path, fourcc, source_fps, (frame_width, frame_height))
        frame_count, start_time = 0, time.time()
        while True:
            ret, frame = cap.read()
            if not ret:
                print("Info: End of video file.")
                break
            output_image = detect_object(frame, session, model_inputs, input_width, input_height)
            frame_count += 1
            elapsed_time = time.time() - start_time
            processing_fps = frame_count / elapsed_time if elapsed_time > 0 else 0
            print(f"FPS: {processing_fps:.2f}")
            out.write(output_image)
            cv2.imshow("Output Video", output_image)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always release the reader, the writer and the windows, even if
        # detection raises part-way through the file.
        cap.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()
-
-
# Run the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()
tool.py（上面的脚本通过 from tool import * 导入该模块）
- import cv2
- import yaml
- import torch.cuda
- import numpy as np
- from PIL import Image
- import onnxruntime as ort
-
-
# IoU threshold handed to non-maximum suppression
iou_thresh = 0.6
# Minimum class score a detection needs to be kept
confidence_thresh = 0.55
# YAML file the class names are read from
label_path = 'coco128.yaml'
-
-
-
def yaml_load(file=label_path):
    """Read a YAML file and return its parsed content.

    Args:
        file: Path of the YAML file; defaults to ``label_path``.

    Returns:
        Whatever ``yaml.safe_load`` produces for the file.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding; undecodable bytes are dropped.
    with open(file, encoding='utf-8', errors='ignore') as f:
        return yaml.safe_load(f)
-
# Class names, taken from the `names` entry of the label file.
classes = yaml_load(label_path)['names']

# One random colour per class, used when drawing boxes and labels.
color_palette = np.random.uniform(100, 255, size=(len(classes), 3))

cuda = torch.cuda.is_available()
# PyTorch seeing a GPU does not imply that the installed ONNX Runtime build
# ships the CUDA provider, so keep only providers the runtime reports as
# available and always fall back to the CPU.
_preferred_providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider']
_available_providers = ort.get_available_providers()
providers = [p for p in _preferred_providers if p in _available_providers] or ['CPUExecutionProvider']
-
-
def calculate_iou(box, other_boxes):
    """Compute the IoU between one box and each of several other boxes.

    Boxes are given as ``[x, y, w, h]`` with ``(x, y)`` the top-left corner.

    Args:
        box: The reference box.
        other_boxes: Sequence or array of boxes to compare against.

    Returns:
        1-D float array with one IoU value per entry of ``other_boxes``.
        Pairs whose union area is zero yield 0 rather than NaN; an empty
        ``other_boxes`` yields an empty array.
    """
    # Convert once instead of on every column access.
    others = np.atleast_2d(np.asarray(other_boxes, dtype=float))
    if others.size == 0:
        return np.zeros(0)

    bx, by, bw, bh = (float(v) for v in box[:4])
    ox, oy, ow, oh = others[:, 0], others[:, 1], others[:, 2], others[:, 3]

    # Top-left and bottom-right corners of the intersection rectangle
    x1 = np.maximum(bx, ox)
    y1 = np.maximum(by, oy)
    x2 = np.minimum(bx + bw, ox + ow)
    y2 = np.minimum(by + bh, oy + oh)

    intersection_area = np.maximum(0, x2 - x1) * np.maximum(0, y2 - y1)
    union_area = bw * bh + ow * oh - intersection_area

    # Degenerate (zero-area) pairs would otherwise divide 0 by 0.
    iou = np.zeros_like(intersection_area)
    np.divide(intersection_area, union_area, out=iou, where=union_area > 0)
    return iou
-
def custom_NMSBoxes(boxes, scores, confidence_threshold, iou_threshold):
    """Greedy non-maximum suppression over ``[x, y, w, h]`` boxes.

    Args:
        boxes: Sequence of boxes.
        scores: One score per box.
        confidence_threshold: Boxes whose score is not strictly greater than
            this value are discarded before suppression.
        iou_threshold: A box is suppressed when its IoU with an already
            selected box exceeds this value.

    Returns:
        List of int indices into the ORIGINAL ``boxes``/``scores`` sequences,
        ordered by descending score.
    """
    if len(boxes) == 0:
        return []

    scores = np.asarray(scores)
    boxes = np.asarray(boxes)

    # Keep the original positions of the boxes that pass the score filter so
    # the returned indices stay valid for the caller's unfiltered lists.
    candidates = np.flatnonzero(scores > confidence_threshold)
    if candidates.size == 0:
        return []

    # Original indices of the candidates, highest score first.
    order = candidates[np.argsort(scores[candidates])[::-1]]

    keep = []
    while order.size > 0:
        best = order[0]
        keep.append(int(best))
        if order.size == 1:
            break
        remaining = order[1:]
        iou = calculate_iou(boxes[best], boxes[remaining])
        # Drop everything that overlaps the selected box too much.
        order = remaining[iou <= iou_threshold]
    return keep
-
-
def draw_detections(img, box, score, class_id):
    """Draw one detection (box plus "class: score" label) onto ``img`` in place.

    Args:
        img: Image to draw on.
        box: ``[x, y, w, h]`` with ``(x, y)`` the top-left corner.
        score: Detection score shown in the label.
        class_id: Index into ``classes`` and ``color_palette``.
    """
    x1, y1, w, h = box
    # Cast every coordinate once so the label is drawn correctly even when
    # the box holds floats, matching the casts used for the box itself.
    left, top = int(x1), int(y1)
    right, bottom = int(x1 + w), int(y1 + h)

    color = color_palette[class_id]
    cv2.rectangle(img, (left, top), (right, bottom), color, 2)

    label = f'{classes[class_id]}: {score:.2f}'
    (label_width, label_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)

    # Put the label above the box, or just below its top edge when there is
    # not enough room above.
    label_x = left
    label_y = top - 10 if top - 10 > label_height else top + 10

    # Filled rectangle as label background, then the text itself.
    cv2.rectangle(img, (label_x, label_y - label_height), (label_x + label_width, label_y + label_height),
                  color, cv2.FILLED)
    cv2.putText(img, label, (label_x, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
-
-
def preprocess(img, input_width, input_height):
    """Turn a BGR image into the float32 tensor fed to the model.

    Args:
        img: Source image as an array (converted with ``COLOR_BGR2RGB``).
        input_width: Target width for the resize.
        input_height: Target height for the resize.

    Returns:
        Tuple ``(image_data, img_height, img_width)``: the batched,
        channel-first float32 array scaled by 1/255, plus the original image
        height and width.
    """
    img_height, img_width = img.shape[:2]

    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    resized = cv2.resize(rgb, (input_width, input_height))

    # Scale to [0, 1], move channels first, then add the batch axis.
    scaled = np.array(resized) / 255.0
    channel_first = np.transpose(scaled, (2, 0, 1))
    image_data = np.expand_dims(channel_first, axis=0).astype(np.float32)

    return image_data, img_height, img_width
-
def postprocess(input_image, output, input_width, input_height, img_width, img_height):
    """Decode the model output, apply NMS and draw the surviving detections.

    Args:
        input_image: Image the detections are drawn on (modified in place).
        output: Raw model outputs; only ``output[0]`` is used.
        input_width: Width the model was fed with.
        input_height: Height the model was fed with.
        img_width: Width of the original image.
        img_height: Height of the original image.

    Returns:
        ``input_image`` with the detections drawn on it.
    """
    # After squeeze + transpose each row is one candidate: the first four
    # values are read as x, y, w, h and the rest as per-class scores.
    predictions = np.transpose(np.squeeze(output[0]))

    # Factors mapping model-input coordinates back to the original image.
    x_factor = img_width / input_width
    y_factor = img_height / input_height

    boxes, scores, class_ids = [], [], []
    for row in predictions:
        class_scores = row[4:]
        best_score = np.amax(class_scores)
        if best_score >= confidence_thresh:
            x, y, w, h = row[0], row[1], row[2], row[3]
            # The first two values are treated as the box centre: shift by
            # half the size to get the top-left corner, then rescale.
            boxes.append([
                int((x - w / 2) * x_factor),
                int((y - h / 2) * y_factor),
                int(w * x_factor),
                int(h * y_factor),
            ])
            scores.append(best_score)
            class_ids.append(np.argmax(class_scores))

    # Suppress overlapping boxes and draw whatever remains.
    for kept in custom_NMSBoxes(boxes, scores, confidence_thresh, iou_thresh):
        draw_detections(input_image, boxes[kept], scores[kept], class_ids[kept])

    return input_image
-
def init_detect_model(model_path):
    """Create the ONNX Runtime session and read the model's input size.

    Args:
        model_path: Path of the ONNX model file.

    Returns:
        Tuple ``(session, model_inputs, input_width, input_height)``.
    """
    session = ort.InferenceSession(model_path, providers=providers)
    model_inputs = session.get_inputs()
    input_shape = model_inputs[0].shape

    # preprocess() feeds the model a (1, C, H, W) tensor, so axis 2 is the
    # height and axis 3 the width. Reading them the other way round only
    # works for square inputs.
    input_height, input_width = input_shape[2], input_shape[3]

    # NOTE(review): models exported with dynamic axes report non-integer
    # dimensions here; 640 is assumed as the fallback size, confirm it matches
    # the export settings.
    if not isinstance(input_height, int):
        input_height = 640
    if not isinstance(input_width, int):
        input_width = 640

    return session, model_inputs, input_width, input_height
-
def detect_object(image, session, model_inputs, input_width, input_height):
    """Run the detector on one image and return the annotated image.

    Args:
        image: Either a PIL image or an array in OpenCV's BGR layout.
        session: Inference session returned by ``init_detect_model``.
        model_inputs: Model input descriptors returned by ``init_detect_model``.
        input_width: Width the model expects.
        input_height: Height the model expects.

    Returns:
        The image as a BGR array with the detections drawn on it. An array
        input is annotated in place; a PIL input is converted to a new array.
    """
    if isinstance(image, Image.Image):
        # PIL stores RGB while preprocess() assumes BGR, so convert the
        # channel order instead of passing the raw array through.
        result_image = cv2.cvtColor(np.array(image.convert("RGB")), cv2.COLOR_RGB2BGR)
    else:
        # Otherwise the input is assumed to already be a BGR array.
        result_image = image

    img_data, img_height, img_width = preprocess(result_image, input_width, input_height)
    outputs = session.run(None, {model_inputs[0].name: img_data})
    output_image = postprocess(result_image, outputs, input_width, input_height, img_width, img_height)
    return output_image
-
-
-
-
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。