赞
踩
论文题目:YOLOv10: Real-Time End-to-End Object Detection
研究单位:清华大学
论文链接:http://arxiv.org/abs/2405.14458
代码链接:https://github.com/THU-MIG/yolov10
作者提供的模型性能评价图,如下:
YOLOv10-N:https://github.com/jameslahm/yolov10/releases/download/v1.0/yolov10n.pt
YOLOv10-S:https://github.com/jameslahm/yolov10/releases/download/v1.0/yolov10s.pt
YOLOv10-M:https://github.com/jameslahm/yolov10/releases/download/v1.0/yolov10m.pt
YOLOv10-B:https://github.com/jameslahm/yolov10/releases/download/v1.0/yolov10b.pt
YOLOv10-L:https://github.com/jameslahm/yolov10/releases/download/v1.0/yolov10l.pt
YOLOv10-X:https://github.com/jameslahm/yolov10/releases/download/v1.0/yolov10x.pt
推理速度很快,最主要的原因是不需要后处理;缺点是网络比较难训练:PSA 模块会多占用几 GB 显存,并且收敛较慢。
win10、TensorRT=8.6.1
git clone https://github.com/THU-MIG/yolov10.git
conda create -n YOLO python=3.9
conda activate YOLO
cd yolov10
pip install -r requirements.txt
下载pt模型
用下面的代码将 pt 模型转换为 ONNX 模型:
# -*- coding: utf-8 -*-
# @Time : 2024/6/13 10:54
# @Site :
# @File : export.py
# @Comment : Export a YOLOv10 checkpoint to ONNX.
from ultralytics import YOLOv10

# Options forwarded to ``model.export``; see the reference table at the end
# of this script for the meaning of each argument.
EXPORT_OPTIONS = dict(
    format="onnx",
    device='0',
    batch=2,
    opset=12,
    half=True,
)

# Load a model
model = YOLOv10(r"yolov10s.pt")  # load an official model

# Export the model
model.export(**EXPORT_OPTIONS)

"""
Argument   Type          Default        Description
format     str           'torchscript'  Target format for the exported model, such as 'onnx', 'torchscript', 'tensorflow', or others, defining compatibility with various deployment environments.
imgsz      int or tuple  640            Desired image size for the model input. Can be an integer for square images or a tuple (height, width) for specific dimensions.
keras      bool          False          Enables export to Keras format for TensorFlow SavedModel, providing compatibility with TensorFlow serving and APIs.
optimize   bool          False          Applies optimization for mobile devices when exporting to TorchScript, potentially reducing model size and improving performance.
half       bool          False          Enables FP16 (half-precision) quantization, reducing model size and potentially speeding up inference on supported hardware.
int8       bool          False          Activates INT8 quantization, further compressing the model and speeding up inference with minimal accuracy loss, primarily for edge devices.
dynamic    bool          False          Allows dynamic input sizes for ONNX and TensorRT exports, enhancing flexibility in handling varying image dimensions.
simplify   bool          False          Simplifies the model graph for ONNX exports with onnxslim, potentially improving performance and compatibility.
opset      int           None           Specifies the ONNX opset version for compatibility with different ONNX parsers and runtimes. If not set, uses the latest supported version.
workspace  float         4.0            Sets the maximum workspace size in GiB for TensorRT optimizations, balancing memory usage and performance.
nms        bool          False          Adds Non-Maximum Suppression (NMS) to the CoreML export, essential for accurate and efficient detection post-processing.
batch      int           1              Specifies export model batch inference size or the max number of images the exported model will process concurrently in predict mode.
"""
import onnx
import tensorrt as trt


def onnx_export_engine(workspace, onnx_path, trt_path):
    """Build a serialized TensorRT engine from an ONNX file and save it.

    Args:
        workspace: Builder workspace limit, in GiB. May be fractional.
        onnx_path: Path of the ONNX model to parse.
        trt_path: Path the serialized engine is written to.

    Raises:
        RuntimeError: If the ONNX file cannot be parsed or the engine
            build fails.
    """
    # Create the builder.
    logger = trt.Logger(trt.Logger.WARNING)
    builder = trt.Builder(logger)

    # Create the build configuration and cap the workspace memory.
    config = builder.create_builder_config()
    workspace_bytes = int(workspace * (1 << 30))
    if hasattr(config, 'set_memory_pool_limit'):
        # Preferred API; ``max_workspace_size`` is deprecated in newer
        # TensorRT releases.
        config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE,
                                     workspace_bytes)
    else:
        config.max_workspace_size = workspace_bytes

    # Create the network definition (explicit batch dimension).
    flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    network = builder.create_network(flag)

    # Import the ONNX model; surface the parser's own diagnostics on failure.
    parser = trt.OnnxParser(network, logger)
    if not parser.parse_from_file(str(onnx_path)):
        details = '; '.join(
            parser.get_error(i).desc() for i in range(parser.num_errors))
        message = f'failed to load ONNX file: {onnx_path}'
        if details:
            message = f'{message} ({details})'
        raise RuntimeError(message)

    # NOTE: FP16 is not enabled here. To build a half-precision engine, set
    # ``trt.BuilderFlag.FP16`` on ``config`` (check
    # ``builder.platform_has_fast_fp16`` first).

    # The builder returns None when the build fails, so check before writing
    # to avoid leaving an empty or invalid engine file behind.
    serialized_engine = builder.build_serialized_network(network, config)
    if serialized_engine is None:
        raise RuntimeError(
            f'failed to build TensorRT engine from: {onnx_path}')

    with open(trt_path, 'wb') as engine_file:
        engine_file.write(serialized_engine)
    print('转化完成')


if __name__ == '__main__':
    onnx_path = 'weights2/best.onnx'
    trt_path = 'end2end.engine'
    onnx_export_engine(4, onnx_path, trt_path)
from models import TRTModule  # isort:skip
import argparse
import cv2
from numpy import ndarray
import time
import random
import numpy as np
import os
import pickle
from collections import defaultdict, namedtuple
from pathlib import Path
from typing import List, Optional, Tuple, Union
import onnx
import tensorrt as trt
import torch

# Set CUDA_MODULE_LOADING to 'LAZY' for this process.
os.environ['CUDA_MODULE_LOADING'] = 'LAZY'

# Fixed seed so the per-class colors generated below are reproducible.
random.seed(0)

# Detection model class names; the 80 labels listed here.
CLASSES = (
    'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
    'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella',
    'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
    'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard',
    'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork',
    'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
    'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
    'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',
    'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
    'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors',
    'teddy bear', 'hair drier', 'toothbrush',
)

# Alternative three-class label set:
# CLASSES = (
#     'person', 'sports ball', 'car'
# )

# One random color per class: a list of three ints in [0, 255].
COLORS = {
    class_name: [random.randint(0, 255) for _channel in range(3)]
    for class_name in CLASSES
}

# Recognized image file suffixes.
SUFFIXS = ('.bmp', '.dng', '.jpeg', '.jpg', '.mpo', '.png', '.tif', '.tiff',
           '.webp', '.pfm')
class TRTModule(torch.nn.Module): dtypeMapping = { trt.bool: torch.bool, trt.int8: torch.int8, trt.int32: torch.int32, trt.float16: torch.float16, trt.float32: torch.float32 } def __init__(self, weight: Union[str, Path], device: Optional[torch.device]) -> None: super(TRTModule, self).__init__() self.weight = Path(weight) if isinstance(weight, str) else weight self.device = device if device is not None else torch.device('cuda:0') self.stream = torch.cuda.Stream(device=device) self.__init_engine() self.__init_bindings() def __init_engine(self) -> None: logger = trt.Logger(trt.Logger.WARNING) trt.init_libnvinfer_plugins(logger, namespace='') with trt.Runtime(logger) as runtime: model = runtime.deserialize_cuda_engine(self.weight.read_bytes()) context = model.create_execution_context() num_bindings = model.num_bindings names = [model.get_binding_name(i) for i in range(num_bindings)] self.bindings: List[int] = [0] * num_bindings num_inputs, num_outputs = 0, 0 for i in range(num_bindings): if model.binding_is_input(i): num_inputs += 1 else: num_outputs += 1 self.num_bindings = num_bindings self.num_inputs = num_inputs self.num_outputs = num_outputs self.model = model self.context = context self.input_names = names[:num_inputs] self.output_names = names[num_inputs:] self.idx = list(range(self.num_outputs)) def __init_bindings(self) -> None: idynamic = odynamic = False Tensor = namedtuple('Tensor', ('name', 'dtype', 'shape')) inp_info = [] out_info = [] for i, name in enumerate(self.input_names): assert self.model.get_binding_name(i) == name dtype = self.dtypeMapping[self.model.get_binding_dtype(i)] shape = tuple(self.model.get_binding_shape(i)) if -1 in shape: idynamic |= True inp_info.append(Tensor(name, dtype, shape)) for i, name in enumerate(self.output_names): i += self.num_inputs assert self.model.get_binding_name(i) == name dtype = self.dtypeMapping[self.model.get_binding_dtype(i)] shape = tuple(self.model.get_binding_shape(i)) if -1 in shape: odynamic |= True 
out_info.append(Tensor(name, dtype, shape)) if not odynamic: self.output_tensor = [ torch.empty(info.shape, dtype=info.dtype, device=self.device) for info in out_info ] self.idynamic = idynamic self.odynamic = odynamic self.inp_info = inp_info self.out_info
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。