当前位置:   article > 正文

【yolov5】onnx的INT8量化engine_onnxruntime yolov5 量化

onnxruntime yolov5 量化

GitHub上有大佬写好了代码,理论上直接把仓库克隆下来就可以使用

git clone https://github.com/Wulingtian/yolov5_tensorrt_int8_tools.git

然后在yolov5_tensorrt_int8_tools的convert_trt_quant.py 修改如下参数

BATCH_SIZE 模型量化一次输入多少张图片

BATCH 模型量化次数

height width 输入图片的高和宽

CALIB_IMG_DIR 训练图片路径,用于量化

onnx_model_path onnx模型路径

engine_model_path 模型保存路径

其中 BATCH_SIZE × BATCH(校准时总共要读取的图片数)不能超过校准图片的数量,建议多准备几张;然后运行 convert_trt_quant.py

出问题了吧@_@

这是因为tensorrt版本更新的原因:这个代码使用的tensorrt版本是7系列的,而新版tensorrt已经移除了其中用到的一些属性,所以我们需要对这个大佬写的代码进行一些修改

如何修改呢,其实tensorrt官方给出了一个caffe量化INT8的例子

https://github.com/NVIDIA/TensorRT/tree/master/samples/python/int8_caffe_mnist

如果足够NB是可以根据官方的这个例子修改一下直接实现onnx的INT8量化的

但是奈何我连半桶水都没有,只有一滴水,但是这个例子中的tensorrt版本是新的,于是我尝试将上面那位大佬的代码修改为使用新版的tensorrt

居然成功了??!!

成功量化后的模型大小只有4MB,相比之下的FP16的大小为6MB,FP32的大小为9MB

再看看检测速度,速度和FP16差不太多

但是效果要差上一些了

那肯定不能忘记送上修改的代码,折腾一晚上的结果如下,主要是 util_trt程序

  1. # tensorrt-lib
  2. import os
  3. import tensorrt as trt
  4. import pycuda.autoinit
  5. import pycuda.driver as cuda
  6. from calibrator import Calibrator
  7. from torch.autograd import Variable
  8. import torch
  9. import numpy as np
  10. import time
  # Module-wide TensorRT logger, created at VERBOSE severity.
  TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE)


  # Create a TensorRT engine: load a serialized one if present, otherwise build from ONNX.
  def get_engine(max_batch_size=1, onnx_file_path="", engine_file_path="",
                 fp16_mode=False, int8_mode=False, calibration_stream=None,
                 calibration_table_path="", save_engine=False):
      """Load a serialized engine if available, otherwise build a new one from ONNX.

      If ``engine_file_path`` exists on disk it is deserialized and returned and
      every build option below is ignored.

      Args:
          max_batch_size: Assigned to ``builder.max_batch_size`` when the
              installed TensorRT still exposes that attribute.
          onnx_file_path: Path of the ONNX model to parse.
          engine_file_path: Path of the serialized engine to load, and the
              destination used when ``save_engine`` is true.
          fp16_mode: Set the FP16 builder flag.
          int8_mode: Set the INT8 builder flag and attach a ``Calibrator``.
          calibration_stream: Object handed to ``Calibrator``; required when
              ``int8_mode`` is true.
          calibration_table_path: Second argument handed to ``Calibrator``.
          save_engine: Write the built engine to ``engine_file_path``.

      Returns:
          The deserialized engine, or ``None`` when ONNX parsing or the engine
          build fails.

      Raises:
          SystemExit: The ONNX file does not exist.
          AssertionError: The parsed network is empty, or INT8 mode was
              requested without a calibration stream.
      """

      def build_engine(max_batch_size, save_engine):
          """Parse the ONNX file and build a TensorRT engine from it."""
          # NOTE(review): the literal 1 is presumably the EXPLICIT_BATCH
          # network-creation flag - confirm against the TensorRT docs.
          with trt.Builder(TRT_LOGGER) as builder, \
                  builder.create_network(1) as network, \
                  trt.OnnxParser(network, TRT_LOGGER) as parser:
              # Parse the ONNX model file.
              if not os.path.exists(onnx_file_path):
                  raise SystemExit('ONNX file {} not found'.format(onnx_file_path))
              print('Loading ONNX file from path {}...'.format(onnx_file_path))
              with open(onnx_file_path, 'rb') as model:
                  print('Beginning ONNX file parsing')
                  parsed_ok = parser.parse(model.read())
              if not parsed_ok:
                  # Report why parsing failed instead of building from a
                  # partially populated network.
                  for err_idx in range(parser.num_errors):
                      print(parser.get_error(err_idx))
                  print('Failed to parse the ONNX file')
                  return None
              assert network.num_layers > 0, (
                  'Failed to parse ONNX model. '
                  'Please check if the ONNX model is compatible '
              )
              print('Completed parsing of ONNX file')
              print('Building an engine from file {}; this may take a while...'.format(onnx_file_path))

              # Build the engine. Attributes that newer TensorRT releases
              # dropped are guarded so the script runs on old and new versions.
              if hasattr(builder, 'max_batch_size'):
                  builder.max_batch_size = max_batch_size
              config = builder.create_builder_config()
              # NOTE(review): 1 << 20 bytes (1 MiB) is kept from the original;
              # it is a small workspace - consider raising it.
              if hasattr(config, 'set_memory_pool_limit'):
                  config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 20)
              else:
                  config.max_workspace_size = 1 << 20
              if fp16_mode:
                  config.set_flag(trt.BuilderFlag.FP16)
                  print('FP16 mode enabled')
              if int8_mode:
                  config.set_flag(trt.BuilderFlag.INT8)
                  assert calibration_stream, 'Error: a calibration_stream should be provided for int8 mode'
                  config.int8_calibrator = Calibrator(calibration_stream, calibration_table_path)
                  print('Int8 mode enabled')

              plan = builder.build_serialized_network(network, config)
              if plan is None:
                  # Check before deserializing: passing None on would fail
                  # without ever printing the message below.
                  print('Failed to create the engine')
                  return None
              runtime = trt.Runtime(TRT_LOGGER)
              engine = runtime.deserialize_cuda_engine(plan)
              if engine is None:
                  print('Failed to create the engine')
                  return None
              print("Completed creating the engine")
              if save_engine:
                  with open(engine_file_path, "wb") as f:
                      f.write(engine.serialize())
              return engine

      if os.path.exists(engine_file_path):
          # If a serialized engine exists, load it instead of building a new one.
          print("Reading engine from file {}".format(engine_file_path))
          with open(engine_file_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
              return runtime.deserialize_cuda_engine(f.read())
      else:
          return build_engine(max_batch_size, save_engine)

唔,convert_trt_quant.py的代码也给一下吧

  1. import numpy as np
  2. import torch
  3. import torch.nn as nn
  4. import util_trt
  5. import glob,os,cv2
  # Number of images placed in each calibration batch.
  BATCH_SIZE = 1
  # Number of calibration batches handed out by DataLoader.
  BATCH = 79
  # Height and width, in pixels, of the calibration buffer and letterboxed images.
  height = 640
  width = 640
  # Directory searched for *.jpg calibration images.
  CALIB_IMG_DIR = '/content/drive/MyDrive/yolov5/ikunData/images'
  # Path of the ONNX model that is converted to a TensorRT engine.
  onnx_model_path = "runs/train/exp4/weights/FP32.onnx"
  def preprocess_v1(image_raw):
      """Letterbox an image to ``width`` x ``height`` and return it as CHW float32.

      The image is converted from BGR to RGB, resized so that it fits inside
      the target size while keeping its aspect ratio, padded on the short side
      with gray (128, 128, 128), scaled to [0, 1] and transposed from HWC to
      CHW.

      Args:
          image_raw: HWC image array with three dimensions; treated as BGR.

      Returns:
          A float32 array in CHW order with values in [0, 1]. No batch
          dimension is added.
      """
      h, w, _ = image_raw.shape
      image = cv2.cvtColor(image_raw, cv2.COLOR_BGR2RGB)
      # Calculate the resized width/height and the paddings.
      r_w = width / w
      r_h = height / h
      if r_h > r_w:
          # Width is the limiting side: fill it and pad top/bottom.
          tw = width
          th = int(r_w * h)
          tx1 = tx2 = 0
          ty1 = int((height - th) / 2)
          ty2 = height - th - ty1
      else:
          # Height is the limiting side: fill it and pad left/right.
          tw = int(r_h * w)
          th = height
          tx1 = int((width - tw) / 2)
          tx2 = width - tw - tx1
          ty1 = ty2 = 0
      # Resize along the long side while maintaining the aspect ratio.
      image = cv2.resize(image, (tw, th))
      # Pad the short side with (128, 128, 128). The colour must be passed as
      # ``value=``: the positional slot after ``borderType`` is ``dst``.
      image = cv2.copyMakeBorder(
          image, ty1, ty2, tx1, tx2, cv2.BORDER_CONSTANT, value=(128, 128, 128)
      )
      image = image.astype(np.float32)
      # Normalize to [0, 1].
      image /= 255.0
      # HWC to CHW format.
      image = np.transpose(image, [2, 0, 1])
      return image
  def preprocess(img):
      """Resize an image to ``width`` x ``height`` and return it as CHW float32.

      Unlike ``preprocess_v1`` this stretches the image to the target size
      without keeping the aspect ratio. The target size comes from the module
      ``width``/``height`` constants rather than a hard-coded 640 x 640.

      Args:
          img: HWC image array; treated as BGR.

      Returns:
          A float32 array in CHW order with values in [0, 1].
      """
      # cv2.resize takes the target size as (width, height).
      img = cv2.resize(img, (width, height))
      img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
      img = img.transpose((2, 0, 1)).astype(np.float32)
      img /= 255.0
      return img
  class DataLoader:
      """Serve preprocessed calibration batches read from ``CALIB_IMG_DIR``.

      ``BATCH`` batches of ``BATCH_SIZE`` images each are handed out by
      ``next_batch``; every image goes through ``preprocess_v1``. All batches
      are written into one preallocated
      ``(BATCH_SIZE, 3, height, width)`` float32 buffer.
      """

      def __init__(self):
          """Collect the calibration images and allocate the batch buffer.

          Raises:
              AssertionError: Fewer than ``BATCH_SIZE * BATCH`` ``*.jpg`` files
                  are found in ``CALIB_IMG_DIR``.
          """
          self.index = 0
          self.length = BATCH
          self.batch_size = BATCH_SIZE
          # Sorted so that the calibration set does not depend on the
          # unspecified order returned by glob.
          self.img_list = sorted(glob.glob(os.path.join(CALIB_IMG_DIR, "*.jpg")))
          required = self.batch_size * self.length
          # Exactly `required` images is enough: next_batch never indexes past
          # required - 1.
          assert len(self.img_list) >= required, \
              '{} must contain at least {} images to calib'.format(CALIB_IMG_DIR, required)
          print('found all {} images to calib.'.format(len(self.img_list)))
          self.calibration_data = np.zeros((self.batch_size, 3, height, width), dtype=np.float32)

      def reset(self):
          """Rewind to the first batch."""
          self.index = 0

      def next_batch(self):
          """Return the next batch, or an empty array once all batches are served.

          Returns:
              A contiguous float32 array of shape
              ``(batch_size, 3, height, width)``, or ``np.array([])`` when
              ``index`` has reached ``length``.

          Raises:
              AssertionError: An image path no longer exists.
              ValueError: ``cv2.imread`` could not read an image.
          """
          if self.index >= self.length:
              return np.array([])
          start = self.index * self.batch_size
          batch_paths = self.img_list[start:start + self.batch_size]
          for slot, path in enumerate(batch_paths):
              assert os.path.exists(path), 'not found!!'
              img = cv2.imread(path)
              if img is None:
                  # Fail here with the path instead of crashing later inside
                  # preprocess_v1 on a None image.
                  raise ValueError('failed to read calibration image: {}'.format(path))
              self.calibration_data[slot] = preprocess_v1(img)
          self.index += 1
          return np.ascontiguousarray(self.calibration_data, dtype=np.float32)

      def __len__(self):
          """Return the number of calibration batches."""
          return self.length
  def main():
      """Convert the ONNX model to an INT8 TensorRT engine using image calibration.

      Calls ``util_trt.get_engine`` with INT8 enabled, a ``DataLoader`` as the
      calibration stream and ``save_engine=True``, then asserts that an engine
      was returned.
      """
      # onnx -> tensorrt precision switches
      use_fp16 = False
      use_int8 = True
      print('*** onnx to tensorrt begin ***')

      # Calibration data source.
      calib_loader = DataLoader()
      engine_path = "runs/train/exp4/weights/int8.engine"
      calib_cache = 'yolov5_tensorrt_int8_tools/models_save/calibration.cache'

      # Fixed engine; calibration produces the calibration table.
      engine_fixed = util_trt.get_engine(
          max_batch_size=BATCH_SIZE,
          onnx_file_path=onnx_model_path,
          engine_file_path=engine_path,
          fp16_mode=use_fp16,
          int8_mode=use_int8,
          calibration_stream=calib_loader,
          calibration_table_path=calib_cache,
          save_engine=True,
      )
      assert engine_fixed, 'Broken engine_fixed'
      print('*** onnx to tensorrt completed ***\n')


  if __name__ == '__main__':
      main()
声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/不正经/article/detail/725339
推荐阅读
相关标签
  

闽ICP备14008679号