赞
踩
TensorRT配置
# Install pycuda
conda install pycuda

# Install the TensorRT Python wheel
pip install /home/s4/Downloads/TensorRT/TensorRT-7.2.3.4/python/tensorrt-7.2.3.4-py38-none-linux_x86_64.whl

# Add the TensorRT libraries to the search paths: open ~/.bashrc and append
# the two export lines below. ~/.bashrc is your own file, so the editor is
# started without sudo (running a GUI editor as root is unnecessary here).
gedit ~/.bashrc
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/s4/Downloads/TensorRT/TensorRT-7.2.3.4/lib
# A single ':' separator; the original '::' added a stray empty path entry.
export LIBRARY_PATH=/home/s4/Downloads/TensorRT/TensorRT-7.2.3.4/lib:$LIBRARY_PATH

# Reload the shell configuration so the new paths take effect
source ~/.bashrc
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
pt模型转通用onnx模型
import torch
import torch.onnx as onnx


def convert2onnx(model, input_size, batch_size, save_path):
    """Export a PyTorch model to an ONNX file.

    A random dummy input of shape ``[batch_size, *input_size]`` is generated
    and passed to ``torch.onnx.export`` together with the model.

    :param model: the PyTorch model to export.
    :param input_size: shape of one input sample without the batch
        dimension, as a sequence of ints (e.g. ``[3, 224, 224]``). The
        sequence is not modified.
    :param batch_size: int placed in front of ``input_size`` to form the
        shape of the dummy input.
    :param save_path: destination handed to ``torch.onnx.export``.
    """
    # Build a fresh shape instead of inserting into the caller's list:
    # mutating the argument would leave the batch size inside ``input_size``
    # and make a second call with the same list produce a wrong shape.
    dummy_shape = [batch_size, *input_size]
    dummy_input = torch.randn(dummy_shape)
    torch.onnx.export(model, dummy_input, save_path, verbose=False)
ONNX 模型转 TensorRT 推断用的 .engine 模型
使用 TensorRT 自带的 trtexec 工具进行转换。
# Build a TensorRT engine from the ONNX model with FP16 enabled.
# The tool is named trtexec and its options use the --key=value form.
trtexec --onnx=xxx.onnx --saveEngine=xxx.engine --fp16
使用trt模型推断
import torch
from torchvision.transforms import Normalize
import numpy as np
import pycuda.driver as cuda


# Prepare image data that has already been loaded into memory
def preprocess_image(img, f_type=16):
    """Normalize an image array and return it as a NumPy array.

    The array is wrapped as a tensor, axes 0 and 2 are swapped, then axes 1
    and 2, and the result is normalized with the fixed per-channel mean/std
    constants below.

    :param img: NumPy array holding the image; presumably laid out as
        (height, width, channels) with 3 channels -- confirm with callers.
    :param f_type: output precision selector: 16 gives float16, 32 gives
        float32, any other value gives float64.
    :return: the normalized data as a NumPy array of the selected dtype.
    """
    normalizer = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

    tensor = torch.from_numpy(img)
    swapped = torch.transpose(tensor, 0, 2)
    reordered = torch.transpose(swapped, 1, 2)
    normalized = normalizer(reordered)

    if f_type == 16:
        target_dtype = np.float16
    elif f_type == 32:
        target_dtype = np.float32
    else:
        target_dtype = np.float64
    return np.array(normalized, dtype=target_dtype)


# Run a prediction with TensorRT
def predict(batch, d_input, d_output, output, stream, bindings, context):
    """Run one inference pass and return the filled ``output`` buffer.

    :param batch: host-side input data copied to ``d_input``.
    :param d_input: device allocation that receives ``batch``.
    :param d_output: device allocation the predictions are copied from.
    :param output: host-side buffer that receives the predictions.
    :param stream: CUDA stream on which the copies and execution are queued.
    :param bindings: bindings passed to ``context.execute_async_v2``.
    :param context: execution context used to run the model.
    :return: ``output``, after the stream has been synchronized.
    """
    # Host -> device: queue the input upload.
    cuda.memcpy_htod_async(d_input, batch, stream)
    # Queue the model execution on the same stream.
    context.execute_async_v2(bindings, stream.handle, None)
    # Device -> host: queue the download of the predictions.
    cuda.memcpy_dtoh_async(output, d_output, stream)
    # Wait for everything queued on the stream before handing back the result.
    stream.synchronize()
    return output
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
# TensorRT usage demo: shows the speed-up obtained with the TensorRT framework.
from commo.torchTool import preprocess_image, predict
import tensorrt as trt
import pycuda.driver as cuda
import pycuda.autoinit
import numpy as np
import cv2 as cv
import time
import torchvision.models as models
import torch
import torch.onnx

BATCH_SIZE = 32

# load the pretrained model
# torchvision's resnet50 has no ``channels`` argument (passing it raises a
# TypeError), and the weights are only pretrained when explicitly requested.
resnet50_gpu = models.resnet50(pretrained=True).to('cuda').eval()

# load data as input_batch
url = '../image/1.jpg'
img = cv.imread(url)
if img is None:
    # cv.imread signals failure by returning None instead of raising.
    raise FileNotFoundError('cannot read image: {}'.format(url))
img = cv.resize(img, (640, 640), interpolation=cv.INTER_AREA)
# Repeat the single image along a new leading axis to form the batch.
input_batch = np.repeat(
    np.expand_dims(np.array(img, dtype=np.float32), axis=0),
    BATCH_SIZE,
    axis=0,
)
# Move the last axis to position 1 for the torch model.
input_batch_chw = torch.from_numpy(input_batch).transpose(1, 3).transpose(2, 3)
input_batch_gpu = input_batch_chw.to("cuda")

# execute torch model in cuda without TensorRT
t1 = time.time()
with torch.no_grad():
    predictions = np.array(resnet50_gpu(input_batch_gpu).cpu())
t2 = time.time()
print('pytorch model use {} ms!'.format((t2 - t1) * 1000))

indices = (-predictions[0]).argsort()[:5]
print("Class | Likelihood (torch)")
res = list(zip(indices, predictions[0][indices]))
print('predict result is :{}'.format(res))

preprocessed_images = np.array([preprocess_image(image) for image in input_batch])
print('preprocessed shape is {}'.format(preprocessed_images.shape))

# Read the serialized engine; the context manager closes the file afterwards.
with open("../trt/resnet_engine_pytorch.trt", "rb") as f:
    runtime = trt.Runtime(trt.Logger(trt.Logger.WARNING))
    engine = runtime.deserialize_cuda_engine(f.read())
context = engine.create_execution_context()

output = np.empty([BATCH_SIZE, 25200 * 12], dtype=np.float16)
print('Output size is {}'.format(output.shape))

# allocate device memory
# NOTE(review): the input buffer is sized from the float32 batch while the
# data uploaded is the preprocessed batch; left as-is because the engine's
# input binding dtype is not visible here -- confirm against the engine.
d_input = cuda.mem_alloc(1 * input_batch.nbytes)
d_output = cuda.mem_alloc(1 * output.nbytes)
bindings = [int(d_input), int(d_output)]
stream = cuda.Stream()

t1 = time.time()
pred = predict(preprocessed_images, d_input, d_output, output, stream, bindings, context)
# Use BATCH_SIZE rather than a hard-coded 32 so the shapes stay in sync.
res = np.resize(pred, (BATCH_SIZE, 25200, 12))
t2 = time.time()
print(res.shape)
print('trf model use {}ms! '.format((t2 - t1) * 1000))

indices = (-pred[0]).argsort()[:7]
print("Class | Probability (trf)")
print(list(zip(indices, pred[0][indices])))
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
- 46
- 47
- 48
- 49
- 50
- 51
- 52
- 53
- 54
- 55
- 56
- 57
- 58
- 59
- 60
- 61
- 62
- 63
- 64
- 65
- 66
- 67
- 68
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。