# Export a trained segmentation network to ONNX, then run the exported graph
# with ONNX Runtime on a random tensor and print it next to the PyTorch output.
import sys
sys.path.insert(0, '.')  # must precede the project-local `lib` / `configs` imports
import argparse
import torch
import torch.nn as nn
from PIL import Image
import numpy as np
import cv2

import lib.transform_cv2 as T
from lib.models import model_factory
from configs import set_cfg_from_file

import onnx
import onnxruntime

torch.set_grad_enabled(False)
np.random.seed(123)
# The random tensors below come from torch, so torch has to be seeded as well
# for the printed comparison to be reproducible between runs.
torch.manual_seed(123)


# args
parse = argparse.ArgumentParser()
parse.add_argument('--config', dest='config', type=str,
                   default='configs/bisenetv1_steel_t.py',)
parse.add_argument('--weight-path', dest='weight_path', type=str,
                   default='./res/model_final_120_0.864089846611023.pth',)
parse.add_argument('--img-path', dest='img_path', type=str,
                   default='./datasets/steel_total/image/train/11_7.jpg',)
args = parse.parse_args()
cfg = set_cfg_from_file(args.config)


# define model
net = model_factory[cfg.model_type](cfg.n_cats, aux_mode='pred')
# NOTE: torch.load unpickles the checkpoint; only load files from a trusted source.
# NOTE(review): strict=False presumably tolerates checkpoint keys that the
# 'pred' variant of the model does not have -- confirm. It also silently
# ignores keys that are genuinely missing from the checkpoint.
net.load_state_dict(torch.load(args.weight_path, map_location='cpu'), strict=False)
net.eval()

# Input names: a list, because a graph may have several inputs.
input_names = ['input']
# Output names: a list, because a graph may have several outputs.
output_names = ['output']

# Example input used to trace the model during export.
dummy_input = torch.rand(1, 3, 512, 512)
output_path = "bisenet.onnx"

# Export
torch.onnx.export(
    net,
    dummy_input,
    output_path,
    export_params=True,
    opset_version=11,
    do_constant_folding=True,
    input_names=input_names,
    output_names=output_names,
)

# Load the ONNX model that was just written (same path as the export above).
onnx_model = onnx.load(output_path)
# Recent ONNX Runtime builds that ship several execution providers require the
# provider list to be passed explicitly; CPU matches the CPU PyTorch reference.
onnx_session = onnxruntime.InferenceSession(
    onnx_model.SerializeToString(),
    providers=['CPUExecutionProvider'],
)

# Test the ONNX model with a random tensor and print both outputs side by side.
x = torch.randn(1, 3, 512, 512).numpy()
onnx_output = onnx_session.run(output_names, {input_names[0]: x})[0]
print(f"PyTorch output: {net(torch.from_numpy(x)).detach().numpy()[0, :5]}")
print(f"ONNX output: {onnx_output[0, :5]}")
# Run a serialized TensorRT engine on a single image and write the scaled
# prediction to ./res.jpg.
import torch
import torchvision
from PIL import Image
from torchvision import transforms
import torchvision.models as models
import matplotlib.pyplot as plt
import time
import tensorrt as trt
import pycuda.driver as cuda
import pycuda.autoinit  # imported for its side effect (CUDA initialisation)
import pdb
import os
import numpy as np
import cv2

# This logger is required to build an engine
TRT_LOGGER = trt.Logger()

filename = "./datasets/steel_total/image/train/11_1.jpg"
engine_file_path = "bisenet_engine.trt"


class HostDeviceMem(object):
    """Pair of buffers for one engine binding.

    ``host`` is the CPU-side (page-locked) array and ``device`` is the
    matching GPU allocation.
    """

    def __init__(self, host_mem, device_mem):
        self.host = host_mem
        self.device = device_mem

    def __str__(self):
        return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device)

    def __repr__(self):
        return self.__str__()


def allocate_buffers(engine):
    """Allocate host/device buffers for every binding of ``engine``.

    Returns:
        ``(inputs, outputs, bindings, stream)`` where ``inputs`` and
        ``outputs`` are lists of :class:`HostDeviceMem`, ``bindings`` is the
        list of device addresses (as ints) in binding order, and ``stream``
        is a new CUDA stream.
    """
    inputs = []
    outputs = []
    bindings = []
    stream = cuda.Stream()
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Allocate host and device buffers of identical byte size.
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        # The engine is handed raw device addresses, in binding order.
        bindings.append(int(device_mem))
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
    return inputs, outputs, bindings, stream


def do_inference(context, bindings, inputs, outputs, stream, batch_size=1):
    """Copy inputs to the GPU, run the engine, and copy the outputs back.

    All transfers and the execution are queued on ``stream``; the function
    blocks on ``stream.synchronize()`` before returning.

    Returns:
        A list with the host array of every output binding.
    """
    # Transfer data from CPU to the GPU.
    for inp in inputs:
        cuda.memcpy_htod_async(inp.device, inp.host, stream)
    # Run inference.
    # NOTE(review): the call is asynchronous, so the time printed here is
    # presumably only the launch cost, not the full execution time.
    t_model = time.perf_counter()
    context.execute_async(batch_size=batch_size, bindings=bindings, stream_handle=stream.handle)
    print(f'only one line cost:{time.perf_counter() - t_model:.8f}s')
    # Transfer predictions back from the GPU.
    for out in outputs:
        cuda.memcpy_dtoh_async(out.host, out.device, stream)
    # Synchronize the stream
    stream.synchronize()
    # Return only the host outputs.
    return [out.host for out in outputs]


print("Reading engine from file {}".format(engine_file_path))
with open(engine_file_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
    engine = runtime.deserialize_cuda_engine(f.read())
if engine is None:
    # Deserialization reports failure by returning None; fail here with a
    # clear message instead of an AttributeError on the next line.
    raise RuntimeError("Failed to deserialize engine from {}".format(engine_file_path))

# create the context for this engine
context = engine.create_execution_context()
# allocate buffers for input and output
inputs, outputs, bindings, stream = allocate_buffers(engine)  # input, output: host  # bindings

# Per-channel normalisation constants (original note: "city, rgb").
normalize = transforms.Normalize(mean=(0.3442322, 0.3442322, 0.3442322),
                                 std=(0.21136102, 0.21136102, 0.21136102))
transform = transforms.Compose([
    transforms.Resize(512),
    transforms.ToTensor(),
    normalize,
])

t_model = time.perf_counter()
# Read the image. convert("RGB") leaves RGB files unchanged and lets
# single-channel files pass the 3-channel Normalize above.
img = Image.open(filename).convert("RGB")
# Normalise the image.
img_p = transform(img)
# Add the batch dimension.
img_normalize = torch.unsqueeze(img_p, 0)
# Convert to numpy.
img_normalize_np = img_normalize.cpu().data.numpy()
# Load data into the pre-allocated page-locked buffer. Copying (rather than
# rebinding ``inputs[0].host``) keeps the buffer that was sized for the engine
# and raises if the image does not have the element count the engine expects.
np.copyto(inputs[0].host, img_normalize_np.ravel())
# Do Inference
trt_outputs = do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)  # numpy data
print(f'do inference cost:{time.perf_counter() - t_model:.8f}s')
print(len(trt_outputs))
# The first output is viewed as a 512x512 map and scaled by 255 before saving.
pred = trt_outputs[0].reshape(512, 512)*255
cv2.imwrite('./res.jpg', pred)
# Copyright © 2003-2013 www.wpsshop.cn. All rights reserved.