The basic workflow is:
1. Create a builder, have the builder create a network, and use the ONNX parser to parse the ONNX file into that network.
2. Set the necessary build parameters.
3. Have the builder build the engine from the network and save it as a TRT model.
The logging.h file used here is the one that ships with the NVIDIA TensorRT samples.
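If the samples' logging.h is not at hand, a minimal stand-in can be sketched as follows. This is only an assumption about what that header provides; the sample version adds severity filtering options, reporting helpers, and more. Note that TensorRT 8+ requires log() to be noexcept.

// Minimal Logger sketch (assumption, not the actual samples header).
// Only filters out messages below WARNING severity.
#include "NvInfer.h"
#include <iostream>

class Logger : public nvinfer1::ILogger {
public:
    void log(Severity severity, const char* msg) noexcept override {
        if (severity <= Severity::kWARNING) {
            std::cout << msg << std::endl;
        }
    }
};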
#include "NvInfer.h" #include "NvOnnxParser.h" #include "logging.h" #include<fstream> #include<iostream> #include<string> using namespace std; using namespace nvonnxparser; using namespace nvinfer1; #define USE_FP16 static Logger gLogger; void saveToTrtModel(std::string trt_save_path,IHostMemory*trtModelStream){ std::ofstream out(trt_save_path, std::ios::binary); if (!out.is_open()){ std::cout << "打开文件失败!" <<std:: endl; } out.write(reinterpret_cast<const char*>(trtModelStream->data()), trtModelStream->size()); out.close(); } int onnx2trt(){ std::string onnx_path = "../onnx_model/plate_detect.onnx"; std::string trt_save_path = "../onnx_model/plate_detect.trt"; int batch_size = 1; IBuilder * builder = createInferBuilder(gLogger); INetworkDefinition *network = builder->createNetworkV2(1U); // 解析模型 IParser *parser = nvonnxparser::createParser(*network, gLogger); if(!parser->parseFromFile(onnx_path.c_str(), (int)nvinfer1::ILogger::Severity::kWARNING)){ std::cout << " parse onnx file fail ..." << std::endl; return -1; } IBuilderConfig *config = builder->createBuilderConfig(); builder->setMaxBatchSize(batch_size); config->setMaxWorkspaceSize(1<<30); auto profile = builder->createOptimizationProfile(); auto input_tensor=network->getInput(0); auto input_dims = input_tensor->getDimensions(); input_dims.d[0] = 1; profile->setDimensions(input_tensor->getName(), nvinfer1::OptProfileSelector::kMIN, input_dims); profile->setDimensions(input_tensor->getName(), nvinfer1::OptProfileSelector::kOPT, input_dims); input_dims.d[0] = batch_size; profile->setDimensions(input_tensor->getName(), nvinfer1::OptProfileSelector::kMAX, input_dims); config->addOptimizationProfile(profile); #ifdef USE_FP16 config->setFlag(BuilderFlag::kFP16); #endif #ifdef USE_INT8 config->setFlag(BuilderFlag::kINT8); #endif ICudaEngine *engine = builder->buildEngineWithConfig(*network, *config); assert(engine); IHostMemory* trtModelStream = engine->serialize(); //序列化 保存trt saveToTrtModel(trt_save_path.c_str(), trtModelStream); parser->destroy(); engine->destroy(); network->destroy(); config->destroy(); builder->destroy(); return 0; }
The converted TRT model is then used for inference in the deployment project.
Basic workflow:
1. TensorRT reads the model from the file and deserializes it into a CUDA inference engine.
2. Allocate the CPU and GPU memory needed for inference.
3. Run the engine and collect the results.
Inference can be done by calling the class interface below.
#include "logging.h" #include "NvInfer.h" #include "NvOnnxParser.h" #include <NvInferRuntime.h> #include <cuda_runtime.h> // cuda include #include<fstream> #include<iostream> #include<string> using namespace nvinfer1; static Logger gLogger; class TrtDetct { private: char *_trtModelStream{nullptr}; IRuntime* _runtime = nullptr; ICudaEngine* _engine=nullptr; IExecutionContext* _context=nullptr; void *_inferbuffers[2]; int _outputSize = 0; int _input_h = 640; int _input_w = 640; cudaStream_t _stream; private: int getoutputSize(){ auto out_dims = _engine->getBindingDimensions(1); int outputSize = 1; for(int j = 0; j < out_dims.nbDims; j++) { std::cout << "j = " << j << " size = " << out_dims.d[j] << std::endl; outputSize *= out_dims.d[j]; } return outputSize; } public: TrtDetct(/* args */){}; ~TrtDetct(){ if (nullptr != _trtModelStream){ delete [] _trtModelStream; } }; // 文件读取模型,并反序列化成engine void load_trtmodel(std::string trt_model_path){ std::ifstream file(trt_model_path, std::ios::binary); size_t size{0}; if (file.good()) { file.seekg(0, file.end); size = file.tellg(); file.seekg(0, file.beg); _trtModelStream = new char[size]; assert(_trtModelStream); file.read(_trtModelStream, size); file.close(); } _runtime = createInferRuntime(gLogger); assert(_runtime != nullptr); _engine = _runtime->deserializeCudaEngine(_trtModelStream, size); assert(_engine != nullptr); _context = _engine->createExecutionContext(); assert(_context != nullptr); initbuff(); } //分配处理相关内存 void initbuff(){ _outputSize = getoutputSize(); // 这两个值在生成onnx时刻已经固定 const int inputIndex = _engine->getBindingIndex("input"); const int outputIndex = _engine->getBindingIndex("output"); assert(inputIndex == 0); assert(outputIndex == 1); CHECK(cudaMalloc((void**)&_inferbuffers[inputIndex], 3 * _input_h * _input_w * sizeof(float))); //trt输入内存申请 CHECK(cudaMalloc((void**)&_inferbuffers[outputIndex], _outputSize * sizeof(float))); //trt输出内存申请 CHECK(cudaStreamCreate(&_stream)); } // 推理 void infer_trtmodel(){ //图像数据填充_inferbuffers[0],GPU CUDA处理 _context->enqueueV2((void **)_inferbuffers, _stream, nullptr); //_inferbuffers[1]模型输出后处理,可以GPU处理,否则拷贝到cpu处理 } };
//step1: create the runtime
IRuntime* runtime = createInferRuntime(gLogger);
assert(runtime != nullptr);

//step2: deserialize the engine
ICudaEngine* engine = runtime->deserializeCudaEngine(modelData, modelSize, nullptr);
assert(engine != nullptr);

// print the input/output bindings
printf("Bindings after deserializing:\n");
for (int bi = 0; bi < engine->getNbBindings(); bi++) {
    if (engine->bindingIsInput(bi) == true) {
        printf("Binding %d (%s): Input.\n", bi, engine->getBindingName(bi));
    } else {
        printf("Binding %d (%s): Output.\n", bi, engine->getBindingName(bi));
    }
}

//step3: create the context, which holds the intermediate activation memory
IExecutionContext* context = engine->createExecutionContext();
assert(context != nullptr);

//step4: look up the input/output binding indices by blob name
int inputIndex = engine->getBindingIndex(INPUT_BLOB_NAME);
int outputIndex = engine->getBindingIndex(OUTPUT_BLOB_NAME);

//step5: use these indices to set up an array of pointers to the GPU input/output buffers
void* buffers[2];

//step6: allocate GPU memory for the input and output
CUDA_CHECK(cudaMalloc(&buffers[inputIndex], batchSize * inputDim.c() * inputDim.h() * inputDim.w() * sizeof(float)));
CUDA_CHECK(cudaMalloc(&buffers[outputIndex], batchSize * outputDim.c() * outputDim.h() * outputDim.w() * sizeof(float)));

//step7: create a CUDA stream
cudaStream_t stream;
CUDA_CHECK(cudaStreamCreate(&stream));

//step8: copy the input data from CPU to GPU
CUDA_CHECK(cudaMemcpyAsync(buffers[inputIndex],   // device buffer that receives the input
                           input,                 // input data in host memory
                           batchSize * inputDim.c() * inputDim.h() * inputDim.w() * sizeof(float),
                           cudaMemcpyHostToDevice, stream));

//step9: asynchronous inference
context->enqueueV2(buffers, stream, nullptr);

//step10: copy the output data from GPU to CPU
CUDA_CHECK(cudaMemcpyAsync(output,                // host memory that receives the output
                           buffers[outputIndex],  // device buffer holding the model output
                           batchSize * outputDim.c() * outputDim.h() * outputDim.w() * sizeof(float),
                           cudaMemcpyDeviceToHost, stream));

//step11: synchronize the CUDA stream
CUDA_CHECK(cudaStreamSynchronize(stream));

//step12: release resources
cudaStreamDestroy(stream);
context->destroy();
engine->destroy();
runtime->destroy();
CUDA_CHECK(cudaFree(buffers[inputIndex]));
CUDA_CHECK(cudaFree(buffers[outputIndex]));
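The input and output pointers used in steps 8 and 10 are host-side buffers that the snippet assumes already exist. One way to allocate them is sketched below; using pinned (page-locked) memory is an optional choice here, but cudaMemcpyAsync only overlaps with the stream when the host memory is pinned.

// Hypothetical host-side buffers for steps 8 and 10 (sizes match the copies above).
float* input = nullptr;
float* output = nullptr;
CUDA_CHECK(cudaMallocHost((void**)&input, batchSize * inputDim.c() * inputDim.h() * inputDim.w() * sizeof(float)));
CUDA_CHECK(cudaMallocHost((void**)&output, batchSize * outputDim.c() * outputDim.h() * outputDim.w() * sizeof(float)));
// ... fill `input` with preprocessed image data before step 8 ...
// ... after step 11, read the results from `output`, then cudaFreeHost both buffers ...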