
TensorRT Conversion && On-Vehicle Inference (TensorRT usage demo)

1. Converting ONNX to TRT

The basic workflow is:
1. Create a builder, use the builder to create a network, then parse the ONNX file with the parser.
2. Set the required build parameters.
3. Have the builder build the network and save the result as a TRT model.
The logging.h file used here is the one provided with the NVIDIA samples.

#include "NvInfer.h"
#include "NvOnnxParser.h"
#include "logging.h"
#include <cassert>
#include <fstream>
#include <iostream>
#include <string>
using namespace std;
using namespace nvonnxparser;
using namespace nvinfer1;

#define USE_FP16
static Logger gLogger;

void saveToTrtModel(std::string trt_save_path, IHostMemory *trtModelStream){
    std::ofstream out(trt_save_path, std::ios::binary);
    if (!out.is_open()){
        std::cout << "Failed to open file!" << std::endl;
        return;
    }
    out.write(reinterpret_cast<const char*>(trtModelStream->data()), trtModelStream->size());
    out.close();
}

int onnx2trt(){
    std::string onnx_path = "../onnx_model/plate_detect.onnx";
    std::string trt_save_path = "../onnx_model/plate_detect.trt";
    int batch_size = 1;
    IBuilder * builder = createInferBuilder(gLogger);
    INetworkDefinition *network = builder->createNetworkV2(1U); // 1U == kEXPLICIT_BATCH flag
    // Parse the ONNX model
    IParser *parser = nvonnxparser::createParser(*network, gLogger);
    if(!parser->parseFromFile(onnx_path.c_str(), (int)nvinfer1::ILogger::Severity::kWARNING)){
        std::cout << " parse onnx file fail ..." << std::endl;
        return -1;
    }
    IBuilderConfig *config = builder->createBuilderConfig();
    builder->setMaxBatchSize(batch_size);
    config->setMaxWorkspaceSize(1<<30);
    auto profile = builder->createOptimizationProfile();
    auto input_tensor=network->getInput(0);
    auto input_dims = input_tensor->getDimensions();
    input_dims.d[0] = 1;
    profile->setDimensions(input_tensor->getName(), nvinfer1::OptProfileSelector::kMIN, input_dims);
    profile->setDimensions(input_tensor->getName(), nvinfer1::OptProfileSelector::kOPT, input_dims);
    input_dims.d[0] = batch_size;
    profile->setDimensions(input_tensor->getName(), nvinfer1::OptProfileSelector::kMAX, input_dims);
    config->addOptimizationProfile(profile);

#ifdef USE_FP16
    config->setFlag(BuilderFlag::kFP16);
#endif
#ifdef USE_INT8
    config->setFlag(BuilderFlag::kINT8);
#endif
    ICudaEngine *engine = builder->buildEngineWithConfig(*network, *config);
    assert(engine);
    IHostMemory *trtModelStream = engine->serialize(); // serialize the engine so it can be saved as a .trt file
    saveToTrtModel(trt_save_path, trtModelStream);
    trtModelStream->destroy();
    parser->destroy();
    engine->destroy();
    network->destroy();
    config->destroy();
    builder->destroy();

    return 0;
}
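
A minimal entry point for the conversion is sketched below; it is not part of the original listing and simply calls the onnx2trt() function defined above:

int main() {
    // Build ../onnx_model/plate_detect.onnx into ../onnx_model/plate_detect.trt
    return onnx2trt();
}

The same conversion can also be done without writing any code by using the trtexec tool shipped with TensorRT, e.g. trtexec --onnx=plate_detect.onnx --saveEngine=plate_detect.trt --fp16.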


2. TRT Model Inference

The converted TRT model is then run for inference in the deployment project.
The basic workflow is:
1. Parse the TRT model from file and deserialize it into a CUDA inference engine.
2. Allocate the CPU and GPU memory needed for inference.
3. Run the engine and collect the results.
Inference is done by calling the class interface below.

#include "logging.h"
#include "NvInfer.h"
#include "NvOnnxParser.h"
#include <NvInferRuntime.h>
#include <cuda_runtime.h> // cuda include
#include <cassert>
#include <fstream>
#include <iostream>
#include <string>
using namespace nvinfer1;
static Logger gLogger;

// Simple CUDA error-checking macro (plays the role of the CHECK macro used in the NVIDIA samples)
#define CHECK(call)                              \
    do {                                         \
        cudaError_t _status = (call);            \
        assert(_status == cudaSuccess);          \
    } while (0)

class TrtDetct
{
private:
    char *_trtModelStream{nullptr};
    IRuntime* _runtime = nullptr;
    ICudaEngine* _engine=nullptr;
    IExecutionContext* _context=nullptr;
    void *_inferbuffers[2];
    int _outputSize = 0;
    int _input_h = 640;
    int _input_w = 640;
    cudaStream_t _stream;

private:
    // Compute the total number of output elements from the dimensions of binding 1 (the output)
    int getoutputSize(){
        auto out_dims = _engine->getBindingDimensions(1);
        int outputSize = 1;
        for(int j = 0; j < out_dims.nbDims; j++) {
            std::cout << "j = " << j << " size = " << out_dims.d[j] << std::endl;
            outputSize *= out_dims.d[j];
        }
        return outputSize;
    }
public:
    TrtDetct(/* args */){};
    ~TrtDetct(){
        if (nullptr != _trtModelStream){
            delete [] _trtModelStream;
        }
    };
    // Read the serialized model from file and deserialize it into an engine
    void load_trtmodel(std::string trt_model_path){
        std::ifstream file(trt_model_path, std::ios::binary);
        size_t size{0};
        if (file.good()) {
            file.seekg(0, file.end);
            size = file.tellg();
            file.seekg(0, file.beg);
            _trtModelStream = new char[size];
            assert(_trtModelStream);
            file.read(_trtModelStream, size);
            file.close();
        }
        _runtime = createInferRuntime(gLogger);
        assert(_runtime != nullptr);
        _engine = _runtime->deserializeCudaEngine(_trtModelStream, size);
        assert(_engine != nullptr);
        _context = _engine->createExecutionContext();
        assert(_context != nullptr);
        initbuff();
    }

    // Allocate the GPU buffers and CUDA stream needed for inference
    void initbuff(){
        _outputSize = getoutputSize();
        // The binding names "input"/"output" were fixed when the ONNX model was exported
        const int inputIndex = _engine->getBindingIndex("input");
        const int outputIndex = _engine->getBindingIndex("output");
        assert(inputIndex == 0);
        assert(outputIndex == 1);
        CHECK(cudaMalloc((void**)&_inferbuffers[inputIndex],  3 * _input_h * _input_w * sizeof(float)));  // allocate GPU memory for the TRT input
        CHECK(cudaMalloc((void**)&_inferbuffers[outputIndex], _outputSize * sizeof(float)));              // allocate GPU memory for the TRT output
        CHECK(cudaStreamCreate(&_stream));
    }
    // Run inference
    void infer_trtmodel(){
        // Fill _inferbuffers[0] with the preprocessed image data (CUDA preprocessing on the GPU)
        _context->enqueueV2((void **)_inferbuffers, _stream, nullptr);
        // Post-process the model output in _inferbuffers[1]; do it on the GPU if possible, otherwise copy it back to the CPU first
    }
};
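
A minimal usage sketch of the class above (the image preprocessing that fills _inferbuffers[0] and the post-processing of _inferbuffers[1] are application-specific and only hinted at in the comments of infer_trtmodel()):

int main() {
    TrtDetct detector;
    // Deserialize the engine produced in step 1
    detector.load_trtmodel("../onnx_model/plate_detect.trt");
    // Fill the input buffer with a preprocessed image, then run inference
    detector.infer_trtmodel();
    return 0;
}

For reference, the same inference flow is also written out step by step below, without the class wrapper.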


// Note: CUDA_CHECK below is a cudaError-checking macro (same role as CHECK in the class above)
//step1: create the runtime
IRuntime* runtime = createInferRuntime(gLogger);
assert(runtime != nullptr);
//step2: deserialize the engine
ICudaEngine* engine = runtime->deserializeCudaEngine(modelData, modelSize, nullptr);
assert(engine != nullptr);
// print the input/output bindings
printf("Bindings after deserializing:\n");
for (int bi = 0; bi < engine->getNbBindings(); bi++) 
{
    if (engine->bindingIsInput(bi) == true) 
    {
        printf("Binding %d (%s): Input.\n",  bi, engine->getBindingName(bi));
    } 
    else 
    {
        printf("Binding %d (%s): Output.\n", bi, engine->getBindingName(bi));
    }
}



//step3: create an execution context, which holds the memory for intermediate activations
IExecutionContext *context = engine->createExecutionContext();
assert(context != nullptr);

//step4: look up the input/output binding indices by blob name
int inputIndex = engine->getBindingIndex(INPUT_BLOB_NAME);
int outputIndex = engine->getBindingIndex(OUTPUT_BLOB_NAME);
//step5: using these indices, declare a buffers array that will point to the input and output buffers on the GPU
void* buffers[2];
//step6: allocate GPU memory for the input and output
CUDA_CHECK(cudaMalloc(&buffers[inputIndex], batchSize * inputDim.c() * inputDim.h() * inputDim.w() * sizeof(float)));
CUDA_CHECK(cudaMalloc(&buffers[outputIndex], batchSize * outputDim.c() * outputDim.h() * outputDim.w() * sizeof(float)));
//step7: create a CUDA stream
cudaStream_t stream;
CUDA_CHECK(cudaStreamCreate(&stream));
//step8: copy the input data from the CPU (host) to the GPU (device)
CUDA_CHECK(cudaMemcpyAsync(buffers[inputIndex],  // device buffer that receives the input
                           input,                // input data in host memory
                           batchSize * inputDim.c() * inputDim.h() * inputDim.w() * sizeof(float),
                           cudaMemcpyHostToDevice,
                           stream));
//step9: asynchronous inference
context->enqueueV2(buffers, stream, nullptr);

//step10: copy the output data from the GPU (device) back to the CPU (host)
CUDA_CHECK(cudaMemcpyAsync(output,                // host buffer that receives the output
                           buffers[outputIndex],  // device buffer holding the model output
                           batchSize * outputDim.c() * outputDim.h() * outputDim.w() * sizeof(float),
                           cudaMemcpyDeviceToHost,
                           stream));
//step11: synchronize the CUDA stream
CUDA_CHECK(cudaStreamSynchronize(stream));
//step12: release resources
cudaStreamDestroy(stream);
context->destroy();
engine->destroy();
runtime->destroy();
CUDA_CHECK(cudaFree(buffers[inputIndex]));
CUDA_CHECK(cudaFree(buffers[outputIndex]));
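
One detail worth noting: cudaMemcpyAsync only overlaps with other work when the host buffers are page-locked (pinned). A minimal sketch, assuming the same CUDA_CHECK macro, host pointers, and dimensions as in the snippet above:

// Pinned (page-locked) host buffers make the cudaMemcpyAsync calls above truly asynchronous
float* input  = nullptr;
float* output = nullptr;
CUDA_CHECK(cudaMallocHost((void**)&input,  batchSize * inputDim.c()  * inputDim.h()  * inputDim.w()  * sizeof(float)));
CUDA_CHECK(cudaMallocHost((void**)&output, batchSize * outputDim.c() * outputDim.h() * outputDim.w() * sizeof(float)));
// ... run steps 7 to 11 from the snippet above ...
CUDA_CHECK(cudaFreeHost(input));
CUDA_CHECK(cudaFreeHost(output));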