ONNXRuntime是微软推出的一款针对ONNX(Open Neural Network Exchange)模型格式的推理框架,用户可以非常便利地用其运行一个onnx模型。ONNXRuntime支持多种运行后端,包括CPU,GPU,TensorRT,DML等。可以说ONNXRuntime是对ONNX模型最原生的支持,只要掌握模型导出的相应操作,便能将不同框架的模型进行部署,提高开发效率。
Release ONNX Runtime v1.9.0 · microsoft/onnxruntime · GitHub
- # Add the ONNX Runtime header directory to the compiler's include search path
- include_directories(......../onnxruntime/include)
- # Add the ONNX Runtime library directory to the linker's search path
- link_directories(......../onnxruntime/lib)
模型导出 .onnx
- import torch
- checkpoint = torch.load(model_path)
- model = ModelNet(params)
- model.load_state_dict(checkpoint['model'])
- model.eval()
- input_x_1 = torch.randn(10,20)
- input_x_2 = torch.randn(1,20,5)
- output, mask = model(input_x_1, input_x_2)
- torch.onnx.export(model,
- (input_x_1, input_x_2),
- 'model.onnx',
- input_names = ['input','input_mask'],
- output_names = ['output','output_mask'],
- opset_version=11,
- verbose = True,
- dynamic_axes={'input':{1,'seqlen'}, 'input_mask':{1:'seqlen',2:'time'},'output_mask':{0:'time'}})

torch.onnx.export的参数在文档里面都有说明,其中opset_version对应的版本很重要;dynamic_axes用于将输入和输出的对应维度设置为动态维度,不设置的话输入和输出Tensor的shape是不能改变的,如果输入shape固定就不需要加。
- import onnxruntime as ort
- import numpy as np
- ort_session = ort.InferenceSession('model.onnx')
- outputs = ort_session.run(None,{'input':np.random.randn(10,20),'input_mask':np.random.randn(1,20,5)})
- # 由于设置了dynamic_axes,支持对应维度的变化
- outputs = ort_session.run(None,{'input':np.random.randn(10,5),'input_mask':np.random.randn(1,26,2)})
- # outputs 为 包含'output'和'output_mask'的list
- import onnx
- model = onnx.load('model.onnx')
- onnx.checker.check_model(model)
如果没有异常,代表导出的模型没有问题。目前torch.onnx.export只能识别部分受支持的Tensor操作,详情参考Supported operators;对于transformer等基本的模型都是没有问题的。如果出现ATen等问题,就需要对模型中不支持的Tensor操作进行改写,以免影响C++对该模型的使用。
模型加载时,C++后端会调用对应的Load()函数,InferenceSession一共提供了8种Load函数,包括从url,ModelProto,void* model data,model istream等读取ModelProto。InferenceSession会对ModelProto进行解析,然后持有其对应的Model成员。
即sess->Initialize(),这时InferenceSession会根据自身持有的model和execution providers进行进一步的初始化(在第一阶段Session构造时仅仅持有了空壳子成员变量)。该步骤是InferenceSession初始化的核心,一系列核心操作如内存分配,model partition,kernel注册等都会在这个阶段完成。
- #include <cstring>
- #include <stdexcept>
- #include <string>
- #include <vector>
- #include "onnxruntime_cxx_api.h"
- #include "opencv2/opencv.hpp"
- #define CHW 0
- // Base handler that wraps one Ort::Session for a model with a single input and
- // one or more outputs. The session is created during construction and the
- // input/output names and shapes are cached in the members below.
- // transform() is the hook subclasses must implement (presumably image -> input tensor).
- class BasicOrtHandler {
- public:
- // Builds a 4-D float tensor (batch must be 1) from `mat`: CHW layout when
- // data_format == CHW, HWC layout otherwise. The tensor is created over the
- // buffer of `tensor_value_handler`, so that vector must outlive the result.
- // Throws std::runtime_error on a dims/batch/channel mismatch.
- // (Declared without the `BasicOrtHandler::` prefix: an extra qualification on
- // an in-class member declaration is ill-formed.)
- Ort::Value create_tensor(const cv::Mat &mat, const std::vector<int64_t> &tensor_dims, const Ort::MemoryInfo &memory_info_handler, std::vector<float> &tensor_value_handler, unsigned int data_format);
- protected:
- Ort::Env ort_env;
- Ort::Session *ort_session = nullptr; // allocated with new in initialize_handler()
- const char *input_name = nullptr;
- std::vector<const char *> input_node_names;
- std::vector<int64_t> input_node_dims; // 1 input only.
- std::size_t input_tensor_size = 1;
- std::vector<float> input_values_handler;
- // create input tensor
- Ort::MemoryInfo memory_info_handler = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
- std::vector<const char *> output_node_names;
- std::vector<std::vector<int64_t>> output_node_dims; // >=1 outputs
- // Owning copies of the strings that onnx_path / log_id point into. Without
- // them the pointers would refer to the caller's string (or to a constructor
- // local on Windows) and dangle once construction finishes.
- // Declared before onnx_path / log_id so they are initialized first.
- std::basic_string<ORTCHAR_T> onnx_path_storage;
- std::string log_id_storage;
- // ORTCHAR_T is the path character type used by Ort::Session (wchar_t on Windows, char elsewhere).
- const ORTCHAR_T *onnx_path = nullptr;
- const char *log_id = nullptr;
- int num_outputs = 1;
- protected:
- const unsigned int num_threads; // initialize at runtime.
- protected:
- // Stores the model path and thread count, then creates the session.
- explicit BasicOrtHandler(const std::string &_onnx_path, unsigned int _num_threads = 1);
- virtual ~BasicOrtHandler();
- protected:
- // Non-copyable and non-movable: the object holds raw pointers into its own members.
- BasicOrtHandler(const BasicOrtHandler &) = delete;
- BasicOrtHandler(BasicOrtHandler &&) = delete;
- BasicOrtHandler &operator=(const BasicOrtHandler &) = delete;
- BasicOrtHandler &operator=(BasicOrtHandler &&) = delete;
- protected:
- virtual Ort::Value transform(const cv::Mat &mat) = 0;
- private:
- void initialize_handler();
- };

- // Copies the model path into owned storage (converted to a wide string on
- // LITE_WIN32), points onnx_path / log_id at those copies and builds the session.
- BasicOrtHandler::BasicOrtHandler(const std::string &_onnx_path, unsigned int _num_threads) : log_id_storage(_onnx_path), log_id(log_id_storage.c_str()), num_threads(_num_threads) {
- #ifdef LITE_WIN32
- // string to wstring
- onnx_path_storage = lite::utils::to_wstring(_onnx_path);
- #else
- onnx_path_storage = _onnx_path;
- #endif
- // The storage is never modified afterwards, so this pointer stays valid for the object's lifetime.
- onnx_path = onnx_path_storage.c_str();
- initialize_handler();
- }
- // Creates the Ort environment and session for `onnx_path`, then caches the
- // name and shape of input 0 and the names and shapes of every output.
- void BasicOrtHandler::initialize_handler() {
- // set ort env
- ort_env = Ort::Env(ORT_LOGGING_LEVEL_ERROR, log_id);
- // 0. session options
- Ort::SessionOptions session_options;
- // set op threads
- session_options.SetIntraOpNumThreads(num_threads);
- // set Optimization options:
- session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
- // set log level
- session_options.SetLogSeverityLevel(4);
- // GPU compatible.
- // OrtCUDAProviderOptions provider_options;
- // session_options.AppendExecutionProvider_CUDA(provider_options);
- // #ifdef USE_CUDA
- // OrtSessionOptionsAppendExecutionProvider_CUDA(session_options, 0); // C API stable.
- // #endif
- // 1. session
- ort_session = new Ort::Session(ort_env, onnx_path, session_options);
- // memory allocation and options
- Ort::AllocatorWithDefaultOptions allocator;
- // 2. input name
- input_name = ort_session->GetInputName(0, allocator);
- input_node_names.assign(1, input_name);
- // 3. input dims and element count
- Ort::TypeInfo type_info = ort_session->GetInputTypeInfo(0);
- auto tensor_info = type_info.GetTensorTypeAndShapeInfo();
- input_node_dims = tensor_info.GetShape();
- input_tensor_size = 1;
- for (const int64_t dim : input_node_dims) {
- // A dynamic axis is reported as -1. Multiplying that into an unsigned size
- // would wrap to a huge value and make the resize below fail, so such an
- // axis is counted as 1 here; create_tensor() resizes the buffer again
- // from the concrete dims it is given.
- if (dim > 0) {
- input_tensor_size *= static_cast<std::size_t>(dim);
- }
- }
- input_values_handler.resize(input_tensor_size);
- // 4. output names & output dims
- const std::size_t output_count = ort_session->GetOutputCount();
- num_outputs = static_cast<int>(output_count);
- output_node_names.resize(output_count);
- output_node_dims.reserve(output_node_dims.size() + output_count);
- for (std::size_t i = 0; i < output_count; ++i) {
- output_node_names[i] = ort_session->GetOutputName(i, allocator);
- Ort::TypeInfo output_type_info = ort_session->GetOutputTypeInfo(i);
- auto output_tensor_info = output_type_info.GetTensorTypeAndShapeInfo();
- output_node_dims.push_back(output_tensor_info.GetShape());
- }
- }
- // Converts `mat` into a float tensor of shape `tensor_dims`.
- //   mat                  source image; converted to 32-bit float and resized to the target H x W when needed
- //   tensor_dims          4 dims with batch 1: (1,C,H,W) when data_format == CHW, otherwise (1,H,W,C)
- //   memory_info_handler  memory info passed through to Ort::Value::CreateTensor
- //   tensor_value_handler buffer that receives the pixel data; the tensor is created over this buffer,
- //                        so it must stay alive and unmodified while the tensor is in use
- //   data_format          CHW selects planar layout, any other value selects interleaved HWC
- // Throws std::runtime_error when the rank is not 4, the batch is not 1, a target
- // dimension is not positive, or the channel count differs from the image.
- // (No dynamic exception specification: `throw(...)` is not valid C++17 and did
- // not match the in-class declaration.)
- Ort::Value BasicOrtHandler::create_tensor(const cv::Mat &mat, const std::vector<int64_t> &tensor_dims, const Ort::MemoryInfo &memory_info_handler, std::vector<float> &tensor_value_handler, unsigned int data_format) {
- if (tensor_dims.size() != 4) {
- throw std::runtime_error("dims mismatch.");
- }
- if (tensor_dims.at(0) != 1) {
- throw std::runtime_error("batch != 1");
- }
- // Pick the C/H/W positions once instead of duplicating the whole body per layout.
- const bool is_chw = (data_format == CHW);
- const int64_t target_channel = tensor_dims.at(is_chw ? 1 : 3);
- const int64_t target_height = tensor_dims.at(is_chw ? 2 : 1);
- const int64_t target_width = tensor_dims.at(is_chw ? 3 : 2);
- // Reject dynamic (-1) or zero dims before they are turned into sizes.
- if (target_channel <= 0 || target_height <= 0 || target_width <= 0) {
- throw std::runtime_error("dims mismatch.");
- }
- const int channels = mat.channels();
- if (target_channel != channels) {
- throw std::runtime_error(is_chw ? "channel mismatch." : "channel mismatch!");
- }
- cv::Mat mat_ref;
- if (mat.type() != CV_32FC(channels)) {
- mat.convertTo(mat_ref, CV_32FC(channels));
- } else {
- mat_ref = mat; // shares the pixel buffer, no copy
- }
- cv::Mat resize_mat_ref;
- if (target_height != mat.rows || target_width != mat.cols) {
- cv::resize(mat_ref, resize_mat_ref, cv::Size(static_cast<int>(target_width), static_cast<int>(target_height)));
- } else {
- resize_mat_ref = mat_ref; // shares the pixel buffer, no copy
- }
- const std::size_t plane_size = static_cast<std::size_t>(target_height) * static_cast<std::size_t>(target_width);
- const std::size_t target_tensor_size = plane_size * static_cast<std::size_t>(target_channel);
- tensor_value_handler.resize(target_tensor_size);
- if (is_chw) {
- // CXHXW: one contiguous plane per channel
- std::vector<cv::Mat> mat_channels;
- cv::split(resize_mat_ref, mat_channels);
- for (std::size_t i = 0; i < mat_channels.size(); ++i) {
- std::memcpy(tensor_value_handler.data() + i * plane_size, mat_channels[i].data, plane_size * sizeof(float));
- }
- } else {
- // HXWXC: a single memcpy is only valid for a continuous buffer; a Mat that
- // is a view into a larger image has gaps between rows, so copy it first.
- if (!resize_mat_ref.isContinuous()) {
- resize_mat_ref = resize_mat_ref.clone();
- }
- std::memcpy(tensor_value_handler.data(), resize_mat_ref.data, target_tensor_size * sizeof(float));
- }
- return Ort::Value::CreateTensor<float>(memory_info_handler, tensor_value_handler.data(), target_tensor_size, tensor_dims.data(), tensor_dims.size());
- }

- // Usage sketch for BasicOrtHandler. NOTE(review): the members accessed below are
- // protected and the class is abstract, so in real code these statements belong
- // inside a subclass method.
- const std::string _onnx_path="";
- unsigned int _num_threads = 1;
- //init inference
- BasicOrtHandler basicOrtHandler(_onnx_path,_num_threads);
- // after transform image
- const cv::Mat mat; // placeholder: supply the preprocessed input image here
- const std::vector<int64_t> &tensor_dims = basicOrtHandler.input_node_dims;
- const Ort::MemoryInfo &memory_info_handler = basicOrtHandler.memory_info_handler;
- std::vector<float> &tensor_value_handler = basicOrtHandler.input_values_handler;
- unsigned int data_format = CHW; // layout of the preprocessed image
- // 1. make input tensor (create_tensor takes all five arguments prepared above)
- Ort::Value input_tensor = basicOrtHandler.create_tensor(mat, tensor_dims, memory_info_handler, tensor_value_handler, data_format);
- // 2. inference scores & boxes.
- auto output_tensors = basicOrtHandler.ort_session->Run(Ort::RunOptions{nullptr}, basicOrtHandler.input_node_names.data(), &input_tensor, 1, basicOrtHandler.output_node_names.data(), basicOrtHandler.num_outputs);
- // 3. get output tensor
- Ort::Value &pred = output_tensors.at(0); // (1,n,c)
- //postprocess
- ...

- #include <assert.h>
- #include <vector>
- #include <onnxruntime_cxx_api.h>
- // Loads model.onnx, feeds two synthetic float inputs ("input" 10x20 and
- // "input_mask" 1x20x4) and runs one inference.
- // Returns 0 on success, 1 when the input count does not match or ONNX Runtime throws.
- int main(int argc, char* argv[]) {
- (void)argc;
- (void)argv;
- try {
- Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "test");
- Ort::SessionOptions session_options;
- session_options.SetIntraOpNumThreads(1);
- session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
- #ifdef _WIN32
- const wchar_t* model_path = L"model.onnx";
- #else
- const char* model_path = "model.onnx";
- #endif
- Ort::Session session(env, model_path, session_options);
- std::vector<const char*> input_node_names = {"input","input_mask"};
- std::vector<const char*> output_node_names = {"output","output_mask"};
- // The names above are hard-coded, so check them against what the model declares.
- const size_t num_input_nodes = session.GetInputCount();
- if (num_input_nodes != input_node_names.size()) {
- fprintf(stderr, "model declares %zu inputs, expected %zu\n", num_input_nodes, input_node_names.size());
- return 1;
- }
- // first input: shape (10, 20)
- std::vector<int64_t> input_node_dims = {10, 20};
- const size_t input_tensor_size = 10 * 20;
- std::vector<float> input_tensor_values(input_tensor_size);
- for (size_t i = 0; i < input_tensor_size; i++)
- input_tensor_values[i] = static_cast<float>(i) / (input_tensor_size + 1);
- // create input tensor object from data values; the rank comes from the dims vector
- auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
- Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, input_tensor_values.data(), input_tensor_size, input_node_dims.data(), input_node_dims.size());
- assert(input_tensor.IsTensor());
- // second input: shape (1, 20, 4)
- std::vector<int64_t> input_mask_node_dims = {1, 20, 4};
- const size_t input_mask_tensor_size = 1 * 20 * 4;
- std::vector<float> input_mask_tensor_values(input_mask_tensor_size);
- for (size_t i = 0; i < input_mask_tensor_size; i++)
- input_mask_tensor_values[i] = static_cast<float>(i) / (input_mask_tensor_size + 1);
- // create input tensor object from data values
- auto mask_memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
- Ort::Value input_mask_tensor = Ort::Value::CreateTensor<float>(mask_memory_info, input_mask_tensor_values.data(), input_mask_tensor_size, input_mask_node_dims.data(), input_mask_node_dims.size());
- assert(input_mask_tensor.IsTensor());
- std::vector<Ort::Value> ort_inputs;
- ort_inputs.push_back(std::move(input_tensor));
- ort_inputs.push_back(std::move(input_mask_tensor));
- // score model & input tensor, get back output tensor
- auto output_tensors = session.Run(Ort::RunOptions{nullptr}, input_node_names.data(), ort_inputs.data(), ort_inputs.size(), output_node_names.data(), output_node_names.size());
- // Get pointer to output tensor float values
- float* floatarr = output_tensors[0].GetTensorMutableData<float>();
- float* floatarr_mask = output_tensors[1].GetTensorMutableData<float>();
- (void)floatarr;
- (void)floatarr_mask;
- printf("Done!\n");
- return 0;
- } catch (const Ort::Exception& e) {
- // Report the failure instead of terminating on an uncaught exception.
- fprintf(stderr, "ONNX Runtime error: %s\n", e.what());
- return 1;
- }
- }

g++ infer.cpp -o infer onnxruntime-linux-x64-1.4.0/lib/libonnxruntime.so.1.4.0 -Ionnxruntime-linux-x64-1.4.0/include/ -std=c++11
- // Tensor element types as listed in onnxruntime_c_api.h. The enumerator order
- // fixes the numeric values, so UNDEFINED, BOOL and FLOAT16 (omitted from this
- // excerpt before) are included at their positions.
- typedef enum ONNXTensorElementDataType {
- ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED,
- ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, // maps to c type float
- ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8, // maps to c type uint8_t
- ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8, // maps to c type int8_t
- ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT16, // maps to c type uint16_t
- ONNX_TENSOR_ELEMENT_DATA_TYPE_INT16, // maps to c type int16_t
- ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32, // maps to c type int32_t
- ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64, // maps to c type int64_t
- ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING, // maps to c++ type std::string
- ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL, // element type used for bool tensors
- ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16, // 16-bit floating point
- ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE, // maps to c type double
- ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT32, // maps to c type uint32_t
- ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT64, // maps to c type uint64_t
- ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX64, // complex with float32 real and imaginary components
- ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX128, // complex with float64 real and imaginary components
- ONNX_TENSOR_ELEMENT_DATA_TYPE_BFLOAT16 // Non-IEEE floating-point format based on IEEE754 single-precision
- } ONNXTensorElementDataType;

- // Build a bool tensor whose elements are all true.
- // std::vector<bool> is bit-packed and has no data(), so the values are stored
- // one byte each as uint8_t and the buffer is viewed as bool for CreateTensor.
- static_assert(sizeof(bool) == sizeof(uint8_t), "bool must be one byte to reuse the uint8_t buffer");
- std::vector<uint8_t> mask_tensor_values(mask_tensor_size, static_cast<uint8_t>(true));
- auto mask_memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
- // The rank is taken from the dims container instead of a hard-coded 3.
- Ort::Value mask_tensor = Ort::Value::CreateTensor<bool>(mask_memory_info, reinterpret_cast<bool *>(mask_tensor_values.data()), mask_tensor_size, mask_node_dims.data(), mask_node_dims.size());
ONNX Runtime使用简单介绍_竹叶青lvye的博客-CSDN博客_onnxruntime 使用
onnxruntime的c++使用_chencision的博客-CSDN博客_c++ onnxruntime
onnxruntime C++ 使用(一)_SongpingWang的技术博客_51CTO博客
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。