After training a model with PyTorch, the next step is to convert it to an ONNX model. PyTorch provides the function torch.onnx.export() for this:
import torch
from model import CSRNet_onnx

def export():
    # the network structure we defined ourselves
    model = CSRNet_onnx(load_weights=r'./weights/mutil_hjxu_d_4.78.path.tar')
    pthfile = r'./weights/mutil_hjxu_d_4.78.path.tar'
    checkpoint = torch.load(pthfile, map_location="cpu")
    model.load_state_dict(checkpoint['state_dict'])
    # dummy input in NHWC layout: batch 1, height 540, width 960, 3 channels
    dummy_input1 = torch.randn(1, 540, 960, 3)
    input_names = ["input_1"]
    output_names = ["output1"]
    dynamic_axes = {'input_1': [0, 1, 2]}  # only used by the dynamic-shape export below
    torch.onnx.export(model, dummy_input1, "./weights/csrNet_540*960_hjxu.onnx", verbose=True,
                      input_names=input_names, output_names=output_names)
    # dynamic-shape export:
    # torch.onnx.export(model, dummy_input1, "./weights/model540*960_no.onnx", verbose=True,
    #                   input_names=input_names, output_names=output_names,
    #                   opset_version=10, dynamic_axes=dynamic_axes)
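Before handing the exported file to a runtime, it is worth a quick validity check. A minimal sketch, assuming the onnx package is installed (the path matches the export above):

import onnx

onnx_model = onnx.load("./weights/csrNet_540*960_hjxu.onnx")
onnx.checker.check_model(onnx_model)  # raises if the exported graph is malformed
print(onnx.helper.printable_graph(onnx_model.graph))  # human-readable dump of the graph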
Seen from the Python side, running inference on the ONNX model breaks down into a few steps: create an InferenceSession, look up the input and output node names, prepare the input as a numpy array, and call sess.run():
import cv2
import numpy as np
import onnxruntime as rt

def test_onnx():
    img_paths = ["./result/000000000192.jpg"]
    sess = rt.InferenceSession("./weights/csrNet_540*960_hjxu.onnx", None)
    # sess = rt.InferenceSession(onnx_sim, providers=['CUDAExecutionProvider'])  # or ['CPUExecutionProvider', 'CUDAExecutionProvider']
    input_name = sess.get_inputs()[0].name
    out_name = sess.get_outputs()[0].name
    # print the name and shape of every input node
    for i in range(len(sess.get_inputs())):
        print(sess.get_inputs()[i].name, sess.get_inputs()[i].shape)
    print("----------------")
    # print the name and shape of every output node
    for i in range(len(sess.get_outputs())):
        print(sess.get_outputs()[i].name, sess.get_outputs()[i].shape)
    for img_path in img_paths:
        X_test = cv2.imread(img_path)
        X_test = cv2.resize(X_test, (960, 540))
        X_test = cv2.cvtColor(X_test, cv2.COLOR_BGR2RGB)
        # the session expects a float32 NHWC numpy array, not a plain Python list
        x = np.expand_dims(X_test, axis=0).astype(np.float32)
        pred_onx = sess.run([out_name], {input_name: x})  # run inference
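A useful follow-up check, not in the original listing, is to push the same batch through the PyTorch model and confirm the two backends agree within floating-point tolerance. A minimal sketch, reusing the checkpoint from the export step and the float32 NHWC array x built above (it assumes CSRNet_onnx consumes NHWC input directly, as the export's dummy input suggests):

import torch
from model import CSRNet_onnx

model = CSRNet_onnx(load_weights=r'./weights/mutil_hjxu_d_4.78.path.tar')
checkpoint = torch.load(r'./weights/mutil_hjxu_d_4.78.path.tar', map_location="cpu")
model.load_state_dict(checkpoint['state_dict'])
model.eval()

with torch.no_grad():
    torch_out = model(torch.from_numpy(x))

# the ONNX and PyTorch outputs should match up to numerical noise
np.testing.assert_allclose(torch_out.numpy(), pred_onx[0], rtol=1e-3, atol=1e-5)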
Here I use the C interface for inference. The rough flow first:

- Step 1: get the API struct and create an execution environment:
      const OrtApi* g_ort = OrtGetApiBase()->GetApi(ORT_API_VERSION);
      OrtEnv* env;
- Step 2: create the session options and build a session from the model file.
- Step 3: wrap the input data in an OrtValue. Since we call the onnxruntime API through this dynamically loaded struct, all we really need to care about is converting between our own data and OrtValue, which internally wraps onnxruntime's tensor type. Two functions create an OrtValue tensor, CreateTensorAsOrtValue and CreateTensorWithDataAsOrtValue; the difference is whether onnxruntime takes on the job of allocating and managing the memory (the former allocates internally, the latter wraps a buffer you supply).
- Step 4: inference, the counterpart of sess.run() in Python:
      g_ort->Run(session, NULL, input_node_names.data(), (const OrtValue* const*)&input_tensor, 1, output_node_names.data(), 1, &output_tensor)
The complete inference code can be found in the official demo; here is a copy with light annotations.
#include <assert.h>
#include <onnxruntime_c_api.h>
#include <cmath>
#include <stdlib.h>
#include <stdio.h>
#include <vector>
#include "cuda_provider_factory.h"

const OrtApi* g_ort = OrtGetApiBase()->GetApi(ORT_API_VERSION);

//*****************************************************************************
// helper function to check for status
void CheckStatus(OrtStatus* status)
{
  if (status != NULL) {
    const char* msg = g_ort->GetErrorMessage(status);
    fprintf(stderr, "%s\n", msg);
    g_ort->ReleaseStatus(status);
    exit(1);
  }
}
int main(int argc, char* argv[]) {
  //*************************************************************************
  // initialize environment... one environment per process
  // the environment maintains thread pools and other state info
  // create an execution environment, used mainly to create the onnxruntime session
  OrtEnv* env;
  CheckStatus(g_ort->CreateEnv(ORT_LOGGING_LEVEL_WARNING, "test", &env));

  // initialize session options if needed
  // parameters needed when creating the session
  OrtSessionOptions* session_options;
  CheckStatus(g_ort->CreateSessionOptions(&session_options));
  // set the number of threads
  g_ort->SetIntraOpNumThreads(session_options, 1);

  // set the graph optimization level
  g_ort->SetSessionGraphOptimizationLevel(session_options, ORT_ENABLE_BASIC);

  // Optionally add more execution providers via session_options.
  // An execution provider decides where inference runs, e.g. on the CPU or on CUDA
  // ("sponsor" might be the better translation: it sponsors the compute).
  // E.g. for CUDA, use the following line:
  CheckStatus(OrtSessionOptionsAppendExecutionProvider_CUDA(session_options, 0));

  //*************************************************************************
  // create session and load model into memory
  // using squeezenet version 1.3
  // URL = https://github.com/onnx/models/tree/master/squeezenet
  OrtSession* session;
#ifdef _WIN32
  const wchar_t* model_path = L"F:/senet_50.onnx";
#else
  const char* model_path = "F:/senet_50.onnx";
#endif
  printf("Using Onnxruntime C API\n");
  // create a session
  CheckStatus(g_ort->CreateSession(env, model_path, session_options, &session));
  //*************************************************************************
  // print model input layer (node names, types, shape etc.)
  size_t num_input_nodes;
  OrtStatus* status;
  OrtAllocator* allocator;
  CheckStatus(g_ort->GetAllocatorWithDefaultOptions(&allocator));

  // print number of model input nodes
  status = g_ort->SessionGetInputCount(session, &num_input_nodes);
  std::vector<const char*> input_node_names(num_input_nodes);
  std::vector<int64_t> input_node_dims;  // simplify... this model has only 1 input node {1, 3, 224, 224};
                                         // otherwise we would need a vector<vector<>>
  printf("Number of inputs = %zu\n", num_input_nodes);

  // iterate over all input nodes
  for (size_t i = 0; i < num_input_nodes; i++) {
    // print input node names
    char* input_name;
    status = g_ort->SessionGetInputName(session, i, allocator, &input_name);
    printf("Input %zu : name=%s\n", i, input_name);
    input_node_names[i] = input_name;

    // print input node types
    OrtTypeInfo* typeinfo;
    status = g_ort->SessionGetInputTypeInfo(session, i, &typeinfo);
    const OrtTensorTypeAndShapeInfo* tensor_info;
    CheckStatus(g_ort->CastTypeInfoToTensorInfo(typeinfo, &tensor_info));
    ONNXTensorElementDataType type;
    CheckStatus(g_ort->GetTensorElementType(tensor_info, &type));
    printf("Input %zu : type=%d\n", i, type);

    // print input shapes/dims
    size_t num_dims;
    CheckStatus(g_ort->GetDimensionsCount(tensor_info, &num_dims));
    printf("Input %zu : num_dims=%zu\n", i, num_dims);
    input_node_dims.resize(num_dims);
    g_ort->GetDimensions(tensor_info, (int64_t*)input_node_dims.data(), num_dims);
    for (size_t j = 0; j < num_dims; j++)
      printf("Input %zu : dim %zu=%jd\n", i, j, input_node_dims[j]);

    g_ort->ReleaseTypeInfo(typeinfo);
  }
  // Results should be...
  // Number of inputs = 1
  // Input 0 : name = data_0
  // Input 0 : type = 1
  // Input 0 : num_dims = 4
  // Input 0 : dim 0 = 1
  // Input 0 : dim 1 = 3
  // Input 0 : dim 2 = 224
  // Input 0 : dim 3 = 224

  //*************************************************************************
  // Similar operations to get output node information.
  // Use OrtSessionGetOutputCount(), OrtSessionGetOutputName()
  // OrtSessionGetOutputTypeInfo() as shown above.

  //*************************************************************************
  // Score the model using sample data, and inspect values
  size_t input_tensor_size = 224 * 224 * 3;  // simplify... using known dim values to calculate size;
                                             // use OrtGetTensorShapeElementCount() to get the official size!
  std::vector<float> input_tensor_values(input_tensor_size);
  std::vector<const char*> output_node_names = { "output1" };  // output node name

  // initialize input data with values in [0.0, 1.0];
  // the buffer is filled directly: input_tensor_values.data() plays the role of mat.data()
  for (size_t i = 0; i < input_tensor_size; i++)
    input_tensor_values[i] = (float)i / (input_tensor_size + 1);
  // create input tensor object from data values
  OrtMemoryInfo* memory_info;
  CheckStatus(g_ort->CreateCpuMemoryInfo(OrtArenaAllocator, OrtMemTypeDefault, &memory_info));
  OrtValue* input_tensor = NULL;
  // CreateTensorWithDataAsOrtValue wraps the buffer we already own;
  // CreateTensorAsOrtValue would instead have onnxruntime allocate (and manage) the memory.
  CheckStatus(g_ort->CreateTensorWithDataAsOrtValue(memory_info, input_tensor_values.data(), input_tensor_size * sizeof(float), input_node_dims.data(), 4, ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, &input_tensor));
  int is_tensor;
  CheckStatus(g_ort->IsTensor(input_tensor, &is_tensor));
  assert(is_tensor);
  g_ort->ReleaseMemoryInfo(memory_info);

  // score model & input tensor, get back output tensor
  OrtValue* output_tensor = NULL;
  //while (true)
  //{
  CheckStatus(g_ort->Run(session, NULL, input_node_names.data(), (const OrtValue* const*)&input_tensor, 1, output_node_names.data(), 1, &output_tensor));
  CheckStatus(g_ort->IsTensor(output_tensor, &is_tensor));
  assert(is_tensor);

  // get pointer to output tensor float values
  float* floatarr;
  CheckStatus(g_ort->GetTensorMutableData(output_tensor, (void**)&floatarr));
  assert(std::abs(floatarr[0] - 0.000045) < 1e-6);  // model-specific sanity check from the official demo

  // print scores for the first 5 classes
  for (int i = 0; i < 5; i++)
    printf("Score for class [%d] = %f\n", i, floatarr[i]);
  //}

  g_ort->ReleaseValue(output_tensor);
  g_ort->ReleaseValue(input_tensor);
  g_ort->ReleaseSession(session);
  g_ort->ReleaseSessionOptions(session_options);
  g_ort->ReleaseEnv(env);
  printf("Done!\n");
  return 0;
}
Writing this up took some effort; if it helped, consider leaving a follow~