OpenVINO™ 是英特尔(Intel)推出的一个用于优化和部署 AI 推理的开源工具包,常用于在 Intel 集成显卡上进行网络推理。
注意:笔者当前使用的版本为 openvino_2021。
假设你已经有了模型的 xml 文件和对应的 bin 文件了,基本代码流程如下:
#include <stdio.h> #include <string> #include "inference_engine.hpp" #define LOGD(fmt, ...) printf("[%s][%s][%d]: " fmt "\n", __FILE__, __FUNCTION__, __LINE__, ##__VA_ARGS__) using namespace InferenceEngine; int main(int argc, char *argv[]) { // 1.查看版本号信息 const Version* version = GetInferenceEngineVersion(); LOGD("version description: %s, buildNumber: %s, major.minor: %d.%d", version->description, version->buildNumber, version->apiVersion.major, version->apiVersion.minor); // 2.创建推理引擎 Core ie; std::vector<std::string> devices = ie.GetAvailableDevices(); // 查看可使用的Devices,包含 CPU、GPU 等 for (std::string device : devices) { LOGD("GetAvailableDevices: %s", device.c_str()); } // 3.读取模型文件 const std::string input_model_xml = "model.xml"; CNNNetwork network = ie.ReadNetwork(input_model_xml); // 4.配置输入输出信息 InputsDataMap& inputs = network.getInputsInfo(); for (auto& input : inputs) { auto& input_name = input.first; //input是一个键值对类型 InputInfo::Ptr& input_info = input.second; input_info->setLayout(Layout::NCHW); // 设置排列方式 input_info->setPrecision(Precision::FP32); // 设置精度为float32 input_info->getPreProcess().setResizeAlgorithm(ResizeAlgorithm::RESIZE_BILINEAR); input_info->getPreProcess().setColorFormat(ColorFormat::RAW); // 设置图片格式 } OutputsDataMap& outputs = network.getOutputsInfo(); for (auto& output : outputs) { auto& output_name = output.first; //output也是一个键值对类型 DataPtr& output_info = output.second; output_info->setPrecision(Precision::FP32); auto& dims = output_info->getDims(); LOGD("output shape name: %s, dims: [%d, %d, %d, %d]", output_name.c_str(), dims[0], dims[1], dims[2], dims[3]); } // 5.根据设备(CPU、GPU 等)加载网络 std::string device_name = "CPU"; // 可用的device通过ie.GetAvailableDevices查询 ExecutableNetwork executable_network = ie.LoadNetwork(network, device_name); // 6.创建推理请求 InferRequest infer_request = executable_network.CreateInferRequest(); /* 如上6步,在多次执行网络推理过程中,可以缓存起来只创建一次,节约耗时*/ // 7.设置输入数据 InputsDataMap& inputs = network.getInputsInfo(); for (auto& input : inputs) { auto& 
input_name = input.first; //input是一个键值对类型 Blob::Ptr blob = infer_request.GetBlob(name); unsigned char* data = static_cast<unsigned char*>(blob->buffer()); // TODO: 通过memcpy等方式给data赋值 // readFile(input_path, data); } // 8.网络推理 infer_request.Infer(); // 9.获取输出 OutputsDataMap& outputs = network.getOutputsInfo(); for (auto& output : outputs) { auto& output_name = output.first; //output也是一个键值对类型 const Blob::Ptr output_blob = infer_request.GetBlob(name); LOGD("size: %d, byte_size: %d", output_blob->size(), output_blob->byteSize()); const float* output_data = static_cast<PrecisionTrait<Precision::FP32>::value_type*>(output_blob->buffer()); // writeFile(path, (void *)output_data, output_blob->byteSize()); } }
其余更复杂的使用场景,可以参考下载的SDK中的示例,路径是 .\openvino_2021\inference_engine\samples\cpp。
/**
 * @brief Reads models from IR and ONNX formats
 * @param modelPath path to model
 * @param binPath path to data file
 * For IR format (*.bin):
 *  * if path is empty, will try to read bin file with the same name as xml and
 *  * if bin file with the same name was not found, will load IR without weights.
 * For ONNX format (*.onnx or *.prototxt):
 *  * binPath parameter is not used.
 * @return CNNNetwork
 */
CNNNetwork ReadNetwork(const std::string& modelPath, const std::string& binPath = {}) const;
如果bin文件路径和xml文件路径一致且文件名相同,该参数可以省略,如:CNNNetwork network = ie.ReadNetwork("model.xml")
/**
 * @brief Reads models from IR and ONNX formats
 * @param model string with model in IR or ONNX format
 * @param weights shared pointer to constant blob with weights
 * Reading ONNX models doesn't support loading weights from data blobs.
 * If you are using an ONNX model with external data files, please use the
 * `InferenceEngine::Core::ReadNetwork(const std::string& modelPath, const std::string& binPath = {}) const`
 * function overload which takes a filesystem path to the model.
 * For ONNX case the second parameter should contain empty blob.
 * @note Created InferenceEngine::CNNNetwork object shares the weights with `weights` object.
 * So, do not create `weights` on temporary data which can be later freed, since the network
 * constant data would then point to invalid memory.
 * @return CNNNetwork
 */
CNNNetwork ReadNetwork(const std::string& model, const Blob::CPtr& weights) const;
// Model IR text (xml) and weights (bin) exposed as byte arrays with their sizes.
// They are only declared here; the definitions must be provided elsewhere at link time.
extern unsigned char __res_model_xml [];
extern unsigned int __res_model_xml_size;
extern unsigned char __res_model_bin [];
extern unsigned int __res_model_bin_size;

// Build the IR string from the xml bytes.
std::string model(__res_model_xml, __res_model_xml + __res_model_xml_size);

// Describe the weight bytes as a 1-D U8 blob over __res_model_bin and pass it
// to the in-memory ReadNetwork overload.
// The created network shares the weights with this blob, so __res_model_bin
// must stay valid for as long as `network` is in use.
CNNNetwork network = ie.ReadNetwork(model,
    InferenceEngine::make_shared_blob<uint8_t>(
        {InferenceEngine::Precision::U8, {__res_model_bin_size}, InferenceEngine::C},
        __res_model_bin));
