
Converting YOLOv5 and YOLOv7 ONNX models to TensorRT (engine)

Converting a YOLOv5 model to a TensorRT .engine

I recently implemented engine conversion for ResNet, but only for a simple classification network. There is plenty of material on building a YOLOv5 engine with the TensorRT C++ layer API, while material on building the engine by parsing ONNX is scarce, so this post shows how to build an engine from an ONNX file and run inference with it.

Version: TensorRT 8.4; any release from 8.0 upward should work.
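
If you need to confirm which TensorRT headers you are actually compiling against, the version macros from NvInferVersion.h can be printed. A minimal sketch (the macros below ship with TensorRT itself):

// Print the TensorRT version the headers belong to.
#include <iostream>
#include "NvInferVersion.h"  // defines NV_TENSORRT_MAJOR / NV_TENSORRT_MINOR / NV_TENSORRT_PATCH

int main() {
    std::cout << "TensorRT " << NV_TENSORRT_MAJOR << "."
              << NV_TENSORRT_MINOR << "." << NV_TENSORRT_PATCH << std::endl;
    return 0;
}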

1. Converting YOLOv5 to ONNX

A point worth stressing: with the official export.py I can export an ONNX file that gives correct predictions under Python onnxruntime, and the same file even converts to an RKNN model and passes testing, yet when TensorRT's ONNX parser is used to build the engine it fails. If you know why, please leave an answer; many thanks!

Method 1:

Export the ONNX with the fork at github: https://github.com/linghu8812/yolov5. That export can be parsed by TensorRT's ONNX parser, so the network builds successfully.

The parsing and network-building code:

const char* onnx_path = "./best.onnx";
INetworkDefinition* network = builder->createNetworkV2(1U);  // must be 1U (explicit batch); 0U fails
IParser* parser = createParser(*network, gLogger);
parser->parseFromFile(onnx_path, static_cast<int32_t>(ILogger::Severity::kWARNING));
// print any parse errors
for (int32_t i = 0; i < parser->getNbErrors(); ++i) { std::cout << parser->getError(i)->desc() << std::endl; }
std::cout << "successfully parse the onnx model" << std::endl;
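
A note on the 1U argument: it is the explicit-batch flag, and the ONNX parser only works with explicit-batch networks, which is why 0U fails. Instead of the magic number, the same value can be spelled out with the NetworkDefinitionCreationFlag enum; a small sketch of the equivalent call:

// Equivalent to createNetworkV2(1U): request an explicit-batch network,
// which is what the ONNX parser requires.
const auto explicitBatch =
    1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
nvinfer1::INetworkDefinition* network = builder->createNetworkV2(explicitBatch);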

Method 2 (added 2022-09-05):

The conversion code in the yolov7 repo, https://github.com/WongKinYiu/yolov7/tree/u5, also works; I have tested it. Converting YOLOv7 itself was tested as well and still runs.

2. Converting ONNX to an engine and running inference in C++

(1) YOLOv5 ONNX-to-engine code. This is an early version, saved before any real post-processing logic was written.

You can skip this version and use the code in (2) directly.

  1. #include "NvInfer.h"
  2. #include "cuda_runtime_api.h"
  3. #include <fstream>
  4. #include <iostream>
  5. #include <map>
  6. #include <sstream>
  7. #include <vector>
  8. #include <chrono>
  9. #include <cmath>
  10. #include <cassert>
  11. #include<opencv2/core/core.hpp>
  12. #include<opencv2/highgui/highgui.hpp>
  13. #include <opencv2/opencv.hpp>
  14. // onnx转换头文件
  15. #include "NvOnnxParser.h"
  16. using namespace nvonnxparser;
  17. using namespace std;
  18. #define CHECK(status) \
  19. do\
  20. {\
  21. auto ret = (status);\
  22. if (ret != 0)\
  23. {\
  24. std::cerr << "Cuda failure: " << ret << std::endl;\
  25. abort();\
  26. }\
  27. } while (0)
  28. struct alignas(float) Detection {
  29. //center_x center_y w h
  30. float bbox[4];
  31. float conf; // bbox_conf * cls_conf
  32. float class_id;
  33. };
  34. // stuff we know about the network and the input/output blobs
  35. static const int INPUT_H = 640;
  36. static const int INPUT_W = 640;
  37. static const int OUTPUT_SIZE = 25200*85; //1000 * sizeof(Detection) / sizeof(float) + 1;
  38. const char* INPUT_BLOB_NAME = "images";
  39. const char* OUTPUT_BLOB_NAME = "output";
  40. using namespace nvinfer1;
  41. //static Logger gLogger;
  42. //构建Logger
  43. class Logger : public ILogger
  44. {
  45. void log(Severity severity, const char* msg) noexcept override
  46. {
  47. // suppress info-level messages
  48. if (severity <= Severity::kWARNING)
  49. std::cout << msg << std::endl;
  50. }
  51. } gLogger;
  52. // Creat the engine using only the API and not any parser.
  53. ICudaEngine* createEngine(unsigned int maxBatchSize, IBuilder* builder, IBuilderConfig* config)
  54. {
  55. const char* onnx_path = "./best.onnx";
  56. INetworkDefinition* network = builder->createNetworkV2(1U); //此处重点1U为OU就有问题
  57. IParser* parser = createParser(*network, gLogger);
  58. parser->parseFromFile(onnx_path, static_cast<int32_t>(ILogger::Severity::kWARNING));
  59. //解析有错误将返回
  60. for (int32_t i = 0; i < parser->getNbErrors(); ++i) { std::cout << parser->getError(i)->desc() << std::endl; }
  61. std::cout << "successfully parse the onnx model" << std::endl;
  62. // Build engine
  63. builder->setMaxBatchSize(maxBatchSize);
  64. config->setMaxWorkspaceSize(1 << 20);
  65. //config->setFlag(nvinfer1::BuilderFlag::kFP16); // 设置精度计算
  66. //config->setFlag(nvinfer1::BuilderFlag::kINT8);
  67. ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
  68. std::cout << "successfully convert onnx to engine!!! " << std::endl;
  69. //销毁
  70. network->destroy();
  71. parser->destroy();
  72. return engine;
  73. }
  74. void APIToModel(unsigned int maxBatchSize, IHostMemory** modelStream)
  75. {
  76. // Create builder
  77. IBuilder* builder = createInferBuilder(gLogger);
  78. IBuilderConfig* config = builder->createBuilderConfig();
  79. // Create model to populate the network, then set the outputs and create an engine
  80. ICudaEngine* engine = createEngine(maxBatchSize, builder, config);
  81. assert(engine != nullptr);
  82. // Serialize the engine
  83. (*modelStream) = engine->serialize();
  84. // Close everything down
  85. engine->destroy();
  86. builder->destroy();
  87. config->destroy();
  88. }
  89. void doInference(IExecutionContext& context, float* input, float* output, int batchSize)
  90. {
  91. const ICudaEngine& engine = context.getEngine();
  92. // Pointers to input and output device buffers to pass to engine.
  93. // Engine requires exactly IEngine::getNbBindings() number of buffers.
  94. assert(engine.getNbBindings() == 2);
  95. void* buffers[2];
  96. // In order to bind the buffers, we need to know the names of the input and output tensors.
  97. // Note that indices are guaranteed to be less than IEngine::getNbBindings()
  98. const int inputIndex = engine.getBindingIndex(INPUT_BLOB_NAME);
  99. const int outputIndex = engine.getBindingIndex(OUTPUT_BLOB_NAME);
  100. //const int inputIndex = 0;
  101. //const int outputIndex = 1;
  102. // Create GPU buffers on device
  103. cudaMalloc(&buffers[inputIndex], batchSize * 3 * INPUT_H * INPUT_W * sizeof(float));
  104. cudaMalloc(&buffers[outputIndex], batchSize * OUTPUT_SIZE * sizeof(float));
  105. // Create stream
  106. cudaStream_t stream;
  107. CHECK(cudaStreamCreate(&stream));
  108. // DMA input batch data to device, infer on the batch asynchronously, and DMA output back to host
  109. CHECK(cudaMemcpyAsync(buffers[inputIndex], input, batchSize * 3 * INPUT_H * INPUT_W * sizeof(float), cudaMemcpyHostToDevice, stream));
  110. context.enqueue(batchSize, buffers, stream, nullptr);
  111. CHECK(cudaMemcpyAsync(output, buffers[outputIndex], batchSize * OUTPUT_SIZE * sizeof(float), cudaMemcpyDeviceToHost, stream));
  112. cudaStreamSynchronize(stream);
  113. // Release stream and buffers
  114. cudaStreamDestroy(stream);
  115. CHECK(cudaFree(buffers[inputIndex]));
  116. CHECK(cudaFree(buffers[outputIndex]));
  117. }
  118. //加工图片变成拥有batch的输入, tensorrt输入需要的格式,为一个维度
  119. void ProcessImage(cv::Mat image, float input_data[]) {
  120. //只处理一张图片,总之结果为一维[batch*3*INPUT_W*INPUT_H]
  121. //以下代码为投机取巧了
  122. cv::resize(image, image, cv::Size(INPUT_W, INPUT_H), 0, 0, cv::INTER_LINEAR);
  123. std::vector<cv::Mat> InputImage;
  124. InputImage.push_back(image);
  125. int ImgCount = InputImage.size();
  126. //float input_data[BatchSize * 3 * INPUT_H * INPUT_W];
  127. for (int b = 0; b < ImgCount; b++) {
  128. cv::Mat img = InputImage.at(b);
  129. int w = img.cols;
  130. int h = img.rows;
  131. int i = 0;
  132. for (int row = 0; row < h; ++row) {
  133. uchar* uc_pixel = img.data + row * img.step;
  134. for (int col = 0; col < INPUT_W; ++col) {
  135. input_data[b * 3 * INPUT_H * INPUT_W + i] = (float)uc_pixel[2] / 255.0;
  136. input_data[b * 3 * INPUT_H * INPUT_W + i + INPUT_H * INPUT_W] = (float)uc_pixel[1] / 255.0;
  137. input_data[b * 3 * INPUT_H * INPUT_W + i + 2 * INPUT_H * INPUT_W] = (float)uc_pixel[0] / 255.0;
  138. uc_pixel += 3;
  139. ++i;
  140. }
  141. }
  142. }
  143. }
  144. int get_trtengine() {
  145. IHostMemory* modelStream{ nullptr };
  146. APIToModel(1, &modelStream);
  147. assert(modelStream != nullptr);
  148. std::ofstream p("./best.engine", std::ios::binary);
  149. if (!p)
  150. {
  151. std::cerr << "could not open plan output file" << std::endl;
  152. return -1;
  153. }
  154. p.write(reinterpret_cast<const char*>(modelStream->data()), modelStream->size());
  155. modelStream->destroy();
  156. return 0;
  157. }
  158. int infer() {
  159. //加载engine引擎
  160. char* trtModelStream{ nullptr };
  161. size_t size{ 0 };
  162. std::ifstream file("./best.engine", std::ios::binary);
  163. if (file.good()) {
  164. file.seekg(0, file.end);
  165. size = file.tellg();
  166. file.seekg(0, file.beg);
  167. trtModelStream = new char[size];
  168. assert(trtModelStream);
  169. file.read(trtModelStream, size);
  170. file.close();
  171. }
  172. //反序列为engine,创建context
  173. IRuntime* runtime = createInferRuntime(gLogger);
  174. assert(runtime != nullptr);
  175. ICudaEngine* engine = runtime->deserializeCudaEngine(trtModelStream, size, nullptr);
  176. assert(engine != nullptr);
  177. IExecutionContext* context = engine->createExecutionContext();
  178. assert(context != nullptr);
  179. delete[] trtModelStream;
  180. //*********************推理-循环推理*********************//
  181. float time_read_img = 0.0;
  182. float time_infer = 0.0;
  183. static float prob[OUTPUT_SIZE];
  184. for (int i = 0; i < 1000; i++) {
  185. // 处理图片为固定输出
  186. auto start = std::chrono::system_clock::now(); //时间函数
  187. std::string path = "./1.jpg";
  188. std::cout << "img_path=" << path << endl;
  189. static float data[3 * INPUT_H * INPUT_W];
  190. cv::Mat img = cv::imread(path);
  191. ProcessImage(img, data);
  192. auto end = std::chrono::system_clock::now();
  193. time_read_img = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() + time_read_img;
  194. //Run inference
  195. start = std::chrono::system_clock::now(); //时间函数
  196. doInference(*context, data, prob, 1);
  197. end = std::chrono::system_clock::now();
  198. time_infer = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() + time_infer;
  199. std::cout << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms" << std::endl;
  200. //输出后处理
  201. //std::cout <<"prob="<<prob << std::endl;
  202. float cls_float = prob[0];
  203. int cls_id = 0;
  204. for (int i = 0; i < OUTPUT_SIZE; i++) {
  205. if (cls_float < prob[i]) {
  206. cls_float = prob[i];
  207. cls_id = i;
  208. }
  209. }
  210. std::cout << "i=" << i << "\tcls_id=" << cls_id << "\t cls_float=" << cls_float << std::endl;
  211. }
  212. std::cout << "C++ 2engine" << "mean read img time =" << time_read_img / 1000 << "ms\t" << "mean infer img time =" << time_infer / 1000 << "ms" << std::endl;
  213. // Destroy the engine
  214. context->destroy();
  215. engine->destroy();
  216. runtime->destroy();
  217. return 0;
  218. }
  219. int main(int argc, char** argv)
  220. {
  221. //string mode = argv[1];
  222. string mode = "-d"; //适用windows编译,固定指定参数
  223. //if (std::string(argv[1]) == "-s") {
  224. if (mode == "-s") {
  225. get_trtengine();
  226. }
  227. //else if (std::string(argv[1]) == "-d") {
  228. else if (mode == "-d") {
  229. infer();
  230. }
  231. else {
  232. return -1;
  233. }
  234. return 0;
  235. }
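
One debugging tip that is not part of the code above: the hard-coded tensor names ("images", "output") and OUTPUT_SIZE must match what the exported ONNX actually contains, and the names in particular differ between export scripts. The helper below is my own addition, a sketch that lists every binding of a deserialized engine so the constants can be checked against the real model:

// Sketch: print binding names and shapes of a deserialized engine, to verify
// INPUT_BLOB_NAME / OUTPUT_BLOB_NAME and OUTPUT_SIZE against the actual model.
void printBindings(const nvinfer1::ICudaEngine& engine) {
    for (int i = 0; i < engine.getNbBindings(); ++i) {
        nvinfer1::Dims d = engine.getBindingDimensions(i);
        std::cout << (engine.bindingIsInput(i) ? "input  " : "output ")
                  << engine.getBindingName(i) << " : ";
        for (int j = 0; j < d.nbDims; ++j) {
            std::cout << d.d[j] << (j + 1 < d.nbDims ? "x" : "\n");
        }
    }
}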

(2) YOLOv5 ONNX-to-engine code, complete version.

The key steps are explained in comments; see the code for details.

Platform: Windows 10 with Visual Studio; environment setup is covered in my earlier posts.

This code implements the following:

① ONNX-to-engine conversion;

② engine inference;

③ NMS implemented on the CPU.
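
One constant in the code below is worth explaining up front: anchor_output_num is the number of prediction rows the network emits, and it depends on the input resolution. Assuming the standard YOLOv5 head (strides 8, 16 and 32, three anchors per grid cell), the values quoted in the code follow from simple arithmetic; a quick sketch:

// Sketch: where 25200 (640x640 input) and 56700 (960x960 input) come from,
// assuming the standard YOLOv5 head with strides 8/16/32 and 3 anchors per cell.
#include <iostream>

int anchor_rows(int input_size) {
    const int strides[3] = { 8, 16, 32 };
    int total = 0;
    for (int stride : strides) {
        int cells = input_size / stride;
        total += cells * cells * 3;  // cells*cells grid positions, 3 anchors each
    }
    return total;
}

int main() {
    std::cout << anchor_rows(640) << std::endl;  // 25200
    std::cout << anchor_rows(960) << std::endl;  // 56700
    return 0;
}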

  1. #include "NvInfer.h"
  2. #include "cuda_runtime_api.h"
  3. #include <fstream>
  4. #include <iostream>
  5. #include <map>
  6. #include <sstream>
  7. #include <vector>
  8. #include <chrono>
  9. #include <cmath>
  10. #include <cassert>
  11. #include<opencv2/core/core.hpp>
  12. #include<opencv2/highgui/highgui.hpp>
  13. #include <opencv2/opencv.hpp>
  14. // onnx转换头文件
  15. #include "NvOnnxParser.h"
  16. using namespace nvonnxparser;
  17. using namespace std;
  18. #define CHECK(status) \
  19. do\
  20. {\
  21. auto ret = (status);\
  22. if (ret != 0)\
  23. {\
  24. std::cerr << "Cuda failure: " << ret << std::endl;\
  25. abort();\
  26. }\
  27. } while (0)
  28. struct Detection {
  29. //center_x center_y w h
  30. float bbox[4];
  31. float conf; // bbox_conf * cls_conf
  32. int class_id;
  33. int index;
  34. };
  35. // stuff we know about the network and the input/output blobs
  36. static const int INPUT_H = 640;
  37. static const int INPUT_W = 640;
  38. static const int cls_num = 80;
  39. static const int anchor_output_num = 25200; //不同输入尺寸anchor:640-->25200 | 960-->56700
  40. static const int OUTPUT_SIZE = 1* anchor_output_num *(cls_num+5); //1000 * sizeof(Detection) / sizeof(float) + 1;
  41. const char* INPUT_BLOB_NAME = "images";
  42. const char* OUTPUT_BLOB_NAME = "output";
  43. using namespace nvinfer1;
  44. //static Logger gLogger;
  45. //构建Logger
  46. class Logger : public ILogger
  47. {
  48. void log(Severity severity, const char* msg) noexcept override
  49. {
  50. // suppress info-level messages
  51. if (severity <= Severity::kWARNING)
  52. std::cout << msg << std::endl;
  53. }
  54. } gLogger;
  55. // Creat the engine using only the API and not any parser.
  56. ICudaEngine* createEngine(unsigned int maxBatchSize, IBuilder* builder, IBuilderConfig* config)
  57. {
  58. const char* onnx_path = "./best.onnx";
  59. INetworkDefinition* network = builder->createNetworkV2(1U); //此处重点1U为OU就有问题
  60. IParser* parser = createParser(*network, gLogger);
  61. parser->parseFromFile(onnx_path, static_cast<int32_t>(ILogger::Severity::kWARNING));
  62. //解析有错误将返回
  63. for (int32_t i = 0; i < parser->getNbErrors(); ++i) { std::cout << parser->getError(i)->desc() << std::endl; }
  64. std::cout << "successfully parse the onnx model" << std::endl;
  65. // Build engine
  66. builder->setMaxBatchSize(maxBatchSize);
  67. config->setMaxWorkspaceSize(1 << 20);
  68. //config->setFlag(nvinfer1::BuilderFlag::kFP16); // 设置精度计算
  69. //config->setFlag(nvinfer1::BuilderFlag::kINT8);
  70. ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
  71. std::cout << "successfully convert onnx to engine!!! " << std::endl;
  72. //销毁
  73. network->destroy();
  74. parser->destroy();
  75. return engine;
  76. }
  77. void APIToModel(unsigned int maxBatchSize, IHostMemory** modelStream)
  78. {
  79. // Create builder
  80. IBuilder* builder = createInferBuilder(gLogger);
  81. IBuilderConfig* config = builder->createBuilderConfig();
  82. // Create model to populate the network, then set the outputs and create an engine
  83. ICudaEngine* engine = createEngine(maxBatchSize, builder, config);
  84. assert(engine != nullptr);
  85. // Serialize the engine
  86. (*modelStream) = engine->serialize();
  87. // Close everything down
  88. engine->destroy();
  89. builder->destroy();
  90. config->destroy();
  91. }
  92. void doInference(IExecutionContext& context, float* input, float* output, int batchSize)
  93. {
  94. const ICudaEngine& engine = context.getEngine();
  95. // Pointers to input and output device buffers to pass to engine.
  96. // Engine requires exactly IEngine::getNbBindings() number of buffers.
  97. assert(engine.getNbBindings() == 2);
  98. void* buffers[2];
  99. // In order to bind the buffers, we need to know the names of the input and output tensors.
  100. // Note that indices are guaranteed to be less than IEngine::getNbBindings()
  101. const int inputIndex = engine.getBindingIndex(INPUT_BLOB_NAME);
  102. const int outputIndex = engine.getBindingIndex(OUTPUT_BLOB_NAME);
  103. //const int inputIndex = 0;
  104. //const int outputIndex = 1;
  105. // Create GPU buffers on device
  106. cudaMalloc(&buffers[inputIndex], batchSize * 3 * INPUT_H * INPUT_W * sizeof(float));
  107. cudaMalloc(&buffers[outputIndex], batchSize * OUTPUT_SIZE * sizeof(float));
  108. // Create stream
  109. cudaStream_t stream;
  110. CHECK(cudaStreamCreate(&stream));
  111. // DMA input batch data to device, infer on the batch asynchronously, and DMA output back to host
  112. CHECK(cudaMemcpyAsync(buffers[inputIndex], input, batchSize * 3 * INPUT_H * INPUT_W * sizeof(float), cudaMemcpyHostToDevice, stream));
  113. context.enqueue(batchSize, buffers, stream, nullptr);
  114. CHECK(cudaMemcpyAsync(output, buffers[outputIndex], batchSize * OUTPUT_SIZE * sizeof(float), cudaMemcpyDeviceToHost, stream));
  115. cudaStreamSynchronize(stream);
  116. // Release stream and buffers
  117. cudaStreamDestroy(stream);
  118. CHECK(cudaFree(buffers[inputIndex]));
  119. CHECK(cudaFree(buffers[outputIndex]));
  120. }
  121. int get_trtengine() {
  122. IHostMemory* modelStream{ nullptr };
  123. APIToModel(1, &modelStream);
  124. assert(modelStream != nullptr);
  125. std::ofstream p("./best.engine", std::ios::binary);
  126. if (!p)
  127. {
  128. std::cerr << "could not open plan output file" << std::endl;
  129. return -1;
  130. }
  131. p.write(reinterpret_cast<const char*>(modelStream->data()), modelStream->size());
  132. modelStream->destroy();
  133. return 0;
  134. }
  135. //加工图片变成拥有batch的输入, tensorrt输入需要的格式,为一个维度
  136. void ProcessImage(cv::Mat image, float input_data[]) {
  137. //只处理一张图片,总之结果为一维[batch*3*INPUT_W*INPUT_H]
  138. //以下代码为投机取巧了
  139. cv::resize(image, image, cv::Size(INPUT_W, INPUT_H), 0, 0, cv::INTER_LINEAR);
  140. std::vector<cv::Mat> InputImage;
  141. InputImage.push_back(image);
  142. int ImgCount = InputImage.size();
  143. //float input_data[BatchSize * 3 * INPUT_H * INPUT_W];
  144. for (int b = 0; b < ImgCount; b++) {
  145. cv::Mat img = InputImage.at(b);
  146. int w = img.cols;
  147. int h = img.rows;
  148. int i = 0;
  149. for (int row = 0; row < h; ++row) {
  150. uchar* uc_pixel = img.data + row * img.step;
  151. for (int col = 0; col < INPUT_W; ++col) {
  152. input_data[b * 3 * INPUT_H * INPUT_W + i] = (float)uc_pixel[2] / 255.0;
  153. input_data[b * 3 * INPUT_H * INPUT_W + i + INPUT_H * INPUT_W] = (float)uc_pixel[1] / 255.0;
  154. input_data[b * 3 * INPUT_H * INPUT_W + i + 2 * INPUT_H * INPUT_W] = (float)uc_pixel[0] / 255.0;
  155. uc_pixel += 3;
  156. ++i;
  157. }
  158. }
  159. }
  160. }
  161. //********************************************** NMS code **********************************//
  162. /*
  163. struct Detection {
  164. //center_x center_y w h
  165. float bbox[4];
  166. float conf; // bbox_conf * cls_conf
  167. int class_id;
  168. int index;
  169. };
  170. */
  171. struct Bbox {
  172. int x;
  173. int y;
  174. int w;
  175. int h;
  176. };
  177. float iou(Bbox box1, Bbox box2) {
  178. int x1 = max(box1.x, box2.x);
  179. int y1 = max(box1.y, box2.y);
  180. int x2 = min(box1.x + box1.w, box2.x + box2.w);
  181. int y2 = min(box1.y + box1.h, box2.y + box2.h);
  182. int w = max(0, x2 - x1);
  183. int h = max(0, y2 - y1);
  184. float over_area = w * h;
  185. return over_area / (box1.w * box1.h + box2.w * box2.h - over_area);
  186. }
  187. int get_max_index(vector<Detection> pre_detection) {
  188. //获得最佳置信度的值,并返回对应的索引值
  189. int index;
  190. float conf;
  191. if (pre_detection.size() > 0) {
  192. index = 0;
  193. conf = pre_detection.at(0).conf;
  194. for (int i = 0; i < pre_detection.size(); i++) {
  195. if (conf < pre_detection.at(i).conf) {
  196. index = i;
  197. conf = pre_detection.at(i).conf;
  198. }
  199. }
  200. return index;
  201. }
  202. else {
  203. return -1;
  204. }
  205. }
  206. bool judge_in_lst(int index, vector<int> index_lst) {
  207. //index在列表index_lst中则返回true,否则返回false
  208. if (index_lst.size() > 0) {
  209. for (int i = 0; i < index_lst.size(); i++) {
  210. if (index == index_lst.at(i)) {
  211. return true;
  212. }
  213. }
  214. }
  215. return false;
  216. }
  217. vector<int> nms(vector<Detection> pre_detection, float iou_thr)
  218. {
  219. /*
  220. 返回需保存box的pre_detection对应位置索引值
  221. */
  222. int index;
  223. vector<Detection> pre_detection_new;
  224. //Detection det_best;
  225. Bbox box_best, box;
  226. float iou_value;
  227. vector<int> keep_index;
  228. vector<int> del_index;
  229. bool keep_bool;
  230. bool del_bool;
  231. if (pre_detection.size() > 0) {
  232. pre_detection_new.clear();
  233. // 循环将预测结果建立索引
  234. for (int i = 0; i < pre_detection.size(); i++) {
  235. pre_detection.at(i).index = i;
  236. pre_detection_new.push_back(pre_detection.at(i));
  237. }
  238. //循环便利获得保留box位置索引-相对输入pre_detection位置
  239. while (pre_detection_new.size() > 0) {
  240. index = get_max_index(pre_detection_new);
  241. if (index >= 0) {
  242. keep_index.push_back(pre_detection_new.at(index).index); //保留索引位置
  243. // 更新最佳保留box
  244. box_best.x = pre_detection_new.at(index).bbox[0];
  245. box_best.y = pre_detection_new.at(index).bbox[1];
  246. box_best.w = pre_detection_new.at(index).bbox[2];
  247. box_best.h = pre_detection_new.at(index).bbox[3];
  248. for (int j = 0; j < pre_detection.size(); j++) {
  249. keep_bool = judge_in_lst(pre_detection.at(j).index, keep_index);
  250. del_bool = judge_in_lst(pre_detection.at(j).index, del_index);
  251. if ((!keep_bool) && (!del_bool)) { //不在keep_index与del_index才计算iou
  252. box.x = pre_detection.at(j).bbox[0];
  253. box.y = pre_detection.at(j).bbox[1];
  254. box.w = pre_detection.at(j).bbox[2];
  255. box.h = pre_detection.at(j).bbox[3];
  256. iou_value = iou(box_best, box);
  257. if (iou_value > iou_thr) {
  258. del_index.push_back(j); //记录大于阈值将删除对应的位置
  259. }
  260. }
  261. }
  262. //更新pre_detection_new
  263. pre_detection_new.clear();
  264. for (int j = 0; j < pre_detection.size(); j++) {
  265. keep_bool = judge_in_lst(pre_detection.at(j).index, keep_index);
  266. del_bool = judge_in_lst(pre_detection.at(j).index, del_index);
  267. if ((!keep_bool) && (!del_bool)) {
  268. pre_detection_new.push_back(pre_detection.at(j));
  269. }
  270. }
  271. }
  272. }
  273. }
  274. del_index.clear();
  275. del_index.shrink_to_fit();
  276. pre_detection_new.clear();
  277. pre_detection_new.shrink_to_fit();
  278. return keep_index;
  279. }
  280. vector<Detection> postprocess(float* prob, float conf_thr = 0.2, float nms_thr = 0.4) {
  281. /*
  282. #####################此函数处理一张图预测结果#########################
  283. prob为[x y w h score multi-pre] 如80类-->(1,anchor_num,85)
  284. */
  285. vector<Detection> pre_results;
  286. vector<int> nms_keep_index;
  287. vector<Detection> results;
  288. bool keep_bool;
  289. Detection pre_res;
  290. float conf;
  291. int tmp_idx;
  292. float tmp_cls_score;
  293. for (int i = 0; i < anchor_output_num; i++) {
  294. tmp_idx = i * (cls_num + 5);
  295. pre_res.bbox[0] = prob[tmp_idx + 0];
  296. pre_res.bbox[1] = prob[tmp_idx + 1];
  297. pre_res.bbox[2] = prob[tmp_idx + 2];
  298. pre_res.bbox[3] = prob[tmp_idx + 3];
  299. conf = prob[tmp_idx + 4]; //是为目标的置信度
  300. tmp_cls_score = prob[tmp_idx + 5] * conf;
  301. pre_res.class_id = 0;
  302. pre_res.conf = 0;
  303. for (int j = 1; j < cls_num; j++) {
  304. tmp_idx = i * (cls_num + 5) + 5 + j; //获得对应类别索引
  305. if (tmp_cls_score < prob[tmp_idx] * conf)
  306. {
  307. tmp_cls_score = prob[tmp_idx] * conf;
  308. pre_res.class_id = j;
  309. pre_res.conf = tmp_cls_score;
  310. }
  311. }
  312. if (conf >= conf_thr) {
  313. pre_results.push_back(pre_res);
  314. }
  315. }
  316. //使用nms
  317. nms_keep_index=nms(pre_results,nms_thr);
  318. for (int i = 0; i < pre_results.size(); i++) {
  319. keep_bool = judge_in_lst(i, nms_keep_index);
  320. if (keep_bool) {
  321. results.push_back(pre_results.at(i));
  322. }
  323. }
  324. pre_results.clear();
  325. pre_results.shrink_to_fit();
  326. nms_keep_index.clear();
  327. nms_keep_index.shrink_to_fit();
  328. return results;
  329. }
  330. cv::Mat draw_rect(cv::Mat image, vector<Detection> results) {
  331. /*
  332. image 为图像
  333. struct Detection {
  334. float bbox[4]; //center_x center_y w h
  335. float conf; // 置信度
  336. int class_id; //类别id
  337. int index; //可忽略
  338. };
  339. */
  340. float x;
  341. float y;
  342. float y_tmp;
  343. float w;
  344. float h;
  345. string info;
  346. cv::Rect rect;
  347. for (int i = 0; i < results.size(); i++) {
  348. x = results.at(i).bbox[0];
  349. y= results.at(i).bbox[1];
  350. w= results.at(i).bbox[2];
  351. h=results.at(i).bbox[3];
  352. x = (int)(x - w / 2);
  353. y = (int)(y - h / 2);
  354. w = (int)w;
  355. h = (int)h;
  356. info = "id:";
  357. info.append(to_string(results.at(i).class_id));
  358. info.append(" s:");
  359. info.append( to_string((int)(results.at(i).conf*100) ) );
  360. info.append("%");
  361. rect= cv::Rect(x, y, w, h);
  362. cv::rectangle(image, rect, cv::Scalar(0, 255, 0), 1, 1, 0);//矩形的两个顶点,两个顶点都包括在矩形内部
  363. cv::putText(image, info, cv::Point(x, y), cv::FONT_HERSHEY_SIMPLEX, 0.4, cv::Scalar(0, 255, 0), 0.4, 1, false);
  364. }
  365. return image;
  366. }
  367. int infer() {
  368. //加载engine引擎
  369. char* trtModelStream{ nullptr };
  370. size_t size{ 0 };
  371. std::ifstream file("./best.engine", std::ios::binary);
  372. if (file.good()) {
  373. file.seekg(0, file.end);
  374. size = file.tellg();
  375. file.seekg(0, file.beg);
  376. trtModelStream = new char[size];
  377. assert(trtModelStream);
  378. file.read(trtModelStream, size);
  379. file.close();
  380. }
  381. //反序列为engine,创建context
  382. IRuntime* runtime = createInferRuntime(gLogger);
  383. assert(runtime != nullptr);
  384. ICudaEngine* engine = runtime->deserializeCudaEngine(trtModelStream, size, nullptr);
  385. assert(engine != nullptr);
  386. IExecutionContext* context = engine->createExecutionContext();
  387. assert(context != nullptr);
  388. delete[] trtModelStream;
  389. //*********************推理-循环推理*********************//
  390. float time_read_img = 0.0;
  391. float time_infer = 0.0;
  392. float prob[OUTPUT_SIZE];
  393. vector<Detection> results;
  394. for (int i = 0; i < 1000; i++) {
  395. // 处理图片为固定输出
  396. auto start = std::chrono::system_clock::now(); //时间函数
  397. std::string path = "./7.jpg";
  398. std::cout << "img_path=" << path << endl;
  399. static float data[3 * INPUT_H * INPUT_W];
  400. cv::Mat img = cv::imread(path);
  401. ProcessImage(img, data);
  402. auto end = std::chrono::system_clock::now();
  403. time_read_img = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() + time_read_img;
  404. //Run inference
  405. start = std::chrono::system_clock::now(); //时间函数
  406. doInference(*context, data, prob, 1);
  407. end = std::chrono::system_clock::now();
  408. time_infer = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() + time_infer;
  409. std::cout << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms" << std::endl;
  410. //输出后处理
  411. //std::cout <<"prob="<<prob << std::endl;
  412. results.clear();
  413. results=postprocess(prob, 0.3, 0.4);
  414. cv::resize(img, img, cv::Size(INPUT_W, INPUT_H), 0, 0, cv::INTER_LINEAR);
  415. img=draw_rect(img,results);
  416. cv::imshow("www", img);
  417. cv::waitKey(0);
  418. cout << "ok" << endl;
  419. }
  420. std::cout << "C++ 2engine" << "mean read img time =" << time_read_img / 1000 << "ms\t" << "mean infer img time =" << time_infer / 1000 << "ms" << std::endl;
  421. // Destroy the engine
  422. context->destroy();
  423. engine->destroy();
  424. runtime->destroy();
  425. return 0;
  426. }
  427. int main(int argc, char** argv)
  428. {
  429. //string mode = argv[1];
  430. string mode = "-d"; //适用windows编译,固定指定参数
  431. //if (std::string(argv[1]) == "-s") {
  432. if (mode == "-s") {
  433. get_trtengine();
  434. }
  435. //else if (std::string(argv[1]) == "-d") {
  436. else if (mode == "-d") {
  437. infer();
  438. }
  439. else {
  440. return -1;
  441. }
  442. return 0;
  443. }
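A note on preprocessing: ProcessImage above simply stretches the image to 640×640, while the official YOLOv5 pipeline letterboxes it (scales with the aspect ratio preserved and pads the borders with gray). If the boxes look slightly off compared with the Python results, that difference is the likely cause. Below is a rough letterbox sketch of my own using OpenCV, assuming the pad value 114 used by the official repo; the returned scale and offsets would still have to be undone when mapping boxes back to the original image:

// Sketch: letterbox an image to INPUT_W x INPUT_H, keeping the aspect ratio.
// scale, pad_x and pad_y are returned so detections can be mapped back later.
cv::Mat letterbox(const cv::Mat& src, float& scale, int& pad_x, int& pad_y) {
    scale = std::min((float)INPUT_W / src.cols, (float)INPUT_H / src.rows);
    int new_w = (int)(src.cols * scale);
    int new_h = (int)(src.rows * scale);
    pad_x = (INPUT_W - new_w) / 2;
    pad_y = (INPUT_H - new_h) / 2;

    cv::Mat resized;
    cv::resize(src, resized, cv::Size(new_w, new_h), 0, 0, cv::INTER_LINEAR);
    cv::Mat out;
    cv::copyMakeBorder(resized, out, pad_y, INPUT_H - new_h - pad_y,
                       pad_x, INPUT_W - new_w - pad_x,
                       cv::BORDER_CONSTANT, cv::Scalar(114, 114, 114));
    return out;
}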

3. Prediction results

Test results from converting a model I trained myself:

4. Writing CMakeLists.txt (added 2022-10-06)

This section shows how to build and run in an Ubuntu (Linux) environment. The CMakeLists.txt below works for the YOLO ONNX code as well as a pure C++ API build; the line target_link_libraries(yolo /home/ubuntu/soft/TensorRT-8.2.5.1/lib/stubs/libnvonnxparser.so) is needed only because the ONNX parser is used, and can be dropped for a pure C++ API build. Build it the usual CMake way: create a build directory, run cmake .. and make, then run the resulting yolo binary.

Source: https://www.cnblogs.com/tangjunjun/p/16624566.html

CMakeLists.txt for the engine build:

cmake_minimum_required(VERSION 2.6)

project(yolo)

add_definitions(-std=c++11)

option(CUDA_USE_STATIC_CUDA_RUNTIME OFF)
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_BUILD_TYPE Debug)

include_directories(${PROJECT_SOURCE_DIR}/include)
# include and link dirs of cuda and tensorrt, you need adapt them if yours are different
# cuda
include_directories(/usr/local/cuda/include)
link_directories(/usr/local/cuda/lib64)
# tensorrt
include_directories(/home/ubuntu/soft/TensorRT-8.2.5.1/include/)
link_directories(/home/ubuntu/soft/TensorRT-8.2.5.1/lib/)
include_directories(/home/ubuntu/soft/TensorRT-8.2.5.1/samples/common/)
#link_directories(/home/ubuntu/soft/TensorRT-8.2.5.1/lib/stubs/)
# opencv
find_package(OpenCV REQUIRED)
include_directories(${OpenCV_INCLUDE_DIRS})

add_executable(yolo ${PROJECT_SOURCE_DIR}/main.cpp)
target_link_libraries(yolo nvinfer)
target_link_libraries(yolo cudart)
target_link_libraries(yolo ${OpenCV_LIBS})
target_link_libraries(yolo /home/ubuntu/soft/TensorRT-8.2.5.1/lib/stubs/libnvonnxparser.so)

add_definitions(-O2 -pthread)