当前位置:   article > 正文

使用OpenCV dnn c++加载YOLOv8生成的onnx文件进行实例分割

使用OpenCV dnn c++加载YOLOv8生成的onnx文件进行实例分割

      在网上下载了60多幅包含西瓜和冬瓜的图像组成melon数据集,使用 EISeg 工具进行标注,然后使用 eiseg2yolov8 脚本将.json文件转换成YOLOv8支持的.txt文件,并自动生成YOLOv8支持的目录结构,包括melon.yaml文件,其内容如下:

  # Ultralytics YOLOv8 dataset description for the melon segmentation set.
  path: ../datasets/melon_seg # dataset root dir
  train: images/train # train images (relative to 'path')
  val: images/val # val images (relative to 'path')
  test: # test images (optional)

  # Classes: the id -> name entries must be nested under `names`.
  names:
    0: watermelon
    1: wintermelon

      对melon数据集进行训练的Python实现如下:最终生成的模型文件有best.pt、best.onnx、best.torchscript

  1. import argparse
  2. import colorama
  3. from ultralytics import YOLO
  def parse_args():
      """Parse the command line of the training script.

      All three options are mandatory:
        --yaml    dataset description file (str)
        --epochs  number of training epochs (int)
        --task    either "detect" or "segment"

      Returns:
          argparse.Namespace with the attributes ``yaml``, ``epochs`` and ``task``.
      """
      cli = argparse.ArgumentParser(description="YOLOv8 train")

      # (flag, keyword arguments) pairs; every option is registered as required.
      option_specs = (
          ("--yaml", dict(type=str, help="yaml file")),
          ("--epochs", dict(type=int, help="number of training")),
          ("--task", dict(type=str, choices=["detect", "segment"], help="specify what kind of task")),
      )
      for flag, spec in option_specs:
          cli.add_argument(flag, required=True, **spec)

      return cli.parse_args()
  def train(task, yaml, epochs):
      """Train a YOLOv8 model, validate it and export it to ONNX and TorchScript.

      Args:
          task: "detect" loads the pretrained "yolov8n.pt" weights, "segment"
              loads "yolov8n-seg.pt".
          yaml: dataset description file passed to ``model.train`` as ``data``.
          epochs: number of training epochs.

      Raises:
          ValueError: if ``task`` is neither "detect" nor "segment".
      """
      if task == "detect":
          model = YOLO("yolov8n.pt") # load a pretrained model
      elif task == "segment":
          model = YOLO("yolov8n-seg.pt") # load a pretrained model
      else:
          print(colorama.Fore.RED + "Error: unsupported task:", task)
          # A bare `raise` is only valid inside an `except` block; raise an
          # explicit exception so the failure names the actual problem.
          raise ValueError(f"unsupported task: {task}")

      model.train(data=yaml, epochs=epochs, imgsz=640) # train the model
      model.val() # It'll automatically evaluate the data you trained, no arguments needed, dataset and settings remembered

      model.export(format="onnx") #, dynamic=True) # export the model, cannot specify dynamic=True, opencv does not support
      # model.export(format="onnx", opset=12, simplify=True, dynamic=False, imgsz=640)
      model.export(format="torchscript") # libtorch
  if __name__ == "__main__":
      # Script entry point: enable colored console output, read the command
      # line, run the training pipeline and report completion.
      colorama.init()
      cli_args = parse_args()
      train(task=cli_args.task, yaml=cli_args.yaml, epochs=cli_args.epochs)
      print(colorama.Fore.GREEN + "====== execution completed ======")

      以下是使用opencv dnn接口加载onnx文件进行实例分割的C++实现代码:

  1. namespace {
  // Selects the CUDA backend/target when true, the OpenCV CPU backend otherwise.
  constexpr bool cuda_enabled{ false };
  constexpr int input_size[2]{ 640, 640 }; // {height,width}, input shape (1, 3, 640, 640) BCHW and output shape(s): detect:(1,6,8400); segment:(1,38,8400),(1,32,160,160)
  constexpr float confidence_threshold{ 0.45f }; // confidence threshold
  constexpr float iou_threshold{ 0.50f }; // iou threshold
  constexpr float mask_threshold{ 0.50f }; // segment mask threshold

  // Data locations. The pointers are `const char*` because string literals are
  // arrays of const char; binding them to `char*` is ill-formed in ISO C++11 and later.
  #ifdef _MSC_VER
  constexpr const char* onnx_file{ "../../../data/best.onnx" };
  constexpr const char* torchscript_file{ "../../../data/best.torchscript" };
  constexpr const char* images_dir{ "../../../data/images/predict" };
  constexpr const char* result_dir{ "../../../data/result" };
  constexpr const char* classes_file{ "../../../data/images/labels.txt" };
  #else
  constexpr const char* onnx_file{ "data/best.onnx" };
  constexpr const char* torchscript_file{ "data/best.torchscript" };
  constexpr const char* images_dir{ "data/images/predict" };
  constexpr const char* result_dir{ "data/result" };
  constexpr const char* classes_file{ "data/images/labels.txt" };
  #endif
  20. cv::Mat modify_image_size(const cv::Mat& img)
  21. {
  22. auto max = std::max(img.rows, img.cols);
  23. cv::Mat ret = cv::Mat::zeros(max, max, CV_8UC3);
  24. img.copyTo(ret(cv::Rect(0, 0, img.cols, img.rows)));
  25. return ret;
  26. }
  /// Reads class names from a labels file.
  ///
  /// Each non-empty line contributes the text before its first space
  /// (e.g. "watermelon 0" -> "watermelon"). Blank lines are skipped and a
  /// trailing carriage return (CRLF files) is removed, so that the number of
  /// returned names matches the number of real label entries.
  ///
  /// @param name path of the labels file.
  /// @return the class names in file order; empty if the file cannot be opened
  ///         (an error is printed to std::cerr in that case).
  std::vector<std::string> parse_classes_file(const char* name)
  {
      std::vector<std::string> classes;

      std::ifstream file(name);
      if (!file.is_open()) {
          std::cerr << "Error: fail to open classes file: " << name << std::endl;
          return classes;
      }

      std::string line;
      while (std::getline(file, line)) {
          // Files written on Windows keep '\r' at the end of every line.
          if (!line.empty() && line.back() == '\r')
              line.pop_back();

          const auto pos = line.find(' ');
          std::string label = line.substr(0, pos);
          // An empty label would silently add a bogus class.
          if (label.empty())
              continue;

          classes.emplace_back(std::move(label));
      }

      // The stream closes itself when it goes out of scope.
      return classes;
  }
  /// Lists the regular files directly inside a directory (no recursion).
  ///
  /// @param name directory to scan.
  /// @return map from file name to the path produced by the directory iterator,
  ///         ordered by file name.
  auto get_dir_images(const char* name)
  {
      namespace fs = std::filesystem;

      std::map<std::string, std::string> found; // image name, image path + image name
      for (const fs::directory_entry& entry : fs::directory_iterator(name)) {
          // Sub-directories and other non-regular entries are ignored.
          if (!entry.is_regular_file())
              continue;

          const fs::path& file = entry.path();
          found[file.filename().string()] = file.string();
      }
      return found;
  }
  52. float image_preprocess(const cv::Mat& src, cv::Mat& dst)
  53. {
  54. cv::cvtColor(src, dst, cv::COLOR_BGR2RGB);
  55. float scalex = src.cols * 1.f / input_size[1];
  56. float scaley = src.rows * 1.f / input_size[0];
  57. if (scalex > scaley)
  58. cv::resize(dst, dst, cv::Size(input_size[1], static_cast<int>(src.rows / scalex)));
  59. else
  60. cv::resize(dst, dst, cv::Size(static_cast<int>(src.cols / scaley), input_size[0]));
  61. cv::Mat tmp = cv::Mat::zeros(input_size[0], input_size[1], CV_8UC3);
  62. dst.copyTo(tmp(cv::Rect(0, 0, dst.cols, dst.rows)));
  63. dst = tmp;
  64. return (scalex > scaley) ? scalex : scaley;
  65. }
  66. void get_masks(const cv::Mat& features, const cv::Mat& proto, const std::vector<int>& output1_sizes, const cv::Mat& frame, const cv::Rect box, cv::Mat& mk)
  67. {
  68. const cv::Size shape_src(frame.cols, frame.rows), shape_input(input_size[1], input_size[0]), shape_mask(output1_sizes[3], output1_sizes[2]);
  69. cv::Mat res = (features * proto).t();
  70. res = res.reshape(1, { shape_mask.height, shape_mask.width });
  71. // apply sigmoid to the mask
  72. cv::exp(-res, res);
  73. res = 1.0 / (1.0 + res);
  74. cv::resize(res, res, shape_input);
  75. float scalex = shape_src.width * 1.0 / shape_input.width;
  76. float scaley = shape_src.height * 1.0 / shape_input.height;
  77. cv::Mat tmp;
  78. if (scalex > scaley)
  79. cv::resize(res, tmp, cv::Size(shape_src.width, static_cast<int>(shape_input.height * scalex)));
  80. else
  81. cv::resize(res, tmp, cv::Size(static_cast<int>(shape_input.width * scaley), shape_src.height));
  82. cv::Mat dst = tmp(cv::Rect(0, 0, shape_src.width, shape_src.height));
  83. mk = dst(box) > mask_threshold;
  84. }
  85. void draw_boxes_mask(const std::vector<std::string>& classes, const std::vector<int>& ids, const std::vector<float>& confidences,
  86. const std::vector<cv::Rect>& boxes, const std::vector<cv::Mat>& masks, const std::string& name, cv::Mat& frame)
  87. {
  88. std::cout << "image name: " << name << ", number of detections: " << ids.size() << std::endl;
  89. std::random_device rd;
  90. std::mt19937 gen(rd());
  91. std::uniform_int_distribution<int> dis(100, 255);
  92. cv::Mat mk = frame.clone();
  93. std::vector<cv::Scalar> colors;
  94. for (auto i = 0; i < classes.size(); ++i)
  95. colors.emplace_back(cv::Scalar(dis(gen), dis(gen), dis(gen)));
  96. for (auto i = 0; i < ids.size(); ++i) {
  97. cv::rectangle(frame, boxes[i], colors[ids[i]], 2);
  98. std::string class_string = classes[ids[i]] + ' ' + std::to_string(confidences[i]).substr(0, 4);
  99. cv::Size text_size = cv::getTextSize(class_string, cv::FONT_HERSHEY_DUPLEX, 1, 2, 0);
  100. cv::Rect text_box(boxes[i].x, boxes[i].y - 40, text_size.width + 10, text_size.height + 20);
  101. cv::rectangle(frame, text_box, colors[ids[i]], cv::FILLED);
  102. cv::putText(frame, class_string, cv::Point(boxes[i].x + 5, boxes[i].y - 10), cv::FONT_HERSHEY_DUPLEX, 1, cv::Scalar(0, 0, 0), 2, 0);
  103. mk(boxes[i]).setTo(colors[ids[i]], masks[i]);
  104. }
  105. cv::addWeighted(frame, 0.5, mk, 0.5, 0, frame);
  106. //cv::imshow("Inference", frame);
  107. //cv::waitKey(-1);
  108. std::string path(result_dir);
  109. cv::imwrite(path + "/" + name, frame);
  110. }
  111. void post_process_mask(const cv::Mat& output0, const cv::Mat& output1, const std::vector<int>& output1_sizes, const std::vector<std::string>& classes, const std::string& name, cv::Mat& frame)
  112. {
  113. std::vector<int> class_ids;
  114. std::vector<float> confidences;
  115. std::vector<cv::Rect> boxes;
  116. std::vector<std::vector<float>> masks;
  117. float scalex = frame.cols * 1.f / input_size[1]; // note: image_preprocess function
  118. float scaley = frame.rows * 1.f / input_size[0];
  119. auto scale = (scalex > scaley) ? scalex : scaley;
  120. const float* data = (float*)output0.data;
  121. for (auto i = 0; i < output0.rows; ++i) {
  122. cv::Mat scores(1, classes.size(), CV_32FC1, (float*)data + 4);
  123. cv::Point class_id;
  124. double max_class_score;
  125. cv::minMaxLoc(scores, 0, &max_class_score, 0, &class_id);
  126. if (max_class_score > confidence_threshold) {
  127. confidences.emplace_back(max_class_score);
  128. class_ids.emplace_back(class_id.x);
  129. masks.emplace_back(std::vector<float>(data + 4 + classes.size(), data + output0.cols)); // 32
  130. float x = data[0];
  131. float y = data[1];
  132. float w = data[2];
  133. float h = data[3];
  134. int left = std::max(0, std::min(int((x - 0.5 * w) * scale), frame.cols));
  135. int top = std::max(0, std::min(int((y - 0.5 * h) * scale), frame.rows));
  136. int width = std::max(0, std::min(int(w * scale), frame.cols - left));
  137. int height = std::max(0, std::min(int(h * scale), frame.rows - top));
  138. boxes.emplace_back(cv::Rect(left, top, width, height));
  139. }
  140. data += output0.cols;
  141. }
  142. std::vector<int> nms_result;
  143. cv::dnn::NMSBoxes(boxes, confidences, confidence_threshold, iou_threshold, nms_result);
  144. cv::Mat proto = output1.reshape(0, { output1_sizes[1], output1_sizes[2] * output1_sizes[3] });
  145. std::vector<int> ids;
  146. std::vector<float> confs;
  147. std::vector<cv::Rect> rects;
  148. std::vector<cv::Mat> mks;
  149. for (size_t i = 0; i < nms_result.size(); ++i) {
  150. auto index = nms_result[i];
  151. ids.emplace_back(class_ids[index]);
  152. confs.emplace_back(confidences[index]);
  153. boxes[index] = boxes[index] & cv::Rect(0, 0, frame.cols, frame.rows);
  154. cv::Mat mk;
  155. get_masks(cv::Mat(masks[index]).t(), proto, output1_sizes, frame, boxes[index], mk);
  156. mks.emplace_back(mk);
  157. rects.emplace_back(boxes[index]);
  158. }
  159. draw_boxes_mask(classes, ids, confs, rects, mks, name, frame);
  160. }
  161. } // namespace
  162. int test_yolov8_segment_opencv()
  163. {
  164. namespace fs = std::filesystem;
  165. auto net = cv::dnn::readNetFromONNX(onnx_file);
  166. if (net.empty()) {
  167. std::cerr << "Error: there are no layers in the network: " << onnx_file << std::endl;
  168. return -1;
  169. }
  170. if (cuda_enabled) {
  171. net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
  172. net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
  173. } else {
  174. net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
  175. net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
  176. }
  177. if (!fs::exists(result_dir)) {
  178. fs::create_directories(result_dir);
  179. }
  180. auto classes = parse_classes_file(classes_file);
  181. if (classes.size() == 0) {
  182. std::cerr << "Error: fail to parse classes file: " << classes_file << std::endl;
  183. return -1;
  184. }
  185. std::cout << "classes: ";
  186. for (const auto& val : classes) {
  187. std::cout << val << " ";
  188. }
  189. std::cout << std::endl;
  190. for (const auto& [key, val] : get_dir_images(images_dir)) {
  191. cv::Mat frame = cv::imread(val, cv::IMREAD_COLOR);
  192. if (frame.empty()) {
  193. std::cerr << "Warning: unable to load image: " << val << std::endl;
  194. continue;
  195. }
  196. auto tstart = std::chrono::high_resolution_clock::now();
  197. cv::Mat bgr = modify_image_size(frame);
  198. cv::Mat blob;
  199. cv::dnn::blobFromImage(bgr, blob, 1.0 / 255.0, cv::Size(input_size[1], input_size[0]), cv::Scalar(), true, false);
  200. net.setInput(blob);
  201. std::vector<cv::Mat> outputs;
  202. net.forward(outputs, net.getUnconnectedOutLayersNames());
  203. if (outputs.size() != 2) {
  204. std::cerr << "Error: output must have 2 layers: " << outputs.size() << std::endl;
  205. return -1;
  206. }
  207. // output0
  208. cv::Mat data0 = cv::Mat(outputs[0].size[1], outputs[0].size[2], CV_32FC1, outputs[0].data).t();
  209. // output1
  210. std::vector<int> sizes;
  211. for (int i = 0; i < 4; ++i)
  212. sizes.emplace_back(outputs[1].size[i]);
  213. cv::Mat data1 = cv::Mat(sizes, CV_32F, outputs[1].data);
  214. auto tend = std::chrono::high_resolution_clock::now();
  215. std::cout << "elapsed millisenconds: " << std::chrono::duration_cast<std::chrono::milliseconds>(tend - tstart).count() << " ms" << std::endl;
  216. post_process_mask(data0, data1, sizes, classes, key, frame);
  217. }
  218. return 0;
  219. }

      labels.txt文件内容如下:仅2类

  1. watermelon 0
  2. wintermelon 1

      说明:

      1.通过指定变量cuda_enabled判断走cpu还是gpu流程 ;

      2.opencv使用4.9.0版本,编译opencv使用的shell脚本如下。注意:使用gpu(CUDA后端)执行时结果始终不正确,YOLOv8的issues中提到这是因为模型中存在OpenCV不支持的layer导致的:

  #! /bin/bash
  # Configure, build and install OpenCV (with opencv_contrib modules) via CMake.
  #
  # Usage: <script> <windows|windows_cuda|linux> <release|debug>
  #
  # The build tree is ./build and the install prefix is ../install relative to it.
  # The script stops at the first failing step and exits with that step's status.

  if [ $# != 2 ]; then
      echo "Error: requires two parameters: 1: windows windows_cuda or linux; 2: release or debug"
      echo "For example: $0 windows debug"
      exit -1
  fi

  if [ "$1" != "windows" ] && [ "$1" != "windows_cuda" ] && [ "$1" != "linux" ]; then
      echo "Error: the first parameter can only be windows, windows_cuda or linux"
      exit -1
  fi

  if [ "$2" != "release" ] && [ "$2" != "debug" ]; then
      echo "Error: the second parameter can only be release or debug"
      exit -1
  fi

  # Abort with a red error message when the given exit status is non-zero.
  # It is called after every step: checking $? only once at the end of the
  # script would see the status of the last `if` statement, not of the build.
  check_rc() {
      local rc=$1
      if [[ ${rc} != 0 ]]; then
          echo -e "\033[0;31mError: there are some errors in the above operation, please check: ${rc}\033[0m"
          exit ${rc}
      fi
  }

  mkdir -p build && cd build
  check_rc $?

  if [ "$2" == "release" ]; then
      build_type="Release"
  else
      build_type="Debug"
  fi

  # copy the contents of the bin,include,lib/x64 cudnn directories to the corresponding CUDA directories: cuda 11.8+cudnn8.7.x
  # cudnn8.9.x: init.hpp:32 cv::dnn::cuda4dnn::checkVersions cuDNN reports version 8.7 which is not compatible with the version 8.9 with which OpenCV was built
  # net_impl.cpp:178 cv::dnn::dnn4_v20231225::Net::Impl::setUpNet DNN module was not built with CUDA backend; switching to CPU: SET: CUDA_ARCH_BIN, OPENCV_DNN_CUDA
  # An array keeps every option a single word, including the ';'-separated arch list.
  if [ "$1" == "windows_cuda" ]; then
      cuda_options=(
          -DWITH_CUDA=ON
          -DWITH_CUDNN=ON
          -DCUDA_FAST_MATH=ON
          -DWITH_CUBLAS=ON
          -DOPENCV_DNN_CUDA=ON
          "-DCUDA_ARCH_BIN=5.0;5.2;6.0;6.1;7.0;7.5;8.0;8.6;8.9;9.0"
      )
  else
      cuda_options=()
  fi

  if [ "$1" == "windows" ] || [ "$1" == "windows_cuda" ]; then
      cmake \
          -G"Visual Studio 17 2022" -A x64 \
          "${cuda_options[@]}" \
          -DCMAKE_BUILD_TYPE=${build_type} \
          -DCMAKE_CONFIGURATION_TYPES=${build_type} \
          -DBUILD_SHARED_LIBS=ON \
          -DBUILD_opencv_world=ON \
          -DBUILD_PERF_TESTS=OFF \
          -DBUILD_TESTS=OFF \
          -DCMAKE_INSTALL_PREFIX=../install \
          -DOPENCV_EXTRA_MODULES_PATH=../../opencv_contrib/modules \
          ..
      check_rc $?

      # Use the same spelling that was passed to CMAKE_CONFIGURATION_TYPES.
      cmake --build . --target install --config "${build_type}"
      check_rc $?
  fi

  if [ "$1" == "linux" ]; then
      cmake \
          -DCMAKE_C_COMPILER=/usr/bin/gcc \
          -DCMAKE_CXX_COMPILER=/usr/bin/g++ \
          -DCMAKE_BUILD_TYPE=${build_type} \
          -DBUILD_SHARED_LIBS=ON \
          -DBUILD_opencv_world=ON \
          -DBUILD_PERF_TESTS=OFF \
          -DBUILD_TESTS=OFF \
          -DCMAKE_INSTALL_PREFIX=../install \
          -DOPENCV_EXTRA_MODULES_PATH=../../opencv_contrib/modules \
          ..
      check_rc $?

      make -j2
      check_rc $?

      make install
      check_rc $?
  fi

      执行结果如下图所示:同样的预测图像集,与onnxruntime结果相似,但并不完全相同,它们具有相同的后处理流程;下面显示的耗时是在cpu下,gpu下仅20毫秒左右

      其中一幅图像的分割结果如下图所示:

      GitHub:https://github.com/fengbingchun/NN_Test

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/盐析白兔/article/detail/725069
推荐阅读
相关标签
  

闽ICP备14008679号