当前位置:   article > 正文

C++实现YOLO目标识别图像预处理、后处理

yolo后处理

【背景】

为什么要自己手写YOLO目标识别的图像后处理部分呢?因为我们使用了一款自研的FPGA开发板,并在其上开发部署了YOLO目标识别加速计算电路,作为PS端开发的我,需要把输入图像送到指定DDR地址,开启FPGA加速计算寄存器,等待计算结果发起中断,取回目标识别结果原始数据,经过后处理操作对其进行还原,并将结果标注到原始图像上。

【干货】

话不多说,直接上代码!

通过下面这份代码,你至少应该学会以下几个操作:

1、学会如何从文件夹里读取数据集图片

2、学会如何将FPGA结果反量化,解码,极大值抑制等基本操作

3、学会如何将识别结果标注,保存

4、学会通过操作寄存器控制FPGA的运转

// Scratch buffer used by draw_objects() to render label text.
static char text[256];
// Per-anchor record length: 4 box coords + 1 objectness + kind_nums class scores.
static int length = 5+5;
// Number of object classes the network predicts.
static int kind_nums = 5;
// Anchor (w, h) pairs for the coarse output layer, in grid-cell units.
static float anchors_0[6] = {2.53125f, 2.5625f, 4.21875f, 5.28125f, 10.75f, 9.96875f};
// Anchor (w, h) pairs for the fine output layer, in grid-cell units.
static float anchors_1[6] = {1.4375f, 1.6875f, 2.3125f, 3.625f, 5.0625f, 5.125f};
// Minimum (objectness * best-class probability) for a detection to survive.
static float conf_thresh_ = 0.5f;
// IoU threshold used by non-maximum suppression.
static float nms_thresh_ = 0.4f;
// Class index -> human-readable label; must have kind_nums entries.
static std::string label_name[] = {"person", "aeroplane", "bus", "car", "truck"};
  9. bool PedestrianDetection::get_files_list()
  10. {
  11. struct dirent *ptr;
  12. DIR *dir;
  13. std::string PATH = dataset_dir_;
  14. dir = opendir(PATH.c_str());
  15. while((ptr=readdir(dir)) != nullptr)
  16. {
  17. if(ptr->d_name[0] == '.')
  18. continue;
  19. files_list.push_back(ptr->d_name);
  20. }
  21. for(auto iter : files_list)
  22. {
  23. printf("%s\n", iter.c_str());
  24. }
  25. closedir(dir);
  26. if(files_list.size() == dataset_size)
  27. {
  28. return true;
  29. }
  30. else {
  31. printf("[PedestrianDetection] : Error to search dataset %ld \n", files_list.size());
  32. return false;
  33. }
  34. }
  35. bool PedestrianDetection::init_net_info(std::string pkg_path)
  36. {
  37. yolov4tn = 2;
  38. outputchannel = 30; // class number change
  39. yolov4tnsize = {11, 15, 22, 30}; // class number change
  40. yolov4tnsize_sum = yolov4tnsize[0] * yolov4tnsize[1] + yolov4tnsize[2] * yolov4tnsize[3];
  41. accData_size = yolov4tnsize_sum * outputchannel;
  42. accRaw_size = yolov4tnsize_sum * (outputchannel + 2);
  43. return true;
  44. }
  45. void PedestrianDetection::init_fpga()
  46. {
  47. raw_img_w = 480;
  48. raw_img_h = 352;
  49. raw_img_c = 3;
  50. memset(&yolo_v4, 0, sizeof(struct Model_Output));
  51. yolo_v4.addr[0] = 0x4491F800;
  52. yolo_v4.length[0] = 11 * 15 * (30+2);
  53. yolo_v4.addr[1] = 0x44921000;
  54. yolo_v4.length[1] = 22 * 30 * (30+2);
  55. if(init_model_output(&yolo_v4, fd_mem) == -1)
  56. {
  57. printf("[yolov4] failed to init model output \n");
  58. exit(-1);
  59. }
  60. }
  61. void PedestrianDetection::read_dataset()
  62. {
  63. while(nh_private.ok())
  64. {
  65. while(ready_buf_.size() > 50)
  66. {
  67. std::this_thread::sleep_for(std::chrono::milliseconds(5));
  68. }
  69. #ifdef WITH_NETCAM
  70. cv::Mat origin;
  71. struct timeval timestamp;
  72. if(netcam.getCameraImage(origin, timestamp, cv::Size(1920, 1080)))
  73. {
  74. std::vector<cv::Mat> pics;
  75. cut_img(origin, 2, 2, pics);
  76. for(int i = 0; i < pics.size(); i++)
  77. {
  78. if(i != (pics.size()-1))
  79. {
  80. std::vector<uint8_t> vec_data;
  81. cv::imencode(".jpg", pics[i], vec_data);
  82. std::string str_data;
  83. str_data.assign(vec_data.begin(), vec_data.end());
  84. zmqpp::message message;
  85. message << "robot" + std::to_string(i+1) +"pic" << str_data;
  86. socket_pubraw.send(message);
  87. }
  88. else {
  89. cv::Mat resize;
  90. cv::resize(pics[i], resize, cv::Size(raw_img_w, raw_img_h), cv::INTER_CUBIC); // cv::INTER_CUBIC
  91. cv::cvtColor(resize, resize, cv::COLOR_BGR2RGB);
  92. cv::Mat padding;
  93. cv::copyMakeBorder(resize, padding, 1, 0, 1, 0, cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0));
  94. rawImgFile tmp;
  95. tmp.raw_img = pics[i];
  96. tmp.pad_img = padding;
  97. tmp.file_name = robot_id;
  98. ready_buf_mutex_.lock();
  99. ready_buf_.push(tmp);
  100. ready_buf_mutex_.unlock();
  101. }
  102. }
  103. }
  104. else {
  105. printf("camera no data\n");
  106. usleep(30*1000);
  107. continue;
  108. }
  109. #else
  110. zmqpp::message message;
  111. socket_sub.receive(message);
  112. std::string topic;
  113. std::string data_str;
  114. message >> topic >> data_str;
  115. std::vector<uint8_t> data_vec;
  116. data_vec.assign(data_str.begin(), data_str.end());
  117. cv::Mat img_decode;
  118. img_decode = cv::imdecode(data_vec, CV_LOAD_IMAGE_COLOR);
  119. cv::Mat resize;
  120. cv::resize(img_decode, resize, cv::Size(raw_img_w, raw_img_h), cv::INTER_CUBIC); // cv::INTER_CUBIC
  121. cv::cvtColor(resize, resize, cv::COLOR_BGR2RGB);
  122. cv::Mat padding;
  123. cv::copyMakeBorder(resize, padding, 1, 0, 1, 0, cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0));
  124. rawImgFile tmp;
  125. tmp.raw_img = img_decode;
  126. tmp.pad_img = padding;
  127. tmp.file_name = robot_id;
  128. ready_buf_mutex_.lock();
  129. ready_buf_.push(tmp);
  130. ready_buf_mutex_.unlock();
  131. #endif
  132. }
  133. }
  134. void PedestrianDetection::test_fpga()
  135. {
  136. while(nh_private.ok())
  137. {
  138. while(nh_private.ok())
  139. {
  140. if(ready_buf_.empty())
  141. {
  142. std::this_thread::sleep_for(std::chrono::milliseconds(5));
  143. }
  144. else
  145. {
  146. break;
  147. }
  148. }
  149. rawImgFile tmp_input;
  150. ready_buf_mutex_.lock();
  151. tmp_input = ready_buf_.front();
  152. ready_buf_.pop();
  153. ready_buf_mutex_.unlock();
  154. tmp_input.acc.acc_out = nullptr;
  155. tmp_input.acc.acc_out = new int8_t[accRaw_size];
  156. if(!tmp_input.acc.acc_out)
  157. {
  158. ROS_ERROR("Allocation of memory Failed \n");
  159. return;
  160. }
  161. int image_size = 481 * 353 * 3;
  162. image_to_mem(tmp_input.pad_img.data, 0x46000000, fd_mem, image_size);
  163. // TODO : start fpga
  164. void* mem_ctrl = mmap(nullptr, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd_mem, 0x0400000000L);
  165. int state = 0;
  166. while(state != 1)
  167. {
  168. memcpy(&state, (char*)mem_ctrl+124, 4);
  169. }
  170. state = 1;
  171. memcpy((char*)mem_ctrl, &state, 4);
  172. int res = -1;
  173. memcpy(&res, (char*)mem_ctrl, 4);
  174. state = 0;
  175. memcpy((char*)mem_ctrl, &state, 4);
  176. memcpy(&res, (char*)mem_ctrl, 4);
  177. int busy = 0;
  178. memcpy(&busy, (char*)mem_ctrl+8, 4);
  179. int busy_prev = busy;
  180. while(busy_prev != 0)
  181. {
  182. memcpy(&busy, (char*)mem_ctrl+8, 4);
  183. if(busy != busy_prev)
  184. {
  185. busy_prev = busy;
  186. }
  187. }
  188. munmap(mem_ctrl, 4096);
  189. get_model_output(&yolo_v4, tmp_input.acc.acc_out);
  190. result_buf_mutex_.lock();
  191. result_buf_.push(tmp_input);
  192. result_buf_mutex_.unlock();
  193. }
  194. }
  195. void PedestrianDetection::write_predict()
  196. {
  197. while(nh_private.ok())
  198. {
  199. while(nh_private.ok())
  200. {
  201. if(result_buf_.empty())
  202. {
  203. std::this_thread::sleep_for(std::chrono::milliseconds(5));
  204. }
  205. else
  206. {
  207. break;
  208. }
  209. }
  210. rawImgFile tmp_output;
  211. result_buf_mutex_.lock();
  212. tmp_output = result_buf_.front();
  213. result_buf_.pop();
  214. result_buf_mutex_.unlock();
  215. decode(tmp_output);
  216. delete [] tmp_output.acc.acc_out;
  217. tmp_output.acc.acc_out = nullptr;
  218. }
  219. }
// One-shot flag: dump the first raw FPGA output to out.bin for offline debugging.
static bool outbin_write = true;
  221. void PedestrianDetection::decode(rawImgFile &msg)
  222. {
  223. cv::Mat frame = msg.raw_img;
  224. if(msg.acc.acc_out != nullptr)
  225. {
  226. int8_t* data;
  227. data = msg.acc.acc_out;
  228. if(outbin_write)
  229. {
  230. FILE* fp = fopen((pkg_path + "/unit_test/result_img/out.bin").c_str(), "wb");
  231. fwrite(data, sizeof(int8_t), accRaw_size, fp);
  232. outbin_write = false;
  233. }
  234. float *out = (float*)calloc(accData_size, sizeof(float));
  235. unpadding(data, out, 32, 2);
  236. std::vector<de_result> res;
  237. process_decode(out, res, frame);
  238. draw_objects(frame, res);
  239. #if 1
  240. draw_objects(msg.raw_img, res);
  241. std::vector<uint8_t> vec_data;
  242. cv::imencode(".jpg", msg.raw_img, vec_data);
  243. std::string str_data;
  244. str_data.assign(vec_data.begin(), vec_data.end());
  245. zmqpp::message message;
  246. message << "pedestrian_detect" << str_data;
  247. socket_pub.send(message);
  248. // cv::imwrite((pkg_path + "/unit_test/result_img/" + img_name +"jpg"), msg.raw_img);
  249. /*
  250. std::ofstream outfile;
  251. outfile.open((pkg_path + "/unit_test/result_label/" + img_name +"txt"));
  252. for(size_t i = 0; i < res.size(); i++)
  253. {
  254. std::string str_conf = std::to_string(res[i].complex_prob);
  255. std::string conf_4(str_conf.begin(), str_conf.begin()+5);
  256. outfile << label_name[res[i].index] << " " << conf_4 << " " << static_cast<int>(res[i].x - res[i].w/2) << " " << static_cast<int>(res[i].y - res[i].h/2) << " " << static_cast<int>(res[i].x + res[i].w/2) << " " << static_cast<int>(res[i].y + res[i].h/2);
  257. if(i != (res.size()-1))
  258. outfile << std::endl;
  259. }
  260. outfile.close();*/
  261. /*
  262. FILE* fp = fopen((pkg_path + "/unit_test/result_bin/" + img_name +"bin").c_str(), "wb");
  263. fwrite(data, sizeof(int8_t), 11 * 15 * 96 + 22 * 30 * 96, fp);
  264. fclose(fp);
  265. */
  266. #endif
  267. free(out);
  268. }
  269. else
  270. {
  271. printf("acc is empty \n");
  272. }
  273. }
  274. void PedestrianDetection::unpadding(int8_t* input, float* output, int ic ,int padding)
  275. {
  276. int offset = yolov4tnsize[0] * yolov4tnsize[1];
  277. for(int i = 0; i < yolov4tnsize_sum; i++)
  278. {
  279. if(i < offset)
  280. {
  281. for(int j = 0; j < ic - padding; j++)
  282. {
  283. int tmp = *(input + j);
  284. *(output++) = (tmp - 78) * 0.1175554f;
  285. }
  286. }
  287. else
  288. {
  289. for(int j = 0; j < ic - padding; j++)
  290. {
  291. int tmp = *(input + j);
  292. *(output++) = (tmp - 88) * 0.1006139f;
  293. }
  294. }
  295. input += ic;
  296. }
  297. }
  298. inline float PedestrianDetection::sigmoid(const float &x)
  299. {
  300. return (1 / (1 + expf(-x)));
  301. }
  302. inline int PedestrianDetection::find_max_index(float *buf, int len)
  303. {
  304. int k = 0;
  305. float max = *buf;
  306. for (int i = 1; i < len; ++i)
  307. {
  308. if (buf[i] > max)
  309. {
  310. k = i;
  311. max = buf[i];
  312. }
  313. }
  314. return k;
  315. }
  316. bool comp_prob(const de_result &a, const de_result &b)
  317. {
  318. return a.complex_prob > b.complex_prob;
  319. }
  320. bool comp(const de_result &a, const de_result &b)
  321. {
  322. return a.index > b.index;
  323. }
  324. inline float PedestrianDetection::clac_iou(const de_result &A, const de_result &B)
  325. {
  326. float aminx = A.x - A.w / 2.f;
  327. float amaxx = A.x + A.w / 2.f;
  328. float aminy = A.y - A.h / 2.f;
  329. float amaxy = A.y + A.h / 2.f;
  330. float bminx = B.x - B.w / 2.f;
  331. float bmaxx = B.x + B.w / 2.f;
  332. float bminy = B.y - B.h / 2.f;
  333. float bmaxy = B.y + B.h / 2.f;
  334. float w = std::min(amaxx, bmaxx) - std::max(aminx, bminx);
  335. float h = std::min(amaxy, bmaxy) - std::max(aminy, bminy);
  336. if (w <= 0 || h <= 0)
  337. return 0;
  338. return (w * h) / ((A.w * A.h) + (B.w * B.h) - (w * h));
  339. }
  340. void PedestrianDetection::nms(const std::vector<de_result> &vpbox, std::vector<de_result> &voutbox, float iou_thres)
  341. {
  342. std::vector<float> flag(vpbox.size(), -1.0);
  343. int n = vpbox.size();
  344. for (int i = 0; i < n; i++)
  345. {
  346. flag[i] = vpbox[i].index;
  347. }
  348. for (int i = 0; i < n; i++)
  349. {
  350. if (flag[i] == -1.0)
  351. continue;
  352. for (int j = i + 1; j < n; j++)
  353. {
  354. if(flag[j] == -1.0 || vpbox[i].index != vpbox[j].index)
  355. continue;
  356. float iou = clac_iou(vpbox[i], vpbox[j]);
  357. if (iou > iou_thres)
  358. {
  359. flag[j] = -1.0;
  360. }
  361. }
  362. }
  363. for (int i = 0; i < n; i++)
  364. {
  365. if (flag[i] != -1.0)
  366. {
  367. voutbox.push_back(vpbox[i]);
  368. }
  369. }
  370. }
  371. void PedestrianDetection::decode_one_with_thre(int layer, float *anchors, void *data, const int xno, const int yno, const int anchor_no, std::vector<de_result>& boxes, cv::Mat &img)
  372. {
  373. const float *ori_ptr = (const float *)data;
  374. float decode_data[length], *data_ptr;
  375. data_ptr = decode_data;
  376. data_ptr[4] = sigmoid(ori_ptr[4]);
  377. for(int k = 5; k < length; k++)
  378. {
  379. data_ptr[k] = sigmoid(ori_ptr[k]);
  380. }
  381. int max_index = find_max_index(data_ptr+5, kind_nums);
  382. if(data_ptr[max_index+5] * data_ptr[4] < conf_thresh_)
  383. return;
  384. data_ptr[0] = sigmoid(ori_ptr[0]);
  385. data_ptr[1] = sigmoid(ori_ptr[1]);
  386. data_ptr[2] = expf(ori_ptr[2]);
  387. data_ptr[3] = expf(ori_ptr[3]);
  388. de_result tmp;
  389. tmp.x = (data_ptr[0] + xno) / yolov4tnsize[2 * layer + 1] * img.cols;
  390. tmp.y = (data_ptr[1] + yno) / yolov4tnsize[2 * layer] * img.rows;
  391. tmp.w = (((data_ptr[2]) * anchors[2 * anchor_no])) / yolov4tnsize[2 * layer + 1] * img.cols;
  392. tmp.h = (((data_ptr[3]) * anchors[2 * anchor_no + 1])) / yolov4tnsize[2 * layer] * img.rows;
  393. tmp.conf = data_ptr[4];
  394. tmp.index = max_index;
  395. tmp.prob = data_ptr[max_index+5];
  396. tmp.complex_prob = data_ptr[max_index+5] * data_ptr[4];
  397. // printf("## layer[%d] prob : %f , x : %f , y : %f , w : %f , h : %f \n", layer, tmp.prob, tmp.x, tmp.y, tmp.w, tmp.h);
  398. boxes.push_back(tmp);
  399. }
  400. void PedestrianDetection::process_decode(float* input, std::vector<de_result> &res, cv::Mat &img)
  401. {
  402. std::vector<de_result> boxes;
  403. float* data_ptr = input;
  404. for(int i = 0; i < yolov4tnsize[0]; i++){
  405. for(int j = 0; j < yolov4tnsize[1]; j++){
  406. for(int k = 0; k < 3; k++)
  407. {
  408. decode_one_with_thre(0, anchors_0, data_ptr, j, i, k, boxes, img);
  409. data_ptr += length;
  410. }
  411. }
  412. }
  413. for(int i = 0; i < yolov4tnsize[2]; i++){
  414. for(int j = 0; j < yolov4tnsize[3]; j++){
  415. for(int k = 0; k < 3; k++)
  416. {
  417. decode_one_with_thre(1, anchors_1, data_ptr, j, i, k, boxes, img);
  418. data_ptr += length;
  419. }
  420. }
  421. }
  422. sort(boxes.begin(), boxes.end(), comp_prob);
  423. nms(boxes, res, nms_thresh_);
  424. boxes.clear();
  425. }
  426. void PedestrianDetection::draw_objects(cv::Mat &image, const std::vector<de_result> &objects)
  427. {
  428. double font_size = 0.6;
  429. int font_bold = 1;
  430. int baseLine = 0;
  431. for(size_t i = 0; i < objects.size(); i++)
  432. {
  433. const de_result &obj = objects[i];
  434. sprintf(text, "%s %.1f%%", label_name[obj.index].c_str(), obj.complex_prob * 100);
  435. int x = std::max(static_cast<int>(obj.x - obj.w / 2), 0);
  436. int y = std::max(static_cast<int>(obj.y - obj.h / 2), 0);
  437. int x1 = std::min(static_cast<int>(obj.x + obj.w / 2), image.cols);
  438. int y1 = std::min(static_cast<int>(obj.y + obj.h / 2), image.rows);
  439. int w = x1 - x - 1;
  440. int h = y1 - y - 1;
  441. cv::Rect obj_rect(x, y, w, h);
  442. cv::rectangle(image, obj_rect, cv::Scalar(0, 255, 0), font_bold);
  443. cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, font_size, font_bold, &baseLine);
  444. int tx = obj_rect.x;
  445. int ty = obj_rect.y - label_size.height - baseLine;
  446. if (ty < 0)
  447. ty = 0;
  448. cv::Point point;
  449. point = cv::Point(tx, ty + label_size.height);
  450. cv::rectangle(image, cv::Rect(cv::Point(obj_rect.x, ty), cv::Size(obj_rect.width, label_size.height + baseLine)), cv::Scalar(128, 128, 0), CV_FILLED);
  451. cv::putText(image, text, point, cv::FONT_HERSHEY_SIMPLEX, font_size, cv::Scalar(0, 255, 0), font_bold);
  452. }
  453. }
  454. void PedestrianDetection::cut_img(cv::Mat &src, int m, int n, std::vector<cv::Mat> &ceil)
  455. {
  456. int height = src.rows;
  457. int width = src.cols;
  458. int ceil_h = height / m;
  459. int ceil_w = width / n;
  460. cv::Mat roi_img;
  461. for(int i = 0; i < m; i++)
  462. {
  463. for(int j = 0; j < n; j++)
  464. {
  465. cv::Rect rect(j*ceil_w, i*ceil_h, ceil_w, ceil_h);
  466. roi_img = cv::Mat(src, rect);
  467. ceil.push_back(roi_img);
  468. }
  469. }
  470. }

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/我家自动化/article/detail/178174
推荐阅读
相关标签
  

闽ICP备14008679号