赞
踩
为什么要自己手写YOLO目标识别的图像后处理部分呢?因为我们使用了一款自研的FPGA开发板,并在其上开发部署了YOLO目标识别加速计算电路,作为PS端开发的我,需要把输入图像送到指定DDR地址,开启FPGA加速计算寄存器,等待计算结果发起中断,取回目标识别结果原始数据,经过后处理操作对其进行还原,并将结果标注到原始图像上。
话不多说,直接上代码!
通过下面这份代码,你至少应该学会以下几个操作:
1、学会如何从文件夹里读取数据集图片
2、学会如何将FPGA结果反量化,解码,极大值抑制等基本操作
3、学会如何将识别结果标注,保存
4、学会通过操作寄存器控制FPGA的运转
- static char text[256];
- static int length = 5+5;
- static int kind_nums = 5;
- static float anchors_0[6] = {2.53125f, 2.5625f, 4.21875f, 5.28125f, 10.75f, 9.96875f};
- static float anchors_1[6] = {1.4375f, 1.6875f, 2.3125f, 3.625f, 5.0625f, 5.125f};
- static float conf_thresh_ = 0.5f;
- static float nms_thresh_ = 0.4f;
-
- static std::string label_name[] = {"person", "aeroplane", "bus", "car", "truck"};
-
- bool PedestrianDetection::get_files_list()
- {
- struct dirent *ptr;
- DIR *dir;
- std::string PATH = dataset_dir_;
- dir = opendir(PATH.c_str());
- while((ptr=readdir(dir)) != nullptr)
- {
- if(ptr->d_name[0] == '.')
- continue;
- files_list.push_back(ptr->d_name);
- }
- for(auto iter : files_list)
- {
- printf("%s\n", iter.c_str());
- }
- closedir(dir);
- if(files_list.size() == dataset_size)
- {
- return true;
- }
- else {
- printf("[PedestrianDetection] : Error to search dataset %ld \n", files_list.size());
- return false;
- }
- }
-
- bool PedestrianDetection::init_net_info(std::string pkg_path)
- {
- yolov4tn = 2;
- outputchannel = 30; // class number change
- yolov4tnsize = {11, 15, 22, 30}; // class number change
- yolov4tnsize_sum = yolov4tnsize[0] * yolov4tnsize[1] + yolov4tnsize[2] * yolov4tnsize[3];
- accData_size = yolov4tnsize_sum * outputchannel;
- accRaw_size = yolov4tnsize_sum * (outputchannel + 2);
-
- return true;
- }
-
- void PedestrianDetection::init_fpga()
- {
- raw_img_w = 480;
- raw_img_h = 352;
- raw_img_c = 3;
- memset(&yolo_v4, 0, sizeof(struct Model_Output));
- yolo_v4.addr[0] = 0x4491F800;
- yolo_v4.length[0] = 11 * 15 * (30+2);
- yolo_v4.addr[1] = 0x44921000;
- yolo_v4.length[1] = 22 * 30 * (30+2);
- if(init_model_output(&yolo_v4, fd_mem) == -1)
- {
- printf("[yolov4] failed to init model output \n");
- exit(-1);
- }
- }
-
- void PedestrianDetection::read_dataset()
- {
- while(nh_private.ok())
- {
- while(ready_buf_.size() > 50)
- {
- std::this_thread::sleep_for(std::chrono::milliseconds(5));
- }
-
- #ifdef WITH_NETCAM
- cv::Mat origin;
- struct timeval timestamp;
- if(netcam.getCameraImage(origin, timestamp, cv::Size(1920, 1080)))
- {
- std::vector<cv::Mat> pics;
- cut_img(origin, 2, 2, pics);
- for(int i = 0; i < pics.size(); i++)
- {
- if(i != (pics.size()-1))
- {
- std::vector<uint8_t> vec_data;
- cv::imencode(".jpg", pics[i], vec_data);
- std::string str_data;
- str_data.assign(vec_data.begin(), vec_data.end());
- zmqpp::message message;
- message << "robot" + std::to_string(i+1) +"pic" << str_data;
- socket_pubraw.send(message);
- }
- else {
- cv::Mat resize;
- cv::resize(pics[i], resize, cv::Size(raw_img_w, raw_img_h), cv::INTER_CUBIC); // cv::INTER_CUBIC
- cv::cvtColor(resize, resize, cv::COLOR_BGR2RGB);
-
- cv::Mat padding;
- cv::copyMakeBorder(resize, padding, 1, 0, 1, 0, cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0));
- rawImgFile tmp;
- tmp.raw_img = pics[i];
- tmp.pad_img = padding;
- tmp.file_name = robot_id;
- ready_buf_mutex_.lock();
- ready_buf_.push(tmp);
- ready_buf_mutex_.unlock();
- }
- }
- }
- else {
- printf("camera no data\n");
- usleep(30*1000);
- continue;
- }
- #else
- zmqpp::message message;
- socket_sub.receive(message);
- std::string topic;
- std::string data_str;
- message >> topic >> data_str;
- std::vector<uint8_t> data_vec;
- data_vec.assign(data_str.begin(), data_str.end());
-
- cv::Mat img_decode;
- img_decode = cv::imdecode(data_vec, CV_LOAD_IMAGE_COLOR);
- cv::Mat resize;
- cv::resize(img_decode, resize, cv::Size(raw_img_w, raw_img_h), cv::INTER_CUBIC); // cv::INTER_CUBIC
- cv::cvtColor(resize, resize, cv::COLOR_BGR2RGB);
-
- cv::Mat padding;
- cv::copyMakeBorder(resize, padding, 1, 0, 1, 0, cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0));
- rawImgFile tmp;
- tmp.raw_img = img_decode;
- tmp.pad_img = padding;
- tmp.file_name = robot_id;
- ready_buf_mutex_.lock();
- ready_buf_.push(tmp);
- ready_buf_mutex_.unlock();
- #endif
-
- }
- }
-
- void PedestrianDetection::test_fpga()
- {
- while(nh_private.ok())
- {
- while(nh_private.ok())
- {
- if(ready_buf_.empty())
- {
- std::this_thread::sleep_for(std::chrono::milliseconds(5));
- }
- else
- {
- break;
- }
- }
-
- rawImgFile tmp_input;
- ready_buf_mutex_.lock();
- tmp_input = ready_buf_.front();
- ready_buf_.pop();
- ready_buf_mutex_.unlock();
-
- tmp_input.acc.acc_out = nullptr;
- tmp_input.acc.acc_out = new int8_t[accRaw_size];
- if(!tmp_input.acc.acc_out)
- {
- ROS_ERROR("Allocation of memory Failed \n");
- return;
- }
-
- int image_size = 481 * 353 * 3;
- image_to_mem(tmp_input.pad_img.data, 0x46000000, fd_mem, image_size);
-
- // TODO : start fpga
- void* mem_ctrl = mmap(nullptr, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd_mem, 0x0400000000L);
-
- int state = 0;
- while(state != 1)
- {
- memcpy(&state, (char*)mem_ctrl+124, 4);
- }
- state = 1;
- memcpy((char*)mem_ctrl, &state, 4);
- int res = -1;
- memcpy(&res, (char*)mem_ctrl, 4);
-
- state = 0;
- memcpy((char*)mem_ctrl, &state, 4);
- memcpy(&res, (char*)mem_ctrl, 4);
-
- int busy = 0;
- memcpy(&busy, (char*)mem_ctrl+8, 4);
- int busy_prev = busy;
- while(busy_prev != 0)
- {
- memcpy(&busy, (char*)mem_ctrl+8, 4);
- if(busy != busy_prev)
- {
- busy_prev = busy;
- }
- }
- munmap(mem_ctrl, 4096);
-
- get_model_output(&yolo_v4, tmp_input.acc.acc_out);
-
- result_buf_mutex_.lock();
- result_buf_.push(tmp_input);
- result_buf_mutex_.unlock();
- }
- }
-
-
- void PedestrianDetection::write_predict()
- {
- while(nh_private.ok())
- {
- while(nh_private.ok())
- {
- if(result_buf_.empty())
- {
- std::this_thread::sleep_for(std::chrono::milliseconds(5));
- }
- else
- {
- break;
- }
- }
-
- rawImgFile tmp_output;
- result_buf_mutex_.lock();
- tmp_output = result_buf_.front();
- result_buf_.pop();
- result_buf_mutex_.unlock();
-
- decode(tmp_output);
-
- delete [] tmp_output.acc.acc_out;
- tmp_output.acc.acc_out = nullptr;
- }
- }
-
- static bool outbin_write = true;
- void PedestrianDetection::decode(rawImgFile &msg)
- {
- cv::Mat frame = msg.raw_img;
-
- if(msg.acc.acc_out != nullptr)
- {
- int8_t* data;
- data = msg.acc.acc_out;
-
- if(outbin_write)
- {
- FILE* fp = fopen((pkg_path + "/unit_test/result_img/out.bin").c_str(), "wb");
- fwrite(data, sizeof(int8_t), accRaw_size, fp);
- outbin_write = false;
- }
-
- float *out = (float*)calloc(accData_size, sizeof(float));
- unpadding(data, out, 32, 2);
- std::vector<de_result> res;
- process_decode(out, res, frame);
- draw_objects(frame, res);
-
- #if 1
-
- draw_objects(msg.raw_img, res);
- std::vector<uint8_t> vec_data;
- cv::imencode(".jpg", msg.raw_img, vec_data);
- std::string str_data;
- str_data.assign(vec_data.begin(), vec_data.end());
- zmqpp::message message;
- message << "pedestrian_detect" << str_data;
- socket_pub.send(message);
- // cv::imwrite((pkg_path + "/unit_test/result_img/" + img_name +"jpg"), msg.raw_img);
-
- /*
- std::ofstream outfile;
- outfile.open((pkg_path + "/unit_test/result_label/" + img_name +"txt"));
- for(size_t i = 0; i < res.size(); i++)
- {
- std::string str_conf = std::to_string(res[i].complex_prob);
- std::string conf_4(str_conf.begin(), str_conf.begin()+5);
- outfile << label_name[res[i].index] << " " << conf_4 << " " << static_cast<int>(res[i].x - res[i].w/2) << " " << static_cast<int>(res[i].y - res[i].h/2) << " " << static_cast<int>(res[i].x + res[i].w/2) << " " << static_cast<int>(res[i].y + res[i].h/2);
- if(i != (res.size()-1))
- outfile << std::endl;
- }
- outfile.close();*/
- /*
- FILE* fp = fopen((pkg_path + "/unit_test/result_bin/" + img_name +"bin").c_str(), "wb");
- fwrite(data, sizeof(int8_t), 11 * 15 * 96 + 22 * 30 * 96, fp);
- fclose(fp);
- */
- #endif
-
- free(out);
- }
- else
- {
- printf("acc is empty \n");
- }
-
- }
-
-
- void PedestrianDetection::unpadding(int8_t* input, float* output, int ic ,int padding)
- {
- int offset = yolov4tnsize[0] * yolov4tnsize[1];
- for(int i = 0; i < yolov4tnsize_sum; i++)
- {
- if(i < offset)
- {
- for(int j = 0; j < ic - padding; j++)
- {
- int tmp = *(input + j);
- *(output++) = (tmp - 78) * 0.1175554f;
- }
- }
- else
- {
- for(int j = 0; j < ic - padding; j++)
- {
- int tmp = *(input + j);
- *(output++) = (tmp - 88) * 0.1006139f;
- }
- }
-
- input += ic;
- }
- }
-
- inline float PedestrianDetection::sigmoid(const float &x)
- {
- return (1 / (1 + expf(-x)));
- }
-
- inline int PedestrianDetection::find_max_index(float *buf, int len)
- {
- int k = 0;
- float max = *buf;
- for (int i = 1; i < len; ++i)
- {
- if (buf[i] > max)
- {
- k = i;
- max = buf[i];
- }
- }
- return k;
- }
-
- bool comp_prob(const de_result &a, const de_result &b)
- {
- return a.complex_prob > b.complex_prob;
- }
-
- bool comp(const de_result &a, const de_result &b)
- {
- return a.index > b.index;
- }
-
- inline float PedestrianDetection::clac_iou(const de_result &A, const de_result &B)
- {
- float aminx = A.x - A.w / 2.f;
- float amaxx = A.x + A.w / 2.f;
- float aminy = A.y - A.h / 2.f;
- float amaxy = A.y + A.h / 2.f;
-
- float bminx = B.x - B.w / 2.f;
- float bmaxx = B.x + B.w / 2.f;
- float bminy = B.y - B.h / 2.f;
- float bmaxy = B.y + B.h / 2.f;
-
- float w = std::min(amaxx, bmaxx) - std::max(aminx, bminx);
- float h = std::min(amaxy, bmaxy) - std::max(aminy, bminy);
-
- if (w <= 0 || h <= 0)
- return 0;
-
- return (w * h) / ((A.w * A.h) + (B.w * B.h) - (w * h));
- }
-
- void PedestrianDetection::nms(const std::vector<de_result> &vpbox, std::vector<de_result> &voutbox, float iou_thres)
- {
- std::vector<float> flag(vpbox.size(), -1.0);
- int n = vpbox.size();
- for (int i = 0; i < n; i++)
- {
- flag[i] = vpbox[i].index;
- }
-
- for (int i = 0; i < n; i++)
- {
- if (flag[i] == -1.0)
- continue;
- for (int j = i + 1; j < n; j++)
- {
- if(flag[j] == -1.0 || vpbox[i].index != vpbox[j].index)
- continue;
- float iou = clac_iou(vpbox[i], vpbox[j]);
-
- if (iou > iou_thres)
- {
- flag[j] = -1.0;
- }
- }
- }
- for (int i = 0; i < n; i++)
- {
- if (flag[i] != -1.0)
- {
- voutbox.push_back(vpbox[i]);
- }
- }
- }
-
- void PedestrianDetection::decode_one_with_thre(int layer, float *anchors, void *data, const int xno, const int yno, const int anchor_no, std::vector<de_result>& boxes, cv::Mat &img)
- {
- const float *ori_ptr = (const float *)data;
- float decode_data[length], *data_ptr;
- data_ptr = decode_data;
-
- data_ptr[4] = sigmoid(ori_ptr[4]);
- for(int k = 5; k < length; k++)
- {
- data_ptr[k] = sigmoid(ori_ptr[k]);
- }
- int max_index = find_max_index(data_ptr+5, kind_nums);
- if(data_ptr[max_index+5] * data_ptr[4] < conf_thresh_)
- return;
-
- data_ptr[0] = sigmoid(ori_ptr[0]);
- data_ptr[1] = sigmoid(ori_ptr[1]);
- data_ptr[2] = expf(ori_ptr[2]);
- data_ptr[3] = expf(ori_ptr[3]);
-
- de_result tmp;
- tmp.x = (data_ptr[0] + xno) / yolov4tnsize[2 * layer + 1] * img.cols;
- tmp.y = (data_ptr[1] + yno) / yolov4tnsize[2 * layer] * img.rows;
- tmp.w = (((data_ptr[2]) * anchors[2 * anchor_no])) / yolov4tnsize[2 * layer + 1] * img.cols;
- tmp.h = (((data_ptr[3]) * anchors[2 * anchor_no + 1])) / yolov4tnsize[2 * layer] * img.rows;
- tmp.conf = data_ptr[4];
- tmp.index = max_index;
- tmp.prob = data_ptr[max_index+5];
- tmp.complex_prob = data_ptr[max_index+5] * data_ptr[4];
- // printf("## layer[%d] prob : %f , x : %f , y : %f , w : %f , h : %f \n", layer, tmp.prob, tmp.x, tmp.y, tmp.w, tmp.h);
- boxes.push_back(tmp);
- }
-
- void PedestrianDetection::process_decode(float* input, std::vector<de_result> &res, cv::Mat &img)
- {
- std::vector<de_result> boxes;
- float* data_ptr = input;
-
- for(int i = 0; i < yolov4tnsize[0]; i++){
- for(int j = 0; j < yolov4tnsize[1]; j++){
- for(int k = 0; k < 3; k++)
- {
- decode_one_with_thre(0, anchors_0, data_ptr, j, i, k, boxes, img);
- data_ptr += length;
- }
- }
- }
-
- for(int i = 0; i < yolov4tnsize[2]; i++){
- for(int j = 0; j < yolov4tnsize[3]; j++){
- for(int k = 0; k < 3; k++)
- {
- decode_one_with_thre(1, anchors_1, data_ptr, j, i, k, boxes, img);
- data_ptr += length;
- }
- }
- }
- sort(boxes.begin(), boxes.end(), comp_prob);
- nms(boxes, res, nms_thresh_);
- boxes.clear();
- }
-
-
- void PedestrianDetection::draw_objects(cv::Mat &image, const std::vector<de_result> &objects)
- {
- double font_size = 0.6;
- int font_bold = 1;
- int baseLine = 0;
- for(size_t i = 0; i < objects.size(); i++)
- {
- const de_result &obj = objects[i];
-
- sprintf(text, "%s %.1f%%", label_name[obj.index].c_str(), obj.complex_prob * 100);
- int x = std::max(static_cast<int>(obj.x - obj.w / 2), 0);
- int y = std::max(static_cast<int>(obj.y - obj.h / 2), 0);
- int x1 = std::min(static_cast<int>(obj.x + obj.w / 2), image.cols);
- int y1 = std::min(static_cast<int>(obj.y + obj.h / 2), image.rows);
- int w = x1 - x - 1;
- int h = y1 - y - 1;
-
- cv::Rect obj_rect(x, y, w, h);
- cv::rectangle(image, obj_rect, cv::Scalar(0, 255, 0), font_bold);
- cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, font_size, font_bold, &baseLine);
-
- int tx = obj_rect.x;
- int ty = obj_rect.y - label_size.height - baseLine;
- if (ty < 0)
- ty = 0;
-
- cv::Point point;
- point = cv::Point(tx, ty + label_size.height);
- cv::rectangle(image, cv::Rect(cv::Point(obj_rect.x, ty), cv::Size(obj_rect.width, label_size.height + baseLine)), cv::Scalar(128, 128, 0), CV_FILLED);
- cv::putText(image, text, point, cv::FONT_HERSHEY_SIMPLEX, font_size, cv::Scalar(0, 255, 0), font_bold);
- }
- }
-
- void PedestrianDetection::cut_img(cv::Mat &src, int m, int n, std::vector<cv::Mat> &ceil)
- {
- int height = src.rows;
- int width = src.cols;
- int ceil_h = height / m;
- int ceil_w = width / n;
- cv::Mat roi_img;
- for(int i = 0; i < m; i++)
- {
- for(int j = 0; j < n; j++)
- {
- cv::Rect rect(j*ceil_w, i*ceil_h, ceil_w, ceil_h);
- roi_img = cv::Mat(src, rect);
- ceil.push_back(roi_img);
- }
- }
- }
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。