赞
踩
最近翻了翻以前做的一些笔记,碰巧翻到了2019年刚开始学习OpenCV时候做的笔记,不知不觉已经过去两年了,这两年从一个小白到现在不是太小白的小白o(╥﹏╥)o,在此分享一下,希望能帮助到更多的人。
相关视频:https://www.bilibili.com/video/BV1FJ411T7W5?p=2
论文:https://github.com/SnailTyan/deep-learning-papers-translation
这里有很多翻译好的论文,很方便。
所需文件:二进制模型文件,模型参数描述文件,分类label文件。
模型下载:
http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel
卷积层提取特征,全连接层进行分类。
描述文件:bvlc_googlenet.prototxt
这个在opencv的源码里边有opencv-3.3.1\samples\data\dnn
模型输出为一个1000维的向量,代表1000个分类的概率。
// GoogLeNet image classification with the OpenCV dnn module.
// Loads a Caffe model, classifies one image and overlays the best label.
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/dnn.hpp>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

using namespace cv;
using namespace std;
using namespace cv::dnn;

String model_bin_file = "model/bvlc_googlenet.caffemodel";
String model_txt_file = "model/bvlc_googlenet.prototxt";
String labels_txt_file = "model/synset_words.txt";

vector<String> readLabels();

/// Classifies pictures/girl.jpg and shows the image with the winning label.
/// @return 0 on success, -1 when the image, the network or the label is missing.
int main(int argc, char** argv)
{
    Mat src = imread("pictures/girl.jpg");
    if (src.empty())
    {
        cout << "could not open image……" << endl;
        return -1;
    }
    namedWindow("src", WINDOW_FREERATIO);
    imshow("src", src);

    // Class names, one per network output.
    vector<String> labels = readLabels();

    // The network needs both the structure description and the trained weights.
    Net net = readNetFromCaffe(model_txt_file, model_bin_file);
    if (net.empty())
    {
        cout << "net could not load……" << endl;
        return -1;
    }

    // swapRB is passed explicitly: its default differs between OpenCV
    // releases, and imread() already delivers the BGR order used here.
    Mat inputBlob = blobFromImage(src, 1.0, Size(224, 224), Scalar(104, 117, 123), false);

    // One forward pass is enough; repeating it with the same input yields
    // the same probabilities.
    net.setInput(inputBlob, "data");
    Mat prob = net.forward("prob");

    // Flatten to a single-channel row so the column of the maximum is the class id.
    Mat proMat = prob.reshape(1, 1);
    Point classNumber;
    double classProb = 0.0;
    minMaxLoc(proMat, NULL, &classProb, NULL, &classNumber);
    const int classidx = classNumber.x;
    if (classidx < 0 || static_cast<size_t>(classidx) >= labels.size())
    {
        cout << "no label for class index " << classidx << endl;
        return -1;
    }

    cout << "current image classification:" << labels.at(classidx).c_str() << "possible:" << classProb << endl;
    putText(src, labels.at(classidx), Point(20, 20), FONT_HERSHEY_PLAIN, 1.5, Scalar(0, 0, 255), 1, 8);
    imshow("image", src);
    waitKey(0);
    return 0;
}

/// Reads the label file and returns one class name per non-empty line.
/// Each line is "<id> <name>"; everything after the first space is the name.
/// Terminates the process when the file cannot be opened.
vector<String> readLabels()
{
    vector<String> classNames;
    ifstream fin(labels_txt_file.c_str());
    if (!fin.is_open())
    {
        cout << "could not open the file……" << endl;
        exit(-1);
    }

    string line;
    while (getline(fin, line))
    {
        // Tolerate CRLF files so the label does not end in '\r'.
        if (!line.empty() && line[line.size() - 1] == '\r')
        {
            line.erase(line.size() - 1);
        }
        if (line.empty())
        {
            continue;
        }
        // The original `find(" " + 1)` advanced the literal's pointer and
        // searched for an empty string, so the id prefix was never removed.
        const string::size_type sep = line.find(' ');
        classNames.push_back(sep == string::npos ? line : line.substr(sep + 1));
    }
    return classNames;
}
模型下载:
https://github.com/weiliu89/caffe/tree/ssd#models
结构:
比传统的R-CNN要好很多。把两步合为一步,帧率得到了提高。
模型文件:还是有三个 二进制模型文件,模型参数描述文件,分类label文件
模型输出为一个7维向量 后四维为检测出来目标框的矩形坐标 倒数第5维为置信度
// Object detection on a single image with the VGG-based SSD 300x300 Caffe model.
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/dnn.hpp>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

using namespace std;
using namespace cv;
using namespace cv::dnn;

const size_t width = 300;
const size_t height = 300;
String labelFile = "model\\models_VGGNet_ILSVRC2016_SSD_300x300\\models\\VGGNet\\ILSVRC2016\\SSD_300x300\\labelmap_ilsvrc_det.prototxt";
String modelFile = "model\\models_VGGNet_ILSVRC2016_SSD_300x300\\models\\VGGNet\\ILSVRC2016\\SSD_300x300\\VGG_ILSVRC2016_SSD_300x300_iter_440000.caffemodel";
String model_text_file = "model\\models_VGGNet_ILSVRC2016_SSD_300x300\\models\\VGGNet\\ILSVRC2016\\SSD_300x300\\deploy.prototxt";
const int meanValues[3] = { 104, 117, 123 };

vector<String> readLabels();
static Mat getMean(const size_t &w, const size_t &h);
static Mat preprocess(const Mat& frame);

/// Detects objects in pictures/cat.jpg and draws a labelled box per detection.
/// @return 0 on success, -1 when the image or the network cannot be used.
int main(int argc, char** argv)
{
    Mat frame = imread("pictures/cat.jpg");
    if (frame.empty())
    {
        cout << "could not open image……" << endl;
        return -1;
    }
    namedWindow("input image", WINDOW_FREERATIO);
    imshow("input image", frame);

    vector<String> objNames = readLabels();

    // import Caffe SSD model
    Net net = readNetFromCaffe(model_text_file, modelFile);
    if (net.empty())
    {
        cout << "read caffe model data failure..." << endl;
        return -1;
    }

    Mat input_image = preprocess(frame);
    // preprocess() already resized and mean-subtracted the image; only the
    // layout conversion is left. swapRB is explicit because its default
    // differs between OpenCV releases.
    Mat blobImage = blobFromImage(input_image, 1.0, Size(), Scalar(), false);
    net.setInput(blobImage, "data");
    Mat detection = net.forward("detection_out");
    if (detection.dims != 4 || detection.size[3] < 7)
    {
        cout << "unexpected detection output shape" << endl;
        return -1;
    }

    // View the output as one row per detection. The code below reads
    // column 1 as the class id, column 2 as the confidence and columns 3..6
    // as box corners relative to the image size.
    Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>());
    const float confidence_threshold = 0.1f;
    for (int i = 0; i < detectionMat.rows; i++)
    {
        const float confidence = detectionMat.at<float>(i, 2);
        if (confidence <= confidence_threshold)
        {
            continue;
        }

        const float tl_x = detectionMat.at<float>(i, 3) * frame.cols;
        const float tl_y = detectionMat.at<float>(i, 4) * frame.rows;
        const float br_x = detectionMat.at<float>(i, 5) * frame.cols;
        const float br_y = detectionMat.at<float>(i, 6) * frame.rows;
        Rect object_box((int)tl_x, (int)tl_y, (int)(br_x - tl_x), (int)(br_y - tl_y));
        rectangle(frame, object_box, Scalar(0, 0, 255), 2, 8, 0);

        // Only label the box when the class id maps to a known name; an
        // unchecked index would read past the end of the vector.
        const float classId = detectionMat.at<float>(i, 1);
        if (classId >= 0 && static_cast<size_t>(classId) < objNames.size())
        {
            putText(frame, objNames[static_cast<size_t>(classId)], Point((int)tl_x, (int)tl_y),
                    FONT_HERSHEY_SIMPLEX, 1.0, Scalar(255, 0, 0), 2);
        }
    }
    imshow("ssd-demo", frame);
    waitKey(0);
    return 0;
}

/// Collects the quoted value of every `display_name: "..."` line in labelFile,
/// in file order. Terminates the process when the file cannot be opened.
vector<String> readLabels()
{
    vector<String> objNames;
    ifstream fin(labelFile.c_str());
    if (!fin.is_open())
    {
        cout << "could not load labeFile……" << endl;
        exit(-1);
    }

    const string key = "display_name:";
    string line;
    while (getline(fin, line))
    {
        const string::size_type keyPos = line.find(key);
        if (keyPos == string::npos)
        {
            continue;
        }
        // Take the text between the quotes instead of relying on fixed
        // column offsets, which break on different indentation or CRLF.
        const string::size_type open = line.find('"', keyPos + key.size());
        const string::size_type close = line.rfind('"');
        if (open == string::npos || close == string::npos || close <= open)
        {
            continue;
        }
        objNames.push_back(line.substr(open + 1, close - open - 1));
    }
    return objNames;
}

/// Builds an h-by-w three-channel float image filled with meanValues.
Mat getMean(const size_t& w, const size_t& h)
{
    return Mat(static_cast<int>(h), static_cast<int>(w), CV_32FC3,
               Scalar(meanValues[0], meanValues[1], meanValues[2]));
}

/// Converts a frame to float, resizes it to width x height and subtracts the
/// per-channel mean.
Mat preprocess(const Mat& frame)
{
    Mat preprocessed;
    frame.convertTo(preprocessed, CV_32F);
    resize(preprocessed, preprocessed, Size(static_cast<int>(width), static_cast<int>(height))); // 300*300 image
    Mat mean = getMean(width, height);
    subtract(preprocessed, mean, preprocessed);
    return preprocessed;
}
对SSD模型进行了简化,从1000个分类缩减为20个。
还是模型二进制文件,模型描述文件,label文件。
模型下载地址:https://github.com/PINTO0309/MobileNet-SSD-RealSense/blob/master/caffemodel/MobileNetSSD/MobileNetSSD_deploy.caffemodel
注意要使用deploy版本的。
模型输出也为一个7维向量 后四维为检测出来目标框的矩形坐标 倒数第5维为置信度
// Object detection on a video with the MobileNet-SSD Caffe model.
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/dnn.hpp>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

using namespace std;
using namespace cv;
using namespace cv::dnn;

const size_t width = 300;
const size_t height = 300;
// Input normalisation: (pixel - 127.5) / 127.5.
const float meanVal = 127.5f;
const float scaleFactor = 0.007843f; // 1 / 127.5
String labelFile = "model/mobileNetSSD/pascal-classes.txt";
String modelFile = "model/mobileNetSSD/MobileNetSSD_deploy.caffemodel";
String model_text_file = "model/mobileNetSSD/MobileNetSSD_deploy.prototxt";

vector<String> readLabels();

/// Runs the detector on every frame of pictures/vtest.avi, prints the
/// per-frame inference time and shows the annotated frames. ESC stops early.
/// @return 0 on success, -1 when the video or the network cannot be opened.
int main(int argc, char** argv)
{
    VideoCapture capture;
    capture.open("pictures/vtest.avi");
    if (!capture.isOpened())
    {
        cout << "could not open video……" << endl;
        return -1;
    }
    namedWindow("input", WINDOW_FREERATIO);
    namedWindow("ssd-video-demo", WINDOW_FREERATIO);
    const int w = static_cast<int>(capture.get(CAP_PROP_FRAME_WIDTH));
    const int h = static_cast<int>(capture.get(CAP_PROP_FRAME_HEIGHT));
    printf("frame width:%d, frame height:%d\n", w, h);

    // set up net
    Net net = readNetFromCaffe(model_text_file, modelFile);
    if (net.empty())
    {
        cout << "could not load NetModel……" << endl;
        return -1;
    }

    // read the label
    vector<String> classNames = readLabels();

    Mat frame;
    int frameNo = 0;
    while (capture.read(frame))
    {
        frameNo++;
        imshow("input", frame);

        // Inference, timed.
        const double t1 = (double)getTickCount();
        // The mean must be a full Scalar: a bare float converts to
        // Scalar(v, 0, 0, 0) and would only shift the first channel.
        // swapRB/crop are explicit because their defaults differ between
        // OpenCV releases; crop=false keeps the relative box coordinates
        // aligned with the whole frame.
        Mat inputblob = blobFromImage(frame, scaleFactor,
                                      Size(static_cast<int>(width), static_cast<int>(height)),
                                      Scalar(meanVal, meanVal, meanVal), false, false);
        net.setInput(inputblob, "data");
        Mat detection = net.forward("detection_out");
        const double t2 = (double)getTickCount();
        cout << "第" << frameNo << "帧" << "耗费时间:" << (t2 - t1) / getTickFrequency() << "s\n" << endl;

        // Drawing: one row per detection; the code reads column 1 as class
        // id, column 2 as confidence and columns 3..6 as relative corners.
        Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>());
        const float confidence_threshold = 0.25f;
        for (int row = 0; row < detectionMat.rows; row++)
        {
            const float confidence = detectionMat.at<float>(row, 2);
            if (confidence <= confidence_threshold)
            {
                continue;
            }

            const float tl_x = detectionMat.at<float>(row, 3) * frame.cols;
            const float tl_y = detectionMat.at<float>(row, 4) * frame.rows;
            const float br_x = detectionMat.at<float>(row, 5) * frame.cols;
            const float br_y = detectionMat.at<float>(row, 6) * frame.rows;
            Rect object_box((int)tl_x, (int)tl_y, (int)(br_x - tl_x), (int)(br_y - tl_y));
            rectangle(frame, object_box, Scalar(0, 0, 255), 2, 8, 0);

            // Skip the caption when the class id has no entry in the label file.
            const float classId = detectionMat.at<float>(row, 1);
            if (classId >= 0 && static_cast<size_t>(classId) < classNames.size())
            {
                putText(frame, classNames[static_cast<size_t>(classId)], Point((int)tl_x, (int)tl_y),
                        FONT_HERSHEY_SIMPLEX, 1.0, Scalar(255, 0, 0), 2);
            }
        }
        imshow("ssd-video-demo", frame);
        const char c = (char)waitKey(50);
        if (c == 27) // ESC
        {
            break;
        }
    }
    waitKey(0);
    return 0;
}

/// Returns the first whitespace-delimited token of every non-empty line of
/// labelFile. Terminates the process when the file cannot be opened.
vector<String> readLabels()
{
    vector<String> objNames;
    ifstream fin(labelFile.c_str());
    if (!fin.is_open())
    {
        cout << "could not load labeFile……" << endl;
        exit(-1);
    }

    string line;
    while (getline(fin, line))
    {
        // Drop a trailing '\r' so single-token lines from CRLF files stay clean.
        if (!line.empty() && line[line.size() - 1] == '\r')
        {
            line.erase(line.size() - 1);
        }
        if (line.empty())
        {
            continue;
        }
        objNames.push_back(line.substr(0, line.find(' ')));
    }
    return objNames;
}
论文:https://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Long_Fully_Convolutional_Networks_2015_CVPR_paper.pdf
全卷积网络
模型与数据:
还是三个文件:
模型下载地址:https://github.com/shelhamer/fcn.berkeleyvision.org
模型输出为21×500×500的数组。21为channel,也就是类别。500×500为rows×cols,对应于图片中的每一个像素值。
// Semantic segmentation of a single image with the FCN-8s Caffe model.
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/dnn.hpp>
#include <cfloat>
#include <cstdlib>
#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <vector>
#include <string.h>
#include <stdio.h>

using namespace std;
using namespace cv;
using namespace cv::dnn;

const size_t width = 500;
const size_t height = 500;
String labelFile = "model\\FCN\\pascal-classes.txt";
String modelFile = "model\\FCN\\fcn8s-heavy-pascal.caffemodel";
String model_text_file = "model\\FCN\\fcn8s-heavy-pascal.prototxt";
Scalar meanValues = Scalar(104, 117, 123);

vector<Vec3b> readColors();
vector<String> readLabels();

/// Segments pictures/rgb.jpg and shows the class colours blended over it.
/// @return 0 on success, -1 when the image, network or colour table is unusable.
int main(int argc, char** argv)
{
    Mat frame = imread("pictures/rgb.jpg");
    // Check before any processing: operating on an empty Mat throws.
    if (frame.empty())
    {
        cout << "could not open image……" << endl;
        return -1;
    }
    namedWindow("input image", WINDOW_FREERATIO);
    imshow("input image", frame);
    resize(frame, frame, Size(static_cast<int>(width), static_cast<int>(height)));

    vector<Vec3b> colors = readColors();

    // import Caffe FCN model
    Net net = readNetFromCaffe(model_text_file, modelFile);
    if (net.empty())
    {
        cout << "read caffe model data failure..." << endl;
        return -1;
    }

    // swapRB is explicit because its default differs between OpenCV releases.
    Mat blobImage = blobFromImage(frame, 1.0, Size(), Scalar(), false);

    // Inference.
    net.setInput(blobImage, "data");
    Mat score = net.forward("score");

    // score is indexed as [0][channel][row][col]; one channel per class.
    const int rows = score.size[2];
    const int cols = score.size[3];
    const int chns = score.size[1];
    if (colors.size() < static_cast<size_t>(chns))
    {
        cout << "label file defines fewer colors than the network has classes" << endl;
        return -1;
    }

    // Per-pixel arg-max over the channels. Both buffers must start from a
    // defined state, otherwise the first comparison reads garbage.
    Mat maxCl = Mat::zeros(rows, cols, CV_8UC1);          // winning class per pixel
    Mat maxVal(rows, cols, CV_32FC1, Scalar(-FLT_MAX));   // score of the winning class
    for (int c = 0; c < chns; c++)
    {
        for (int row = 0; row < rows; row++)
        {
            const float* ptrScore = score.ptr<float>(0, c, row);
            uchar* ptrMaxCl = maxCl.ptr<uchar>(row);
            float* ptrMaxVal = maxVal.ptr<float>(row);
            for (int col = 0; col < cols; col++)
            {
                if (ptrScore[col] > ptrMaxVal[col])
                {
                    ptrMaxVal[col] = ptrScore[col];
                    ptrMaxCl[col] = (uchar)c;
                }
            }
        }
    }

    // Map every pixel's class to its display colour.
    Mat result = Mat::zeros(rows, cols, CV_8UC3);
    for (int row = 0; row < rows; row++)
    {
        const uchar* ptrMaxCl = maxCl.ptr<uchar>(row);
        Vec3b* ptrColor = result.ptr<Vec3b>(row);
        for (int col = 0; col < cols; col++)
        {
            ptrColor[col] = colors[ptrMaxCl[col]];
        }
    }

    // addWeighted needs equally sized inputs; nearest-neighbour keeps the
    // class colours unmixed if the network output size differs.
    if (result.size() != frame.size())
    {
        resize(result, result, frame.size(), 0, 0, INTER_NEAREST);
    }
    Mat dst;
    addWeighted(frame, 0.3, result, 0.7, 0, dst);
    imshow("FCN-demo", dst);
    waitKey(0);
    return 0;
}

/// Reads one colour per line of labelFile. Each line is parsed as
/// "<name> <c0> <c1> <c2>"; lines that do not match are skipped.
/// Terminates the process when the file cannot be opened.
vector<Vec3b> readColors()
{
    vector<Vec3b> objColors;
    ifstream fin(labelFile.c_str());
    if (!fin.is_open())
    {
        cout << "could not load labeFile……" << endl;
        exit(-1);
    }

    string line;
    while (getline(fin, line))
    {
        if (line.empty())
        {
            continue;
        }
        stringstream ss(line);
        string name;
        int c0 = 0, c1 = 0, c2 = 0;
        if (!(ss >> name >> c0 >> c1 >> c2))
        {
            continue; // malformed line: do not push an undefined colour
        }
        objColors.push_back(Vec3b((uchar)c0, (uchar)c1, (uchar)c2));
    }
    return objColors;
}

/// Reads the class name (first token) of every non-empty line of labelFile.
/// The previous implementation looked for `display_name:` entries, which the
/// "<name> <c0> <c1> <c2>" format parsed by readColors() never contains.
/// Terminates the process when the file cannot be opened.
vector<String> readLabels()
{
    vector<String> objNames;
    ifstream fin(labelFile.c_str());
    if (!fin.is_open())
    {
        cout << "could not load labeFile……" << endl;
        exit(-1);
    }

    string line;
    while (getline(fin, line))
    {
        stringstream ss(line);
        string name;
        if (ss >> name)
        {
            objNames.push_back(name);
        }
    }
    return objNames;
}
论文:https://talhassner.github.io/home/projects/cnn_agegender/CVPR2015_CNN_AgeGenderEstimation.pdf
模型以及描述文件下载:
https://talhassner.github.io/home/publication/2015_CVPR
使用模型的方式与之前的差不多,我自己写了一个,但是感觉年龄识别结果相当不准。
// Age and gender classification of a whole image with two Caffe models.
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/dnn/dnn.hpp>
#include <cstdlib>
#include <iostream>
#include <fstream>
#include <string>

using namespace std;
using namespace cv;
using namespace cv::dnn;

string age_labels[] = { "0-2", "4-6", "8-13", "15-20", "25-32", "38-43", "48-53", "60-"};
string age_model_file = "model/ageClassication/age_net.caffemodel";
string age_model_prototxt = "model/ageClassication/deploy_age.prototxt";

string gender_labels[] = { "man", "woman"};
string gender_model_file = "model/genderClassication/gender_net.caffemodel";
string gender_model_prototxt = "model/genderClassication/deploy_gender.prototxt";

// Returns the index of the largest entry of `prob` (flattened to one row)
// and stores that entry in `best`.
static int bestClass(const Mat& prob, double& best)
{
    Mat row = prob.reshape(1, 1);
    Point where;
    minMaxLoc(row, NULL, &best, NULL, &where);
    return where.x;
}

/// Classifies pictures/boy.jpg with both networks, prints the results and
/// shows them on the image.
/// @return 0 on success, -1 when the image, a network or a label is missing.
int main(int argc, char** argv)
{
#ifdef _WIN32
    system("color 0A"); // console colour; the command only exists on Windows
#endif

    // Load the picture.
    Mat img = imread("pictures/boy.jpg");
    if (img.empty())
    {
        cout << "could not load img……" << endl;
        return -1;
    }
    namedWindow("input", WINDOW_AUTOSIZE);
    imshow("input", img);

    // Load both networks.
    Net age_net = readNetFromCaffe(age_model_prototxt, age_model_file);
    if (age_net.empty())
    {
        cout << "could not load Net age_model……" << endl;
        return -1;
    }
    Net gender_net = readNetFromCaffe(gender_model_prototxt, gender_model_file);
    if (gender_net.empty())
    {
        cout << "could not load Net gender_model……" << endl;
        return -1;
    }

    // Inference. swapRB is explicit because its default differs between
    // OpenCV releases.
    // NOTE(review): no mean is subtracted and the whole picture (not a face
    // crop) is fed in — confirm against the model's documentation whether it
    // expects mean-subtracted face crops; that may explain poor age results.
    Mat input = blobFromImage(img, 1.0, Size(227, 227), Scalar(), false);
    age_net.setInput(input, "data");
    Mat age_prob = age_net.forward("prob");
    gender_net.setInput(input, "data");
    Mat gender_prob = gender_net.forward("prob");

    // Pick the winning classes and make sure each has a label.
    const int age_count = static_cast<int>(sizeof(age_labels) / sizeof(age_labels[0]));
    const int gender_count = static_cast<int>(sizeof(gender_labels) / sizeof(gender_labels[0]));

    double age_class_Prob = 0.0;
    const int age_index = bestClass(age_prob, age_class_Prob);
    double gender_class_Prob = 0.0;
    // Uses the same flattened view as the age branch (the original reshaped
    // the gender output but then searched the unreshaped matrix).
    const int gender_index = bestClass(gender_prob, gender_class_Prob);
    if (age_index < 0 || age_index >= age_count || gender_index < 0 || gender_index >= gender_count)
    {
        cout << "network output does not match the label tables" << endl;
        return -1;
    }

    cout << "对象年龄为:" << age_labels[age_index] << endl;
    cout << "概率为:" << age_class_Prob << endl;
    cout << "对象性别为:" << gender_labels[gender_index] << endl;
    cout << "概率为:" << gender_class_Prob << endl;

    // Show the results on the image.
    putText(img, "age:" + age_labels[age_index], Point(20, 20), FONT_HERSHEY_PLAIN, 1.5, Scalar(0, 0, 255), 1, 8);
    putText(img, "gender:" + gender_labels[gender_index], Point(20, 40), FONT_HERSHEY_PLAIN, 1.5, Scalar(0, 255, 0), 1, 8);
    namedWindow("results", WINDOW_AUTOSIZE);
    imshow("results", img);

    waitKey(0);
    return 0;
}
把小孩识别成38-43岁……
视频里边用了一个文件haarcascade_frontalface_alt_tree.xml,先把人脸部分提取出来了:
主要使用了一个多尺度检测的函数detectMultiScale(),得到人脸所在的矩形区域,能够检测出来一张图片中的多张人脸。
然后直接把人脸部分输入,其他地方和上面的差不多。
// Age and gender classification of every face found by a Haar cascade.
#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
#include <cstdio>
#include <iostream>
#include <vector>

using namespace cv;
using namespace cv::dnn;
using namespace std;

String haar_file = "D:/opencv/build/etc/haarcascades/haarcascade_frontalface_alt_tree.xml";
String age_model = "model/ageClassication/age_net.caffemodel";
String age_text = "model/ageClassication/deploy_age.prototxt";
String gender_model = "model/genderClassication/gender_net.caffemodel";
String gender_text = "model/genderClassication/deploy_gender.prototxt";

vector<String> ageLabels();
// `input` is the picture fed to the network; when it is empty, `image` is
// used for both inference and drawing (the original two-argument behaviour).
void predict_age(Net& net, Mat image, const Mat& input = Mat());
void predict_gender(Net& net, Mat image, const Mat& input = Mat());

/// Detects faces in pictures/mutiFace1.jpg, classifies each one and shows the
/// annotated picture.
/// @return 0 on success, -1 when the image, the cascade or a network is missing.
int main(int argc, char** argv)
{
    Mat src = imread("pictures/mutiFace1.jpg");
    if (src.empty())
    {
        printf("could not load image...\n");
        return -1;
    }
    namedWindow("input", WINDOW_AUTOSIZE);
    imshow("input", src);

    // Face detection.
    CascadeClassifier detector;
    if (!detector.load(haar_file))
    {
        printf("could not load cascade file...\n");
        return -1;
    }
    vector<Rect> faces;
    Mat gray;
    cvtColor(src, gray, COLOR_BGR2GRAY);
    detector.detectMultiScale(gray, faces, 1.02, 1, 0, Size(40, 40), Size(1000, 1000));

    // Load both networks.
    Net age_net = readNetFromCaffe(age_text, age_model);
    Net gender_net = readNetFromCaffe(gender_text, gender_model);
    if (age_net.empty() || gender_net.empty())
    {
        printf("could not load age/gender model...\n");
        return -1;
    }

    // Untouched copy: the annotations are drawn into `src`, and the networks
    // must not see the rectangle or the text written for an earlier result.
    const Mat pristine = src.clone();
    for (size_t t = 0; t < faces.size(); t++)
    {
        const Mat face = pristine(faces[t]);
        Mat roi = src(faces[t]); // shares pixels with src, so drawing lands there
        predict_age(age_net, roi, face);
        predict_gender(gender_net, roi, face); // was called with age_net
        rectangle(src, faces[t], Scalar(30, 255, 30), 2, 8, 0);
    }
    imshow("age-gender-prediction-demo", src);

    waitKey(0);
    return 0;
}

/// Returns the age-range label of each output class of the age network.
vector<String> ageLabels()
{
    vector<String> ages;
    ages.push_back("0-2");
    ages.push_back("4 - 6");
    ages.push_back("8 - 13");
    ages.push_back("15 - 20");
    ages.push_back("25 - 32");
    ages.push_back("38 - 43");
    ages.push_back("48 - 53");
    ages.push_back("60-");
    return ages;
}

/// Runs the age network and writes "age:<range>" into `image`.
/// @param net    age classification network
/// @param image  picture to annotate (drawn in place)
/// @param input  picture to classify; `image` is used when this is empty
void predict_age(Net& net, Mat image, const Mat& input)
{
    const Mat& source = input.empty() ? image : input;

    // swapRB is explicit because its default differs between OpenCV releases.
    Mat blob = blobFromImage(source, 1.0, Size(227, 227), Scalar(), false);
    net.setInput(blob, "data");

    Mat prob = net.forward("prob");
    Mat probMat = prob.reshape(1, 1);
    Point classNum;
    double classProb = 0.0;
    minMaxLoc(probMat, NULL, &classProb, NULL, &classNum);

    const vector<String> ages = ageLabels();
    const int classidx = classNum.x;
    putText(image, format("age:%s", ages.at(classidx).c_str()), Point(2, 10),
            FONT_HERSHEY_PLAIN, 0.8, Scalar(0, 0, 255), 1);
}

/// Runs the gender network and writes "gender:M" or "gender:F" into `image`.
/// @param net    gender classification network
/// @param image  picture to annotate (drawn in place)
/// @param input  picture to classify; `image` is used when this is empty
void predict_gender(Net& net, Mat image, const Mat& input)
{
    const Mat& source = input.empty() ? image : input;

    Mat blob = blobFromImage(source, 1.0, Size(227, 227), Scalar(), false);
    net.setInput(blob, "data");

    Mat prob = net.forward("prob");
    Mat probMat = prob.reshape(1, 1);
    if (probMat.total() < 2)
    {
        return; // not a two-class output; nothing sensible to draw
    }
    const bool firstWins = probMat.at<float>(0, 0) > probMat.at<float>(0, 1);
    putText(image, format("gender:%s", (firstWins ? "M" : "F")), Point(2, 20),
            FONT_HERSHEY_PLAIN, 0.8, Scalar(0, 0, 255), 1);
}
GOTURN(Generic Object Tracking Using Regression Networks)使用回归网络进行追踪
资料参考:https://zhuanlan.zhihu.com/p/25338674
整个算法的框架其实非常简单:输入当前帧和前一帧进入网络,输出当前帧bounding-box的位置。
网络输出目标在search region上的相对坐标(top-left和bottom-right)。
模型下载:
https://github.com/opencv/opencv_extra/tree/c4219d5eb3105ed8e634278fad312a1a8d2c182d/testdata/tracking
note: 这四个压缩包都得下载,否则会解压出错。
可以参考opencv的samples里边的例子:https://github.com/opencv/opencv_contrib/blob/3.3.1/modules/tracking/samples/goturnTracker.cpp
该网络输入为上一帧要追踪的区域data1和当前帧区域data2,输出为单通道4×1的Mat:
表示上一帧中要追踪的box在当前帧中预测的box的位置(左上角和右下角坐标)。
输入:
input: "data1"
input_dim: 1
input_dim: 3
input_dim: 227
input_dim: 227
input: "data2"
input_dim: 1
input_dim: 3
input_dim: 227
input_dim: 227
// Single-object tracking in a video with the GOTURN Caffe model.
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/dnn/dnn.hpp>
#include <opencv2/video/video.hpp>
#include <iostream>
#include <fstream>
#include <string>

using namespace std;
using namespace cv;
using namespace cv::dnn;

string model_file = "model/GOTURN/goturn.caffemodel";
string model_prototxt = "model/GOTURN/goturn.prototxt";
Net net;

Rect trackObjects(Mat& frame, Mat& prevFrame);

Mat frame, prevFrame;
Rect prevBB;

/// Lets the user select a box on the first frame of pictures/vtest.avi and
/// follows it through the remaining frames. ESC stops early.
/// @return 0 on success, -1 when the model, the video or the selection is unusable.
int main(int argc, char** argv)
{
    net = readNetFromCaffe(model_prototxt, model_file);
    if (net.empty())
    {
        cout << "could not load model file……";
        return -1;
    }

    VideoCapture capture;
    capture.open("pictures/vtest.avi");
    if (!capture.isOpened() || !capture.read(frame) || frame.empty())
    {
        cout << "could not read video……" << endl;
        return -1;
    }
    frame.copyTo(prevFrame);
    prevBB = selectROI(frame, false, false);
    if (prevBB.area() <= 0)
    {
        cout << "no region selected……" << endl;
        return -1;
    }

    namedWindow("frame", WINDOW_AUTOSIZE);
    while (capture.read(frame))
    {
        Rect currentBB = trackObjects(frame, prevFrame);
        rectangle(frame, currentBB, Scalar(0, 0, 255), 2, 8, 0);

        // ready for next frame
        frame.copyTo(prevFrame);
        prevBB = currentBB;

        imshow("frame", frame);
        const char c = (char)waitKey(50);
        if (c == 27)
        {
            break;
        }
    }
    return 0;
}

/// Predicts where the box `prevBB` (global, position in `prevFrame`) is in
/// `frame`.
/// @param frame      current frame
/// @param prevFrame  previous frame, in which prevBB is valid
/// @return the predicted box in `frame`; prevBB unchanged when it has no area
Rect trackObjects(Mat& frame, Mat& prevFrame)
{
    // A box without area cannot be cropped or resized.
    if (prevBB.width <= 0 || prevBB.height <= 0)
    {
        return prevBB;
    }

    const int INPUT_SIZE = 227;
    const float padTargetPatch = 2.0f;

    // Centre of the previous box.
    Point2f prevCenter;
    prevCenter.x = (float)(prevBB.x + prevBB.width / 2);
    prevCenter.y = (float)(prevBB.y + prevBB.height / 2);

    // Crop region: padTargetPatch times the box so some background is included.
    // The frames below are padded by one full patch width/height per side,
    // so in padded coordinates the top-left corner is
    // centre - size/2 + size = centre + size/2.
    Rect2f targetPatchRect;
    targetPatchRect.width = (float)(prevBB.width * padTargetPatch);
    targetPatchRect.height = (float)(prevBB.height * padTargetPatch);
    targetPatchRect.x = prevCenter.x + targetPatchRect.width / 2.0f;
    targetPatchRect.y = prevCenter.y + targetPatchRect.height / 2.0f;

    const int padY = (int)targetPatchRect.height;
    const int padX = (int)targetPatchRect.width;

    // Pad the previous frame and cut out the target patch.
    Mat prevFramePadded;
    copyMakeBorder(prevFrame, prevFramePadded, padY, padY, padX, padX, BORDER_REPLICATE);
    Mat targetPatch = prevFramePadded(targetPatchRect).clone();

    // Pad the current frame and cut out the search patch at the same place.
    Mat curFramePadded;
    copyMakeBorder(frame, curFramePadded, padY, padY, padX, padX, BORDER_REPLICATE);
    Mat searchPatch = curFramePadded(targetPatchRect).clone();

    // Preprocess: resize to the network input size.
    resize(targetPatch, targetPatch, Size(INPUT_SIZE, INPUT_SIZE));
    resize(searchPatch, searchPatch, Size(INPUT_SIZE, INPUT_SIZE));

    // Convert to float and subtract the mean in one step. Subtracting 128
    // while the data is still 8-bit saturates every value below 128 to 0.
    targetPatch.convertTo(targetPatch, CV_32F, 1.0, -128.0);
    searchPatch.convertTo(searchPatch, CV_32F, 1.0, -128.0);

    // swapRB is explicit because its default differs between OpenCV releases.
    Mat targetBlob = blobFromImage(targetPatch, 1.0, Size(), Scalar(), false);
    Mat searchBlob = blobFromImage(searchPatch, 1.0, Size(), Scalar(), false);

    net.setInput(targetBlob, "data1");
    net.setInput(searchBlob, "data2");

    Mat res = net.forward("scale");
    Mat resMat = res.reshape(1, 1);

    // The four outputs are used as (x1, y1, x2, y2) in input-size units:
    // scale them to patch pixels, then move from padded-frame coordinates
    // back to frame coordinates by removing the padding.
    Rect2d curBB;
    curBB.x = (double)targetPatchRect.x + (double)(resMat.at<float>(0) * targetPatchRect.width / INPUT_SIZE) - (double)targetPatchRect.width;
    curBB.y = (double)targetPatchRect.y + (double)(resMat.at<float>(1) * targetPatchRect.height / INPUT_SIZE) - (double)targetPatchRect.height;
    curBB.width = (resMat.at<float>(2) - resMat.at<float>(0)) * targetPatchRect.width / INPUT_SIZE;
    curBB.height = (resMat.at<float>(3) - resMat.at<float>(1)) * targetPatchRect.height / INPUT_SIZE;

    // Predicted BB
    Rect boundingBox = curBB;
    return boundingBox;
}
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。