赞
踩
上一个教程 : 高级 API: 文本检测模型和文本识别模型
下一个教程 : 转换 PyTorch 分类模型并使用 OpenCV Python 发布
原作者 | Chengrui Wang, Yuantao Feng |
---|---|
兼容性 | OpenCV >= 4.5.4 |
本节将介绍用于人脸检测的 cv::FaceDetectorYN 类和用于人脸识别的 cv::FaceRecognizerSF 类。
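本模块需要两个预先训练好的模型(ONNX 格式):人脸检测模型 face_detection_yunet_2021dec.onnx(下载地址:https://github.com/opencv/opencv_zoo/tree/master/models/face_detection_yunet)和人脸识别模型 face_recognition_sface_2021dec.onnx(下载地址:https://github.com/opencv/opencv_zoo/tree/master/models/face_recognition_sface)。下表列出了人脸识别模型在各数据集上的准确率以及对应的判定阈值: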
数据集 | 准确率 | 阈值 (normL2) | 阈值 (余弦) |
---|---|---|---|
LFW | 99.60% | 1.128 | 0.363 |
CALFW | 93.95% | 1.149 | 0.340 |
CPLFW | 91.05% | 1.204 | 0.275 |
AgeDB-30 | 94.90% | 1.202 | 0.277 |
CFP-FP | 94.80% | 1.253 | 0.212 |
C++
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/objdetect.hpp>

#include <cctype>
#include <iostream>

using namespace cv;
using namespace std;

/**
 * @brief Print every detected face to stdout and draw it on @p input.
 *
 * Each row of @p faces is read as floats: columns 0-3 are the bounding box
 * (x, y, width, height), columns 4-13 are five landmark points (x, y pairs)
 * and column 14 is the detection score.
 *
 * @param input     Image that is drawn on in place.
 * @param frame     Frame index to print; a negative value suppresses the "Frame N" prefix.
 * @param faces     Detection result, one row per face.
 * @param fps       Frames-per-second value to print and to overlay on the image.
 * @param thickness Line thickness used for the box and the landmark circles.
 */
static void visualize(Mat& input, int frame, Mat& faces, double fps, int thickness = 2)
{
    std::string fpsString = cv::format("FPS : %.2f", (float)fps);
    if (frame >= 0)
        cout << "Frame " << frame << ", ";
    // fpsString already carries the "FPS : " label, so it is printed without another prefix
    cout << fpsString << endl;
    for (int i = 0; i < faces.rows; i++)
    {
        // Print results
        cout << "Face " << i
             << ", top-left coordinates: (" << faces.at<float>(i, 0) << ", " << faces.at<float>(i, 1) << "), "
             << "box width: " << faces.at<float>(i, 2) << ", box height: " << faces.at<float>(i, 3) << ", "
             << "score: " << cv::format("%.2f", faces.at<float>(i, 14))
             << endl;

        // Draw bounding box
        rectangle(input, Rect2i(int(faces.at<float>(i, 0)), int(faces.at<float>(i, 1)), int(faces.at<float>(i, 2)), int(faces.at<float>(i, 3))), Scalar(0, 255, 0), thickness);
        // Draw landmarks (one colour per landmark point)
        circle(input, Point2i(int(faces.at<float>(i, 4)), int(faces.at<float>(i, 5))), 2, Scalar(255, 0, 0), thickness);
        circle(input, Point2i(int(faces.at<float>(i, 6)), int(faces.at<float>(i, 7))), 2, Scalar(0, 0, 255), thickness);
        circle(input, Point2i(int(faces.at<float>(i, 8)), int(faces.at<float>(i, 9))), 2, Scalar(0, 255, 0), thickness);
        circle(input, Point2i(int(faces.at<float>(i, 10)), int(faces.at<float>(i, 11))), 2, Scalar(255, 0, 255), thickness);
        circle(input, Point2i(int(faces.at<float>(i, 12)), int(faces.at<float>(i, 13))), 2, Scalar(0, 255, 255), thickness);
    }
    putText(input, fpsString, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0), 2);
}

/**
 * @brief Face detection / recognition demo.
 *
 * - With `image1`: detects faces in that image; if `image2` is also given,
 *   the first face of each image is aligned, embedded and compared.
 * - Without `image1`: runs detection on every frame of `video` (camera index or path).
 *
 * @return 0 on success (or after printing help), 1 when no face is found or the
 *         video source cannot be opened, 2 when an input image cannot be read.
 */
int main(int argc, char** argv)
{
    CommandLineParser parser(argc, argv,
        "{help h | | Print this message}"
        "{image1 i1 | | Path to the input image1. Omit for detecting through VideoCapture}"
        "{image2 i2 | | Path to the input image2. When image1 and image2 parameters given then the program try to find a face on both images and runs face recognition algorithm}"
        "{video v | 0 | Path to the input video}"
        "{scale sc | 1.0 | Scale factor used to resize input video frames}"
        "{fd_model fd | face_detection_yunet_2021dec.onnx| Path to the model. Download yunet.onnx in https://github.com/opencv/opencv_zoo/tree/master/models/face_detection_yunet}"
        "{fr_model fr | face_recognition_sface_2021dec.onnx | Path to the face recognition model. Download the model at https://github.com/opencv/opencv_zoo/tree/master/models/face_recognition_sface}"
        "{score_threshold | 0.9 | Filter out faces of score < score_threshold}"
        "{nms_threshold | 0.3 | Suppress bounding boxes of iou >= nms_threshold}"
        "{top_k | 5000 | Keep top_k bounding boxes before NMS}"
        "{save s | false | Set true to save results. This flag is invalid when using camera}"
    );
    if (parser.has("help"))
    {
        parser.printMessage();
        return 0;
    }

    String fd_modelPath = parser.get<String>("fd_model");
    String fr_modelPath = parser.get<String>("fr_model");

    float scoreThreshold = parser.get<float>("score_threshold");
    float nmsThreshold = parser.get<float>("nms_threshold");
    int topK = parser.get<int>("top_k");

    bool save = parser.get<bool>("save");
    float scale = parser.get<float>("scale");

    // Decision thresholds; these are the LFW values from the table in this tutorial
    double cosine_similar_thresh = 0.363;
    double l2norm_similar_thresh = 1.128;

    // Initialize FaceDetectorYN
    Ptr<FaceDetectorYN> detector = FaceDetectorYN::create(fd_modelPath, "", Size(320, 320), scoreThreshold, nmsThreshold, topK);

    TickMeter tm;

    // If input is an image
    if (parser.has("image1"))
    {
        String input1 = parser.get<String>("image1");
        Mat image1 = imread(samples::findFile(input1));
        if (image1.empty())
        {
            std::cerr << "Cannot read image: " << input1 << std::endl;
            return 2;
        }

        int imageWidth = int(image1.cols * scale);
        int imageHeight = int(image1.rows * scale);
        resize(image1, image1, Size(imageWidth, imageHeight));
        tm.start();

        // Set input size before inference
        detector->setInputSize(image1.size());

        Mat faces1;
        detector->detect(image1, faces1);
        if (faces1.rows < 1)
        {
            std::cerr << "Cannot find a face in " << input1 << std::endl;
            return 1;
        }

        tm.stop();
        // Draw results on the input image
        visualize(image1, -1, faces1, tm.getFPS());

        // Save results if save is true
        if (save)
        {
            cout << "Saving result.jpg...\n";
            imwrite("result.jpg", image1);
        }

        // Visualize results
        imshow("image1", image1);
        pollKey();  // handle UI events to show content

        if (parser.has("image2"))
        {
            String input2 = parser.get<String>("image2");
            Mat image2 = imread(samples::findFile(input2));
            if (image2.empty())
            {
                std::cerr << "Cannot read image2: " << input2 << std::endl;
                return 2;
            }

            tm.reset();
            tm.start();
            detector->setInputSize(image2.size());

            Mat faces2;
            detector->detect(image2, faces2);
            if (faces2.rows < 1)
            {
                std::cerr << "Cannot find a face in " << input2 << std::endl;
                return 1;
            }
            tm.stop();
            visualize(image2, -1, faces2, tm.getFPS());
            if (save)
            {
                cout << "Saving result2.jpg...\n";
                imwrite("result2.jpg", image2);
            }
            imshow("image2", image2);
            pollKey();

            // Initialize FaceRecognizerSF
            Ptr<FaceRecognizerSF> faceRecognizer = FaceRecognizerSF::create(fr_modelPath, "");

            // Aligning and cropping facial image through the first face of faces detected.
            Mat aligned_face1, aligned_face2;
            faceRecognizer->alignCrop(image1, faces1.row(0), aligned_face1);
            faceRecognizer->alignCrop(image2, faces2.row(0), aligned_face2);

            // Run feature extraction with given aligned_face
            Mat feature1, feature2;
            faceRecognizer->feature(aligned_face1, feature1);
            feature1 = feature1.clone();
            faceRecognizer->feature(aligned_face2, feature2);
            feature2 = feature2.clone();

            double cos_score = faceRecognizer->match(feature1, feature2, FaceRecognizerSF::DisType::FR_COSINE);
            double L2_score = faceRecognizer->match(feature1, feature2, FaceRecognizerSF::DisType::FR_NORM_L2);

            if (cos_score >= cosine_similar_thresh)
            {
                std::cout << "They have the same identity;";
            }
            else
            {
                std::cout << "They have different identities;";
            }
            std::cout << " Cosine Similarity: " << cos_score << ", threshold: " << cosine_similar_thresh << ". (higher value means higher similarity, max 1.0)\n";

            if (L2_score <= l2norm_similar_thresh)
            {
                std::cout << "They have the same identity;";
            }
            else
            {
                std::cout << "They have different identities.";
            }
            std::cout << " NormL2 Distance: " << L2_score << ", threshold: " << l2norm_similar_thresh << ". (lower value means higher similarity, min 0.0)\n";
        }
        cout << "Press any key to exit..." << endl;
        waitKey(0);
    }
    else
    {
        int frameWidth, frameHeight;
        VideoCapture capture;
        std::string video = parser.get<string>("video");
        // A single digit selects a camera by index. The cast to unsigned char is
        // required because isdigit() is undefined for negative char values.
        if (video.size() == 1 && isdigit(static_cast<unsigned char>(video[0])))
            capture.open(parser.get<int>("video"));
        else
            capture.open(samples::findFileOrKeep(video));  // keep GStreamer pipelines
        if (capture.isOpened())
        {
            frameWidth = int(capture.get(CAP_PROP_FRAME_WIDTH) * scale);
            frameHeight = int(capture.get(CAP_PROP_FRAME_HEIGHT) * scale);
            cout << "Video " << video
                 << ": width=" << frameWidth
                 << ", height=" << frameHeight
                 << endl;
        }
        else
        {
            cout << "Could not initialize video capturing: " << video << "\n";
            return 1;
        }

        detector->setInputSize(Size(frameWidth, frameHeight));

        cout << "Press 'SPACE' to save frame, any other key to exit..." << endl;
        int nFrame = 0;
        for (;;)
        {
            // Get frame
            Mat frame;
            if (!capture.read(frame))
            {
                cerr << "Can't grab frame!Stop\n";
                break;
            }

            resize(frame, frame, Size(frameWidth, frameHeight));

            // Inference
            Mat faces;
            tm.start();
            detector->detect(frame, faces);
            tm.stop();

            // Draw on a copy so the untouched frame can still be saved
            Mat result = frame.clone();
            // Draw results on the input image
            visualize(result, nFrame, faces, tm.getFPS());

            // Visualize results
            imshow("Live", result);

            int key = waitKey(1);
            bool saveFrame = save;
            if (key == ' ')
            {
                saveFrame = true;
                key = 0;  // handled: SPACE must not end the loop
            }

            if (saveFrame)
            {
                std::string frame_name = cv::format("frame_%05d.png", nFrame);
                std::string result_name = cv::format("result_%05d.jpg", nFrame);
                cout << "Saving '" << frame_name << "' and '" << result_name << "' ...\n";
                imwrite(frame_name, frame);
                imwrite(result_name, result);
            }

            ++nFrame;

            if (key > 0)
                break;
        }
        cout << "Processed " << nFrame << " frames" << endl;
    }
    cout << "Done." << endl;
    return 0;
}
Python
import argparse

import numpy as np
import cv2 as cv


def str2bool(v):
    """Convert an on/off style command-line string to a bool.

    Raises NotImplementedError for any value outside the two accepted lists.
    """
    if v.lower() in ['on', 'yes', 'true', 'y', 't']:
        return True
    elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
        return False
    else:
        raise NotImplementedError

parser = argparse.ArgumentParser()
parser.add_argument('--image1', '-i1', type=str, help='Path to the input image1. Omit for detecting on default camera.')
parser.add_argument('--image2', '-i2', type=str, help='Path to the input image2. When image1 and image2 parameters given then the program try to find a face on both images and runs face recognition algorithm.')
parser.add_argument('--video', '-v', type=str, help='Path to the input video.')
parser.add_argument('--scale', '-sc', type=float, default=1.0, help='Scale factor used to resize input video frames.')
parser.add_argument('--face_detection_model', '-fd', type=str, default='face_detection_yunet_2021dec.onnx', help='Path to the face detection model. Download the model at https://github.com/opencv/opencv_zoo/tree/master/models/face_detection_yunet')
parser.add_argument('--face_recognition_model', '-fr', type=str, default='face_recognition_sface_2021dec.onnx', help='Path to the face recognition model. Download the model at https://github.com/opencv/opencv_zoo/tree/master/models/face_recognition_sface')
parser.add_argument('--score_threshold', type=float, default=0.9, help='Filtering out faces of score < score_threshold.')
parser.add_argument('--nms_threshold', type=float, default=0.3, help='Suppress bounding boxes of iou >= nms_threshold.')
parser.add_argument('--top_k', type=int, default=5000, help='Keep top_k bounding boxes before NMS.')
parser.add_argument('--save', '-s', type=str2bool, default=False, help='Set true to save results. This flag is invalid when using camera.')
args = parser.parse_args()

def visualize(input, faces, fps, thickness=2):
    """Print every detected face and draw it, plus the FPS value, on `input`.

    `faces` is the tuple returned by detector.detect(); faces[1] holds the
    detections (or None). Each detection is read as: box x, y, width, height,
    then five landmark (x, y) pairs, then the score as the last element.
    """
    if faces[1] is not None:
        for idx, face in enumerate(faces[1]):
            print('Face {}, top-left coordinates: ({:.0f}, {:.0f}), box width: {:.0f}, box height {:.0f}, score: {:.2f}'.format(idx, face[0], face[1], face[2], face[3], face[-1]))

            # Everything except the trailing score is a pixel coordinate
            coords = face[:-1].astype(np.int32)
            cv.rectangle(input, (coords[0], coords[1]), (coords[0]+coords[2], coords[1]+coords[3]), (0, 255, 0), thickness)
            cv.circle(input, (coords[4], coords[5]), 2, (255, 0, 0), thickness)
            cv.circle(input, (coords[6], coords[7]), 2, (0, 0, 255), thickness)
            cv.circle(input, (coords[8], coords[9]), 2, (0, 255, 0), thickness)
            cv.circle(input, (coords[10], coords[11]), 2, (255, 0, 255), thickness)
            cv.circle(input, (coords[12], coords[13]), 2, (0, 255, 255), thickness)
    cv.putText(input, 'FPS: {:.2f}'.format(fps), (1, 16), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

if __name__ == '__main__':

    detector = cv.FaceDetectorYN.create(
        args.face_detection_model,
        "",
        (320, 320),
        args.score_threshold,
        args.nms_threshold,
        args.top_k
    )

    tm = cv.TickMeter()

    # If input is an image
    if args.image1 is not None:
        img1 = cv.imread(cv.samples.findFile(args.image1))
        # imread() returns None when the file cannot be decoded
        if img1 is None:
            raise SystemExit('Cannot read image: {}'.format(args.image1))
        img1Width = int(img1.shape[1]*args.scale)
        img1Height = int(img1.shape[0]*args.scale)

        img1 = cv.resize(img1, (img1Width, img1Height))
        tm.start()

        # Set input size before inference
        detector.setInputSize((img1Width, img1Height))

        faces1 = detector.detect(img1)

        tm.stop()
        assert faces1[1] is not None, 'Cannot find a face in {}'.format(args.image1)

        # Draw results on the input image
        visualize(img1, faces1, tm.getFPS())

        # Save results if save is true
        if args.save:
            print('Results saved to result.jpg\n')
            cv.imwrite('result.jpg', img1)

        # Visualize results in a new window
        cv.imshow("image1", img1)

        if args.image2 is not None:
            img2 = cv.imread(cv.samples.findFile(args.image2))
            if img2 is None:
                raise SystemExit('Cannot read image2: {}'.format(args.image2))

            tm.reset()
            tm.start()
            detector.setInputSize((img2.shape[1], img2.shape[0]))
            faces2 = detector.detect(img2)
            tm.stop()
            assert faces2[1] is not None, 'Cannot find a face in {}'.format(args.image2)
            visualize(img2, faces2, tm.getFPS())
            cv.imshow("image2", img2)

            recognizer = cv.FaceRecognizerSF.create(
            args.face_recognition_model,"")

            # Align faces
            face1_align = recognizer.alignCrop(img1, faces1[1][0])
            face2_align = recognizer.alignCrop(img2, faces2[1][0])

            # Extract features
            face1_feature = recognizer.feature(face1_align)
            face2_feature = recognizer.feature(face2_align)

            # Decision thresholds; these are the LFW values from the table in this tutorial
            cosine_similarity_threshold = 0.363
            l2_similarity_threshold = 1.128

            cosine_score = recognizer.match(face1_feature, face2_feature, cv.FaceRecognizerSF_FR_COSINE)
            l2_score = recognizer.match(face1_feature, face2_feature, cv.FaceRecognizerSF_FR_NORM_L2)

            msg = 'different identities'
            if cosine_score >= cosine_similarity_threshold:
                msg = 'the same identity'
            print('They have {}. Cosine Similarity: {}, threshold: {} (higher value means higher similarity, max 1.0).'.format(msg, cosine_score, cosine_similarity_threshold))

            msg = 'different identities'
            if l2_score <= l2_similarity_threshold:
                msg = 'the same identity'
            print('They have {}. NormL2 Distance: {}, threshold: {} (lower value means higher similarity, min 0.0).'.format(msg, l2_score, l2_similarity_threshold))
        cv.waitKey(0)
    else: # Omit input to call default camera
        if args.video is not None:
            # Same rule as the C++ sample: a single digit selects a camera by
            # index; anything else is passed through as a path / pipeline string.
            if len(args.video) == 1 and args.video in '0123456789':
                deviceId = int(args.video)
            else:
                deviceId = args.video
        else:
            deviceId = 0
        cap = cv.VideoCapture(deviceId)
        if not cap.isOpened():
            raise SystemExit('Could not initialize video capturing: {}'.format(deviceId))
        frameWidth = int(cap.get(cv.CAP_PROP_FRAME_WIDTH)*args.scale)
        frameHeight = int(cap.get(cv.CAP_PROP_FRAME_HEIGHT)*args.scale)
        detector.setInputSize([frameWidth, frameHeight])

        # Loop until any key is pressed
        while cv.waitKey(1) < 0:
            hasFrame, frame = cap.read()
            if not hasFrame:
                print('No frames grabbed!')
                break

            frame = cv.resize(frame, (frameWidth, frameHeight))

            # Inference
            tm.start()
            faces = detector.detect(frame) # faces is a tuple
            tm.stop()

            # Draw results on the input image
            visualize(frame, faces, tm.getFPS())

            # Visualize results
            cv.imshow('Live', frame)
    cv.destroyAllWindows()
C++
// Initialize FaceDetectorYN
// Size(320, 320) is only the creation-time input size; setInputSize() below
// switches the detector to the size of the actual image.
Ptr<FaceDetectorYN> detector = FaceDetectorYN::create(fd_modelPath, "", Size(320, 320), scoreThreshold, nmsThreshold, topK);
// Set input size before inference
detector->setInputSize(image1.size());
// faces1 receives the detections, one row per detected face
Mat faces1;
detector->detect(image1, faces1);
// No rows means no face was detected: report it and exit with code 1
if (faces1.rows < 1)
{
std::cerr << "Cannot find a face in " << input1 << std::endl;
return 1;
}
Python
# Create the detector; (320, 320) is only the creation-time input size and is
# replaced by the setInputSize() call below.
detector = cv.FaceDetectorYN.create(
args.face_detection_model,
"",
(320, 320),
args.score_threshold,
args.nms_threshold,
args.top_k
)
# Set input size before inference
detector.setInputSize((img1Width, img1Height))
# detect() returns a tuple; the detections are in faces1[1], which the sample
# checks against None to report that no face was found.
faces1 = detector.detect(img1)
检测输出 faces 是一个 CV_32F 类型的二维数组,每一行对应一个检测到的人脸实例,各列依次为人脸边界框、5 个人脸关键点(landmarks)以及检测得分。每行的格式如下:
x1, y1, w, h, x_re, y_re, x_le, y_le, x_nt, y_nt, x_rcm, y_rcm, x_lcm, y_lcm, score
其中,x1、y1、w、h 分别为人脸边界框左上角的坐标、宽度和高度;{x, y}_{re, le, nt, rcm, lcm} 分别代表右眼、左眼、鼻尖、右嘴角和左嘴角的坐标;最后一列 score 为该人脸的检测得分(示例代码中通过下标为 14 的列读取)。
人脸检测后,运行以下代码从面部图像中提取人脸特征。
C++
// Initialize FaceRecognizerSF
Ptr<FaceRecognizerSF> faceRecognizer = FaceRecognizerSF::create(fr_modelPath, "");
// Aligning and cropping facial image through the first face of faces detected.
// row(0) is the first detection of each image.
Mat aligned_face1, aligned_face2;
faceRecognizer->alignCrop(image1, faces1.row(0), aligned_face1);
faceRecognizer->alignCrop(image2, faces2.row(0), aligned_face2);
// Run feature extraction with given aligned_face
Mat feature1, feature2;
faceRecognizer->feature(aligned_face1, feature1);
// NOTE(review): the clone() presumably detaches the result from a buffer that
// the next feature() call may reuse — confirm against the FaceRecognizerSF docs.
feature1 = feature1.clone();
faceRecognizer->feature(aligned_face2, feature2);
feature2 = feature2.clone();
Python
# Create the face recognizer from the recognition model path
recognizer = cv.FaceRecognizerSF.create(
args.face_recognition_model,"")
# Align faces
# faces1[1][0] / faces2[1][0] are the first detection of each image.
face1_align = recognizer.alignCrop(img1, faces1[1][0])
face2_align = recognizer.alignCrop(img2, faces2[1][0])
# Extract features
face1_feature = recognizer.feature(face1_align)
face2_feature = recognizer.feature(face2_align)
获得两张人脸图像的人脸特征 feature1 和 feature2 后,运行下面的代码来计算两张人脸的身份差异。
C++
// Cosine score: a higher value means higher similarity (the sample compares it with >= threshold)
double cos_score = faceRecognizer->match(feature1, feature2, FaceRecognizerSF::DisType::FR_COSINE);
// NormL2 distance: a lower value means higher similarity (the sample compares it with <= threshold)
double L2_score = faceRecognizer->match(feature1, feature2, FaceRecognizerSF::DisType::FR_NORM_L2);
Python
# Cosine score: a higher value means higher similarity (the sample compares it with >= threshold)
cosine_score = recognizer.match(face1_feature, face2_feature, cv.FaceRecognizerSF_FR_COSINE)
# NormL2 distance: a lower value means higher similarity (the sample compares it with <= threshold)
l2_score = recognizer.match(face1_feature, face2_feature, cv.FaceRecognizerSF_FR_NORM_L2)
例如,如果余弦相似度大于或等于 0.363,或 normL2 距离小于或等于 1.128,则认为两张脸属于同一身份。
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。