1. 训练环境:系统为 Windows 10,Anaconda 3.5,Python 3.7,PyTorch 1.6,显卡为 RTX 3080,CUDA 10.2,cuDNN 7.1。
2. 模型部署环境:PC 端使用 VS2019 和 OpenCV 4.5,推理加速库使用 ncnn,版本为 20220216。
#include "yoloface.h"

#include <algorithm>
#include <cfloat>
#include <cmath>
#include <string>
#include <vector>

#include <opencv2/core/core.hpp>
#include <opencv2/imgproc/imgproc.hpp>
// Clamp x to the closed range [0, y].
// Every argument use is parenthesised so that compound expressions (for
// example a conditional expression) expand with the intended precedence.
// NOTE: this is still a macro, so x and y may be evaluated more than once --
// do not pass expressions with side effects.
#define clip(x, y) ((x) < 0 ? 0 : ((x) > (y) ? (y) : (x)))
static inline float intersection_area(const Object& a, const Object& b)
cv::Rect_<float> inter = a.rect & b.rect;
return inter.area();
static void qsort_descent_inplace(std::vector<Object>& faceobjects, int left, int right)
int i = left;
int j = right;
float p = faceobjects[(left + right) / 2].prob;
while (i <= j)
while (faceobjects[i].prob > p)
while (faceobjects[j].prob < p)
if (i <= j)
// swap
std::swap(faceobjects[i], faceobjects[j]);
#pragma omp parallel sections
#pragma omp section
if (left < j) qsort_descent_inplace(faceobjects, left, j);
#pragma omp section
if (i < right) qsort_descent_inplace(faceobjects, i, right);
static void qsort_descent_inplace(std::vector<Object>& faceobjects)
if (faceobjects.empty())
qsort_descent_inplace(faceobjects, 0, faceobjects.size() - 1);
static void nms_sorted_bboxes(const std::vector<Object>& faceobjects, std::vector<int>& picked, float nms_threshold)
const int n = faceobjects.size();
std::vector<float> areas(n);
for (int i = 0; i < n; i++)
areas[i] = faceobjects[i].rect.area();
for (int i = 0; i < n; i++)
const Object& a = faceobjects[i];
int keep = 1;
for (int j = 0; j < (int)picked.size(); j++)
const Object& b = faceobjects[picked[j]];
// intersection over union
float inter_area = intersection_area(a, b);
float union_area = areas[i] + areas[picked[j]] - inter_area;
// float IoU = inter_area / union_area
if (inter_area / union_area > nms_threshold)
keep = 0;
if (keep)
// Logistic function 1 / (1 + e^-x), evaluated in single precision.
static inline float sigmoid(float x)
{
    return 1.f / (1.f + std::exp(-x));
}
// Decode one detection head into candidate objects.
//
// anchors        : flat list of (width, height) pairs, one pair per anchor.
// stride         : size of one grid cell in pixels of the padded input.
// in_pad         : padded network input; only its width/height are read, to
//                  recover the grid dimensions.
// feat_blob      : head output with one channel per anchor and one row per
//                  grid cell. Each row is read as
//                    [0..3]  box (x, y, w, h) before sigmoid,
//                    [4]     objectness before sigmoid,
//                    [5..14] five landmark (x, y) pairs,
//                    [15..]  class scores.
// prob_threshold : minimum sigmoid(objectness) for a cell to produce an object.
// objects        : output; decoded candidates are appended.
//
// Box and landmark coordinates are expressed in pixels of the padded input.
static void generate_proposals(const ncnn::Mat& anchors, int stride, const ncnn::Mat& in_pad, const ncnn::Mat& feat_blob, float prob_threshold, std::vector<Object>& objects)
{
    if (stride <= 0)
        return;

    const int num_grid = feat_blob.h;

    // The blob stores all grid cells in one dimension; recover the 2-D grid
    // from the longer side of the padded input.
    int num_grid_x;
    int num_grid_y;
    if (in_pad.w > in_pad.h)
    {
        num_grid_x = in_pad.w / stride;
        if (num_grid_x == 0)
            return;
        num_grid_y = num_grid / num_grid_x;
    }
    else
    {
        num_grid_y = in_pad.h / stride;
        if (num_grid_y == 0)
            return;
        num_grid_x = num_grid / num_grid_y;
    }

    const int num_class = feat_blob.w - 5 - 10;

    const int num_anchors = anchors.w / 2;

    for (int q = 0; q < num_anchors; q++)
    {
        const float anchor_w = anchors[q * 2];
        const float anchor_h = anchors[q * 2 + 1];

        const ncnn::Mat feat = feat_blob.channel(q);

        for (int i = 0; i < num_grid_y; i++)
        {
            for (int j = 0; j < num_grid_x; j++)
            {
                const float* featptr = feat.row(i * num_grid_x + j);

                // find class index with max class score
                int class_index = 0;
                float class_score = -FLT_MAX;
                for (int k = 0; k < num_class; k++)
                {
                    const float score = featptr[5 + 10 + k];
                    if (score > class_score)
                    {
                        class_index = k;
                        class_score = score;
                    }
                }

                // only the objectness term is used as confidence; the class
                // score only selects the label
                const float box_score = featptr[4];
                const float confidence = sigmoid(box_score); //* sigmoid(class_score);

                if (confidence >= prob_threshold)
                {
                    const float dx = sigmoid(featptr[0]);
                    const float dy = sigmoid(featptr[1]);
                    const float dw = sigmoid(featptr[2]);
                    const float dh = sigmoid(featptr[3]);

                    const float pb_cx = (dx * 2.f - 0.5f + j) * stride;
                    const float pb_cy = (dy * 2.f - 0.5f + i) * stride;

                    // (2 * d)^2 * anchor, written as a product instead of pow()
                    const float w_factor = dw * 2.f;
                    const float h_factor = dh * 2.f;
                    const float pb_w = w_factor * w_factor * anchor_w;
                    const float pb_h = h_factor * h_factor * anchor_h;

                    const float x0 = pb_cx - pb_w * 0.5f;
                    const float y0 = pb_cy - pb_h * 0.5f;
                    const float x1 = pb_cx + pb_w * 0.5f;
                    const float y1 = pb_cy + pb_h * 0.5f;

                    Object obj;
                    obj.rect.x = x0;
                    obj.rect.y = y0;
                    obj.rect.width = x1 - x0;
                    obj.rect.height = y1 - y0;
                    obj.label = class_index;
                    obj.prob = confidence;

                    // landmarks are offsets scaled by the anchor size and
                    // added to the cell origin
                    for (int l = 0; l < 5; l++)
                    {
                        const float x = featptr[2 * l + 5] * anchor_w + j * stride;
                        const float y = featptr[2 * l + 1 + 5] * anchor_h + i * stride;
                        obj.pts.push_back(cv::Point2f(x, y));
                    }

                    objects.push_back(obj);
                }
            }
        }
    }
}
int YoloFace::loadModel(std::string model, bool use_gpu)
bool has_gpu = false;
face_net.opt = ncnn::Option();
has_gpu = ncnn::get_gpu_count() > 0;
bool to_use_gpu = has_gpu && use_gpu;
face_net.opt.use_vulkan_compute = to_use_gpu;
face_net.load_model((model + ".bin").c_str());
return 0;
// Detect faces and their five landmarks in an image.
//
// rgb            : input image, handed to ncnn as 3-channel PIXEL_RGB data.
//                  NOTE(review): main() in this file passes the result of
//                  cv::imread directly -- confirm the intended channel order.
// objects        : output; replaced by the detections that survive NMS, with
//                  boxes and landmarks in coordinates of the input image.
// prob_threshold : minimum confidence for a candidate.
// nms_threshold  : IoU above which overlapping candidates are suppressed.
// Returns 0 on success, -1 when the image is empty or a head cannot be
// extracted.
int YoloFace::detection(const cv::Mat& rgb, std::vector<Object>& objects, float prob_threshold, float nms_threshold)
{
    objects.clear();
    if (rgb.empty())
        return -1;

    const int img_w = rgb.cols;
    const int img_h = rgb.rows;

    // letterbox pad to multiple of 32
    // first scale the longer side to target_size, keeping the aspect ratio
    int w = img_w;
    int h = img_h;
    float scale = 1.f;
    if (w > h)
    {
        scale = (float)target_size / w;
        w = target_size;
        h = static_cast<int>(h * scale);
    }
    else
    {
        scale = (float)target_size / h;
        h = target_size;
        w = static_cast<int>(w * scale);
    }

    ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgb.data, ncnn::Mat::PIXEL_RGB, img_w, img_h, w, h);

    // then pad both sides up to the next multiple of 32
    const int wpad = (w + 31) / 32 * 32 - w;
    const int hpad = (h + 31) / 32 * 32 - h;
    ncnn::Mat in_pad;
    ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 114.f);

    in_pad.substract_mean_normalize(0, norm_vals);

    ncnn::Extractor ex = face_net.create_extractor();
    ex.input("data", in_pad);

    // anchor setting from yolov5/models/yolov5s.yaml
    struct DetectionHead
    {
        const char* blob_name; // output blob of this head
        int stride;            // grid cell size in input pixels
        float anchors[6];      // three (width, height) pairs
    };
    static const DetectionHead heads[] = {
        {"981", 8, {4.f, 5.f, 8.f, 10.f, 13.f, 16.f}},
        {"983", 16, {23.f, 29.f, 43.f, 55.f, 73.f, 105.f}},
        {"985", 32, {146.f, 217.f, 231.f, 300.f, 335.f, 433.f}},
    };

    std::vector<Object> proposals;
    for (const DetectionHead& head : heads)
    {
        ncnn::Mat out;
        if (ex.extract(head.blob_name, out) != 0)
            return -1;

        ncnn::Mat anchors(6);
        for (int k = 0; k < 6; k++)
            anchors[k] = head.anchors[k];

        std::vector<Object> head_objects;
        generate_proposals(anchors, head.stride, in_pad, out, prob_threshold, head_objects);
        proposals.insert(proposals.end(), head_objects.begin(), head_objects.end());
    }

    // sort all proposals by score from highest to lowest
    qsort_descent_inplace(proposals);

    // apply nms with nms_threshold
    std::vector<int> picked;
    nms_sorted_bboxes(proposals, picked, nms_threshold);

    const int count = static_cast<int>(picked.size());
    objects.resize(count);

    const float pad_left = static_cast<float>(wpad / 2);
    const float pad_top = static_cast<float>(hpad / 2);
    const float max_x = static_cast<float>(img_w - 1);
    const float max_y = static_cast<float>(img_h - 1);

    for (int i = 0; i < count; i++)
    {
        objects[i] = proposals[picked[i]];
        Object& obj = objects[i];

        // adjust offset to original unpadded
        float x0 = (obj.rect.x - pad_left) / scale;
        float y0 = (obj.rect.y - pad_top) / scale;
        float x1 = (obj.rect.x + obj.rect.width - pad_left) / scale;
        float y1 = (obj.rect.y + obj.rect.height - pad_top) / scale;

        for (size_t j = 0; j < obj.pts.size(); j++)
        {
            const float ptx = (obj.pts[j].x - pad_left) / scale;
            const float pty = (obj.pts[j].y - pad_top) / scale;
            obj.pts[j] = cv::Point2f(ptx, pty);
        }

        // clip
        x0 = std::max(std::min(x0, max_x), 0.f);
        y0 = std::max(std::min(y0, max_y), 0.f);
        x1 = std::max(std::min(x1, max_x), 0.f);
        y1 = std::max(std::min(y1, max_y), 0.f);

        obj.rect.x = x0;
        obj.rect.y = y0;
        obj.rect.width = x1 - x0;
        obj.rect.height = y1 - y0;
    }

    return 0;
}
1. 检测到人脸及关键点之后,就可以对人脸做空间归一化,这一操作称为人脸对齐(face alignment)。它可以使后续模型提取到与五官位置无关、只与五官的形状和纹理相关的特征,从而极大地提升人脸识别、人脸属性分析、表情分类等算法的性能和稳定性。
#include "FaceWarp.h"
// Angle, in degrees, derived from the position of landmark 2 relative to the
// midpoint of landmarks 0 and 1 (presumably nose versus the two eyes --
// confirm against the detector's landmark order).
//
// With a = vertical offset and b = horizontal offset of landmark 2 from that
// midpoint, the result is atan(|b| / a) converted to degrees.
// Degenerate input: a == 0 yields 90 (or 0 when b is 0 as well) instead of
// relying on a floating-point division by zero.
inline double count_angle(float landmark[5][2])
{
    // local constant: M_PI is not available on every toolchain without
    // extra macros
    constexpr double kPi = 3.14159265358979323846;

    const double a = landmark[2][1] - (landmark[0][1] + landmark[1][1]) / 2;
    const double b = landmark[2][0] - (landmark[0][0] + landmark[1][0]) / 2;

    if (a == 0.0)
        return b == 0.0 ? 0.0 : 90.0;

    // std::fabs keeps the computation in floating point; an unqualified abs
    // may resolve to the integer overload
    return std::atan(std::fabs(b) / a) * 180.0 / kPi;
}
// Column-wise mean of a float matrix.
// src is read as CV_32F with one sample per row; the result is a 1 x cols
// CV_32F row vector holding the mean of each column.
cv::Mat FaceWarp::MeanAxis0(const cv::Mat &src)
{
    const int num = src.rows;
    const int dim = src.cols;

    // x1 y1
    // x2 y2
    cv::Mat output(1, dim, CV_32F);
    for (int i = 0; i < dim; i++)
    {
        float sum = 0;
        for (int j = 0; j < num; j++)
        {
            sum += src.at<float>(j, i);
        }
        output.at<float>(0, i) = sum / num;
    }

    return output;
}
// Subtract the row vector B from every row of A.
// Both matrices are read as CV_32F and must have the same number of columns;
// only row 0 of B is used. The result has the size and type of A.
// When the widths differ (and the assert is compiled out) a zero matrix is
// returned rather than uninitialised memory.
cv::Mat FaceWarp::ElementwiseMinus(const cv::Mat &A,const cv::Mat &B)
{
    cv::Mat output = cv::Mat::zeros(A.rows, A.cols, A.type());

    assert(B.cols == A.cols);
    if (B.cols == A.cols)
    {
        for (int i = 0; i < A.rows; i++)
        {
            for (int j = 0; j < B.cols; j++)
            {
                output.at<float>(i, j) = A.at<float>(i, j) - B.at<float>(0, j);
            }
        }
    }
    return output;
}
// Numerical rank of M: the number of singular values greater than 1e-4.
int FaceWarp::MatrixRank(cv::Mat M)
{
    cv::Mat w, u, vt;
    cv::SVD::compute(M, w, u, vt);

    // mask of the singular values that count as non-zero
    const cv::Mat1b nonZeroSingularValues = w > 0.0001;
    return cv::countNonZero(nonZeroSingularValues);
}
// Column-wise variance of a float matrix: the mean of the squared deviations
// from the column mean (divides by the number of rows). Returns a 1 x cols
// row vector.
cv::Mat FaceWarp::VarAxis0(const cv::Mat &src)
{
    cv::Mat deviations = ElementwiseMinus(src, MeanAxis0(src));
    cv::multiply(deviations, deviations, deviations); // square in place
    return MeanAxis0(deviations);
}
// Estimate a (dim+1) x (dim+1) homogeneous transform T from the point set
// `src` to the point set `dst`. Both matrices are handled as CV_32F with one
// point per row and one coordinate per column.
// The inline comments quote numpy expressions, so this is presumably a port of
// a numpy routine (it resembles the Umeyama similarity estimation) -- confirm
// against the reference before changing any sign or transpose below.
// NOTE(review): several closing braces and at least one `else` are not visible
// in this listing, and some names are declared twice; the branch structure
// described in the comments below is an assumption to be confirmed.
cv::Mat FaceWarp::SimilarTransform(cv::Mat src,cv::Mat dst)
// number of points and number of coordinates per point
int num = src.rows;
int dim = src.cols;
// centre both point sets on their column means
cv::Mat src_mean = MeanAxis0(src);
cv::Mat dst_mean = MeanAxis0(dst);
cv::Mat src_demean = ElementwiseMinus(src, src_mean);
cv::Mat dst_demean = ElementwiseMinus(dst, dst_mean);
// dim x dim matrix built from the centred points, averaged over the points
cv::Mat A = (dst_demean.t() * src_demean) / static_cast<float>(num);
// NOTE(review): d is allocated without an initial value and only its last
// element is ever written here; an initialisation to ones looks missing --
// TODO confirm.
cv::Mat d(dim, 1, CV_32F);
// flip the sign of the last entry when A has a negative determinant
if (cv::determinant(A) < 0) {
d.at<float>(dim - 1, 0) = -1;
// T starts as the identity; its blocks are filled in below
cv::Mat T = cv::Mat::eye(dim + 1, dim + 1, CV_32F);
cv::Mat U, S, V;
// S receives the singular values, U and V the remaining two outputs of
// cv::SVD::compute
cv::SVD::compute(A, S,U, V);
int rank = MatrixRank(A);
// rank 0: nothing is written to T in this branch
if (rank == 0) {
assert(rank == 0);
// rank-deficient case
} else if (rank == dim - 1) {
if (cv::determinant(U) * cv::determinant(V) > 0) {
T.rowRange(0, dim).colRange(0, dim) = U * V;
} else {
// NOTE(review): this writes -1 into d and into s in one statement, so the
// "restore" at the end of this branch writes -1 back -- looks unintended,
// confirm.
int s = d.at<float>(dim - 1, 0) = -1;
d.at<float>(dim - 1, 0) = -1;
T.rowRange(0, dim).colRange(0, dim) = U * V;
cv::Mat diag_ = cv::Mat::diag(d);
cv::Mat twp = diag_*V; //np.dot(np.diag(d), V.T)
// B and C are not used afterwards
cv::Mat B = cv::Mat::zeros(3, 3, CV_8UC1);
cv::Mat C = B.diag(0);
T.rowRange(0, dim).colRange(0, dim) = U* twp;
d.at<float>(dim - 1, 0) = s;
// NOTE(review): diag_ and twp are declared again from here on, so these lines
// presumably belong to a separate `else` branch (full-rank case) whose
// delimiters are not visible -- confirm.
cv::Mat diag_ = cv::Mat::diag(d);
cv::Mat twp = diag_*V.t(); //np.dot(np.diag(d), V.T)
// this `res` is not read again in the visible code
cv::Mat res = U* twp; // U
T.rowRange(0, dim).colRange(0, dim) = -U.t()* twp;
// total variance of the centred source points
cv::Mat var_ = VarAxis0(src_demean);
float val = cv::sum(var_).val[0];
// NOTE(review): res is default-constructed and never filled before it is
// summed on the next line; a step combining d and S looks missing -- TODO
// confirm.
cv::Mat res;
float scale = 1.0/val*cv::sum(res).val[0];
// negate and transpose the upper-left dim x dim block
T.rowRange(0, dim).colRange(0, dim) = - T.rowRange(0, dim).colRange(0, dim).t();
cv::Mat temp1 = T.rowRange(0, dim).colRange(0, dim); // T[:dim, :dim]
cv::Mat temp2 = src_mean.t(); //src_mean.T
cv::Mat temp3 = temp1*temp2; // np.dot(T[:dim, :dim], src_mean.T)
cv::Mat temp4 = scale*temp3;
// last column: dst_mean^T minus the scaled, transformed source mean
T.rowRange(0, dim).colRange(dim, dim+1)= -(temp4 - dst_mean.t()) ;
// apply the scale to the upper-left block
T.rowRange(0, dim).colRange(0, dim) *= scale;
return T;
// Warp a detected face onto the fixed landmark template and return it as a
// 112 x 112 image.
//
// SmallFrame : image the detection was made on.
// Obj        : detection; its first five landmark points are used.
// Also stores the result of count_angle() for these landmarks in `Angle`.
// Returns an empty cv::Mat when the frame is empty or fewer than five
// landmarks are available.
cv::Mat FaceWarp::ProcessFace(cv::Mat& SmallFrame, Object& Obj)
{
    if (SmallFrame.empty() || Obj.pts.size() < 5)
        return cv::Mat();

    // template landmark positions inside the 96 x 112 warp output
    float reference[5][2] = {
        {30.2946f, 51.6963f},
        {65.5318f, 51.5014f},
        {48.0252f, 71.7366f},
        {33.5493f, 92.3655f},
        {62.7299f, 92.2041f}
    };
    float detected[5][2] = {
        {Obj.pts[0].x, Obj.pts[0].y},
        {Obj.pts[1].x, Obj.pts[1].y},
        {Obj.pts[2].x, Obj.pts[2].y},
        {Obj.pts[3].x, Obj.pts[3].y},
        {Obj.pts[4].x, Obj.pts[4].y},
    };

    // Non-owning headers over the local arrays. They must not be static: the
    // arrays live on this call's stack and are gone once it returns.
    const cv::Mat src(5, 2, CV_32FC1, reference);
    const cv::Mat dst(5, 2, CV_32FC1, detected);

    Angle = count_angle(detected);

    // transform taking the detected landmarks onto the template
    const cv::Mat m = SimilarTransform(dst, src);

    cv::Mat aligned;
    cv::warpPerspective(SmallFrame, aligned, m, cv::Size(96, 112), cv::INTER_LINEAR);
    cv::resize(aligned, aligned, cv::Size(112, 112), 0, 0, cv::INTER_LINEAR);

    return aligned;
}
1. 人脸对齐之后就可以比对两张脸的相似度了,训练使用的算法是:https://github.com/deepinsight/insightface 和 https://github.com/ronghuaiyang/arcface-pytorch 。
#include "Arcface.h"
int ArcFace::loadModel(std::string model, bool use_gpu)
bool has_gpu = false;
net.opt = ncnn::Option();
has_gpu = ncnn::get_gpu_count() > 0;
bool to_use_gpu = has_gpu && use_gpu;
net.opt.use_vulkan_compute = to_use_gpu;
net.load_param((model + ".param").c_str());
net.load_model((model + ".bin").c_str());
return 0;
// Standardise a feature vector: subtract its mean and divide by its standard
// deviation, both computed over all elements of fc with cv::meanStdDev.
// NOTE(review): a constant input has zero standard deviation; the result of
// the division in that case is left to OpenCV -- confirm it cannot occur.
cv::Mat ArcFace::zscore(const cv::Mat &fc)
{
    // named `stddev` rather than `std` so the std namespace stays usable
    cv::Mat mean, stddev;
    cv::meanStdDev(fc, mean, stddev);
    return (fc - mean) / stddev;
}
// Run the recognition network on an aligned face and return its standardised
// feature vector.
//
// img : face image; its pixel data is handed to ncnn as PIXEL_BGR.
// Returns a feature_dim x 1 float matrix passed through zscore(), or an empty
// cv::Mat when the "fc1" output cannot be extracted or holds fewer than
// feature_dim values.
cv::Mat ArcFace::getFeature(cv::Mat img)
{
    //cv to NCNN
    ncnn::Mat in = ncnn::Mat::from_pixels(img.data, ncnn::Mat::PIXEL_BGR, img.cols, img.rows);

    ncnn::Extractor ex = net.create_extractor();
    ex.input("data", in);

    ncnn::Mat out;
    if (ex.extract("fc1", out) != 0 || out.total() < static_cast<size_t>(this->feature_dim))
        return cv::Mat();

    // size the buffer up front; indexing an empty vector would write out of
    // bounds
    std::vector<float> feature(this->feature_dim);
    for (int i = 0; i < this->feature_dim; i++)
        feature[i] = out[i];

    // copy the values into a matrix that owns its data
    const cv::Mat feature_mat(feature, true);
    return zscore(feature_mat);
}
// Compare the faces found in two images and display both images.
//
// When exactly one face is detected in each image, both faces are aligned,
// their features are compared with CosineDistance(), and the box, landmarks
// and score are drawn onto the images. Both images are shown in windows "1"
// and "2".
// NOTE(review): the original block delimiters are not visible; showing the
// images regardless of the detection result is an assumption -- confirm.
void detectImage(cv::Mat& cv_src_1, cv::Mat& cv_src_2,YoloFace &yolo_face, FaceWarp& face_warp, ArcFace& arc_face)
{
    std::vector<Object> objects_1, objects_2;
    yolo_face.detection(cv_src_2, objects_2);
    yolo_face.detection(cv_src_1, objects_1);

    if (objects_1.size() == 1 && objects_2.size() == 1)
    {
        const Object& face_1 = objects_1[0];
        const Object& face_2 = objects_2[0];

        // align before anything is drawn, so the overlays do not end up in
        // the aligned crops
        cv::Mat cv_aligned_1 = face_warp.ProcessFace(cv_src_1, objects_1[0]);
        cv::Mat cv_aligned_2 = face_warp.ProcessFace(cv_src_2, objects_2[0]);

        cv::Mat fc1 = arc_face.getFeature(cv_aligned_1);
        cv::Mat fc2 = arc_face.getFeature(cv_aligned_2);

        const double score = CosineDistance(fc1, fc2);

        cv::rectangle(cv_src_1, face_1.rect, cv::Scalar(180, 180, 0), 2);
        cv::rectangle(cv_src_2, face_2.rect, cv::Scalar(255, 0, 255), 2);

        cv::putText(cv_src_1, cv::format("score : %0.1f", score), cv::Point(10, 40), cv::FONT_HERSHEY_SIMPLEX, 0.6, cv::Scalar(0, 0, 255));
        cv::putText(cv_src_2, cv::format("score: %0.1f", score), cv::Point(10, 40), cv::FONT_HERSHEY_SIMPLEX, 0.6, cv::Scalar(0, 0, 255));

        for (const auto& o : face_2.pts)
        {
            cv::circle(cv_src_2, o, 2, cv::Scalar(0, 255, 0), -1);
        }

        for (const auto& o : face_1.pts)
        {
            cv::circle(cv_src_1, o, 2, cv::Scalar(0, 255, 0), -1);
        }
    }

    cv::imshow("1", cv_src_1);
    cv::imshow("2", cv_src_2);
}
int main(void)
YoloFace yolo_face;
ArcFace face_arc;
FaceWarp face_warp;
cv::Mat cv_src_1 = cv::imread("images/61.jpg");
cv::Mat cv_src_2 = cv::imread("images/31.jpg");
detectImage(cv_src_1, cv_src_2,yolo_face, face_warp, face_arc);
3.1 两张不同角度的脸。
3.2 正视。
3.3 部分不完整的脸。
3.4 十级美颜后的人脸对比,还是很容易就能分出差异。
1.源码地址: https://download.csdn.net/download/matt45m/86823231
