赞
踩
最近冒出做人脸识别的想法,在github上正巧看到这个项目,在了解了大概思路之后打算自己独立复刻出这个人脸识别项目。由于笔者自身代码水平并不高,若有地方错误或者不合适的,还希望大神能够指出,感谢交流!写完这篇文章后感觉又收获不少东西。
Dlib 是一个现代C++工具包,包含机器学习算法和工具,用于在C++中创建复杂的软件,以解决现实世界中的问题。按照dlib官网的叙述,其特点主要有:
笔者实验时的主要环境如下:
python = 3.6.4
dlib = 19.8.1
opencv = 3.4.1.15
tqdm = 4.62.1
conda create -n dlibTest python=3.6.4
conda activate dlibTest
pip install dlib==19.8.1
pip install opencv-python==3.4.1.15
pip install tqdm
先在代码中定义dlib用于人脸识别的检测器和特征提取器
- detector = dlib.get_frontal_face_detector() # face detector
- # Alternative mentioned by the article: a CNN-based detector.
- # detector = dlib.cnn_face_detection_model_v1(model_path)
-
- predictor = dlib.shape_predictor(shape_predictor_path) # 68-point facial landmark predictor
- # shape_predictor_path = 'data_dlib/shape_predictor_68_face_landmarks.dat'
-
- recognition_model = dlib.face_recognition_model_v1(recognition_model_path) # ResNet-based extractor of 128-dimensional face feature vectors
- # recognition_model_path = 'data_dlib/dlib_face_recognition_resnet_model_v1.dat'
其中,人脸检测器detector也可用cnn进行检测。
人脸数据可以用网上的照片,也可以自己通过摄像头采集。
通过摄像头获取图像进行人脸识别的代码如下,运行过程中可收集人脸数据,检测到人脸后按下“n”可新建人脸文件夹,之后再按下“s”可对人脸图像进行保存。因为一次采集的数据均放在一个文件夹下,故一次采集应只对一人进行。
- import cv2 as cv
- import time
- import os
- import config
-
class face_detect():
    """Detect faces in camera frames and optionally collect face crops.

    While ``show`` is running the following keys are handled:

    * ``n`` - create a new ``person_<k>`` folder under ``config.faceData_path``
    * ``s`` - toggle saving of the current face crop into that folder
    * ``q`` - quit the loop
    """

    def __init__(self):
        self.start_time = 0  # timestamp of the previous frame, used for FPS
        self.fps = 0  # most recent frames-per-second estimate

        self.image = None  # current camera frame
        self.face_img = None  # crop of the most recently processed face

        self.face_num = 0  # number of faces in the current frame
        self.last_face_num = 0  # number of faces in the previous frame
        self.check_times = 0  # reset to 0 whenever the face count changes

        # True when the face count differs from the previous frame; used by
        # the recognition step to decide when features must be recomputed.
        self.face_num_change_flag = False
        self.quit_flag = False  # set by pressing "q"
        self.buildNewFolder = False  # set once "n" has created a folder
        self.save_flag = False  # toggled by pressing "s"
        self.face_flag = False  # True when the current frame contains a face

        self.img_num = 0  # running index used to name saved face images

        # Face data is only collected while this is True.
        self.collect_face_data = True

    @staticmethod
    def _clamp_box(left, right, top, bottom, width, height):
        """Clamp a box to ``[0, width] x [0, height]`` so slicing never wraps."""
        left = min(max(left, 0), width)
        right = min(max(right, 0), width)
        top = min(max(top, 0), height)
        bottom = min(max(bottom, 0), height)
        return left, right, top, bottom

    def _crop_face(self, left, right, top, bottom):
        """Return an independent copy of the given box cut from the frame."""
        height, width = self.image.shape[:2]
        left, right, top, bottom = self._clamp_box(
            left, right, top, bottom, width, height)
        # Copy so that overlays drawn on the frame afterwards (rectangle,
        # text, FPS) do not leak into the saved face image.
        return self.image[top:bottom, left:right].copy()

    def get_fps(self):
        """Update ``self.fps`` and draw it in the frame's top-left corner."""
        now = time.time()
        time_period = now - self.start_time
        # A coarse clock can report zero elapsed time; keep the previous
        # estimate in that case instead of dividing by zero.
        if time_period > 0:
            self.fps = 1.0 / time_period
        self.start_time = now
        # Red below 15 FPS, green otherwise (BGR colour order).
        color = (0, 0, 255) if self.fps < 15 else (0, 255, 0)
        cv.putText(self.image, str(round(self.fps, 2)), (20, 50),
                   cv.FONT_HERSHEY_DUPLEX, 1, color)

    def key_scan(self, key):
        """Handle one key code from ``cv.waitKey`` and save data if enabled.

        Args:
            key: key code returned by ``cv.waitKey``.
        """
        if self.collect_face_data:
            # Save only when a folder exists, saving is switched on and the
            # current frame actually produced a non-empty face crop.
            can_save = (
                self.save_flag
                and self.buildNewFolder
                and self.face_flag
                and self.face_img is not None
                and self.face_img.size > 0
            )
            if can_save:
                cv.imwrite(
                    config.faceData_path + 'person_{}/{}.png'.format(
                        config.num_of_person_in_lib - 1, self.img_num),
                    self.face_img)
                self.img_num += 1

            if key == ord('s'):
                self.save_flag = not self.save_flag

            if key == ord('n'):
                # exist_ok avoids a crash when the folder is already there.
                os.makedirs(
                    config.faceData_path + 'person_{}'.format(
                        config.num_of_person_in_lib),
                    exist_ok=True)
                config.num_of_person_in_lib += 1
                print("新文件夹建立成功!!")
                self.buildNewFolder = True

        if key == ord('q'):
            self.quit_flag = True

    def face_detecting(self):
        """Run the face detector on ``self.image``.

        Returns:
            ``None`` when no face is found, otherwise a tuple
            ``(face_location, all_face_location)`` where ``face_location``
            holds the detector's rectangles and ``all_face_location`` holds
            the enlarged ``[left, right, top, bottom]`` box of each face.
            The enlarged boxes are not clipped to the frame.
        """
        faces = config.detector(self.image, 0)
        self.face_num = len(faces)

        if self.face_num != self.last_face_num:
            self.face_num_change_flag = True
            print("脸数改变,由{}张变为{}张".format(self.last_face_num, self.face_num))
            self.check_times = 0
            self.last_face_num = self.face_num
        else:
            self.face_num_change_flag = False

        self.face_flag = self.face_num != 0
        if not self.face_flag:
            return None

        face_location = []
        all_face_location = []
        for face in faces:
            face_location.append(face)
            w = face.right() - face.left()
            h = face.bottom() - face.top()
            # Enlarge the box: a quarter width on each side, half a height
            # above and a quarter height below the detected rectangle.
            all_face_location.append([
                face.left() - w // 4,
                face.right() + w // 4,
                face.top() - h // 2,
                face.bottom() + h // 4,
            ])

        return face_location, all_face_location

    def show(self, camera):
        """Display annotated camera frames until "q" is pressed.

        Args:
            camera: an opened ``cv.VideoCapture``; it is released on exit.
        """
        cv.namedWindow('camera', 0)
        try:
            while camera.isOpened() and not self.quit_flag:
                val, self.image = camera.read()
                if not val:
                    continue

                key = cv.waitKey(1)

                res = self.face_detecting()
                if res is not None:
                    _, all_face_location = res

                    for left, right, top, bottom in all_face_location:
                        self.face_img = self._crop_face(left, right, top, bottom)
                        cv.rectangle(self.image, (left, top), (right, bottom),
                                     (0, 0, 255))

                        if self.collect_face_data:
                            cv.putText(self.image, "Face",
                                       (int((left + right) / 2) - 50, bottom + 20),
                                       cv.FONT_HERSHEY_COMPLEX, 1,
                                       (255, 255, 255))

                # Handle keys on every frame so "q" works without a face.
                self.key_scan(key)

                self.get_fps()

                cv.imshow('camera', self.image)
        finally:
            # Always free the device and windows, even if a frame fails.
            camera.release()
            cv.destroyAllWindows()
-
def main():
    """Open the default camera and run the face-detection preview."""
    # Create the capture before ``try`` so ``finally`` never sees an
    # unbound ``cam`` if construction itself fails.
    cam = cv.VideoCapture(0)
    try:
        face_detect().show(cam)
    finally:
        cam.release()
        cv.destroyAllWindows()
        print("程序退出!!")


if __name__ == '__main__':
    main()
具体检测效果如下图所示,马赛克为后期增加
采集的人脸数据在工程文件夹faceData下,这里三个文件夹分别存放刘德华、王冰冰、西野七濑的图片:
下一步再对记录人脸名字的txt文件进行修改,注意顺序与faceData内存放人脸图像的文件夹顺序对应。
首先是对前一步采集到的人脸图像进行人脸检测(避免有时候cv展示的图像上检测到人脸,但保存下来的图像却检测不到而报错),再对检测到人脸的图像进行68个人脸关键点的提取,提取效果如下图所示:
之后再根据这68个关键点对人脸进行对齐,并将对齐后的人脸图像输入到resnet模型中抽象出128维的人脸特征向量,进而保存在csv文件中,从而建立了一个人脸数据库。
获取特征向量的函数如下:
def get_128_features(person):
    """Compute the representative 128-D feature vector of one person.

    Every image in the person's face-data folder is run through the face
    detector, the landmark predictor and the recognition model. An
    annotated copy of each image in which a face was found is written to
    the matching folder under ``config.points_faceData_path``.

    Args:
        person: index into ``config.imgs_folder_path`` selecting the folder.

    Returns:
        The element-wise median of all feature vectors found in the folder.
    """
    imgs_folder = config.imgs_folder_path[person]
    points_faceImage_path = config.points_faceData_path + imgs_folder

    imgs_path = config.faceData_path + imgs_folder + '/'
    list_imgs = os.listdir(imgs_path)

    # Start from an empty output folder for the annotated images.
    if os.path.exists(points_faceImage_path):
        shutil.rmtree(points_faceImage_path)
    os.makedirs(points_faceImage_path)
    print("人脸点图文件夹建立成功!!")

    features = []
    with tqdm(total=len(list_imgs)) as pbar:
        pbar.set_description(str(imgs_folder))
        for j, img_name in enumerate(list_imgs):
            image = cv.imread(os.path.join(imgs_path, img_name))
            if image is None:
                # Not a readable image; skip it instead of crashing.
                pbar.update(1)
                continue

            # The second argument is the number of upsampling passes.
            faces = config.detector(image, 1)
            if len(faces) != 0:
                # Draw on a copy so the recognition model always sees the
                # untouched pixels rather than our overlays.
                annotated = image.copy()
                for face in faces:
                    shape = config.predictor(image, face)  # 68 landmarks

                    the_features = list(
                        config.recognition_model.compute_face_descriptor(
                            image, shape))
                    features.append(the_features)

                    w = face.right() - face.left()
                    h = face.bottom() - face.top()
                    left, right = face.left() - w // 4, face.right() + w // 4
                    top, bottom = face.top() - h // 2, face.bottom() + h // 4
                    cv.rectangle(annotated, (left, top), (right, bottom),
                                 (0, 0, 255))

                    if config.get_points_faceData_flag:
                        for p in range(68):
                            point = shape.part(p)
                            cv.circle(annotated, (point.x, point.y), 2,
                                      (0, 0, 255))

                cv.imwrite(points_faceImage_path + '/{}.png'.format(j),
                           annotated)
            pbar.update(1)

    # The median is less sensitive to outlier images than the mean.
    # NOTE(review): if no face was found in any image, ``features`` is empty
    # and the result is NaN - confirm how the caller should handle that.
    return np.median(np.array(features), axis=0)
建立好人脸数据库后就可开始进行人脸识别了,其过程也是和之前类似。先获取图像、对图像进行人脸检测、检测到人脸后进行特征抽象、将库内的特征向量逐个与当前的特征向量进行欧氏距离的计算、根据阈值判断是否属于库内人脸。
其中,n维空间中两点 $x=(x_1,\dots,x_n)$ 与 $y=(y_1,\dots,y_n)$ 之间欧氏距离的计算公式如下:
$$d(x,y)=\sqrt{\sum_{i=1}^{n}(x_i-y_i)^2}$$
运用numpy库计算向量间欧氏距离的代码如下:
def calculate_EuclideanDistance(self, feature1, feature2):
    """Return the Euclidean distance between two feature vectors.

    Both inputs are converted to floating point first, so unsigned-integer
    arrays cannot wrap around on subtraction and sequences of numeric
    strings are accepted as well.

    Args:
        feature1: first vector (any array-like of numbers).
        feature2: second vector with the same shape as ``feature1``.

    Returns:
        The distance as a numpy float.
    """
    difference = (np.asarray(feature1, dtype=float)
                  - np.asarray(feature2, dtype=float))
    # With no ``ord``/``axis`` given, norm is sqrt(sum(difference ** 2)).
    return np.linalg.norm(difference)
人脸识别代码中进行了5次的人脸识别,之后取每个特征分量的中值得到最终预测的特征向量,尽量减少干扰。self.init_process()是进行加载库以及名字的操作:
- def recognition_from_cam(self):
- # Recognise faces in frames read from self.camera until the camera closes
- # or self.quit_flag is set. For each face, feature vectors are collected
- # while self.check_times < 5; when it equals 5 they are combined by
- # self.middle_filter and compared with every vector in self.all_features.
- # NOTE(review): indentation was lost in this listing, so the nesting of
- # the statements below cannot be confirmed from here.
- self.init_process()
- while self.camera.isOpened() and not self.quit_flag:
- val, self.image = self.camera.read()
- if val == False: continue
-
- #self.image = cv.imread('./data/test/test_bb.jpg')
-
- key = cv.waitKey(1)
-
- res = self.face_detecting() # 0.038s (author's timing)
-
- if res is not None:
- face, self.all_face_location = res
-
- for i in range(self.face_num):
- [left, right, top, bottom] = self.all_face_location[i]
- self.middle_point = [(left + right) /2, (top + bottom) / 2]
-
- # NOTE(review): the box coordinates are used as slice bounds unchanged;
- # presumably they can be negative near the frame edge - confirm.
- self.face_img = self.image[top:bottom, left:right]
-
- # NOTE(review): the rectangle is drawn on self.image before the
- # predictor and recognition model read the same frame - confirm this
- # does not affect the descriptor.
- cv.rectangle(self.image, (left, top), (right, bottom), (0, 0, 255))
-
- shape = config.predictor(self.image, face[i]) # 0.002s (author's timing)
-
- if self.face_num_change_flag == True or self.check_times <= 5:
- if self.face_num_change_flag == True: # face count changed: restart the five sampling passes
- # NOTE(review): if this reset sits inside the per-face loop it would
- # also clear features already stored for earlier faces of this
- # frame - confirm against the original indentation.
- self.check_times = 0
- self.last_now_middlePoint_eDistance = [99999 for _ in range(self.available_max_face_num)]
- for z in range(self.available_max_face_num): self.check_features_from_cam[z] = []
-
- if self.check_times < 5:
- the_features_from_cam = list(config.recognition_model.compute_face_descriptor(self.image, shape)) # most of the time is spent here, 0.32s (author's timing)
- if self.check_times == 0: # first frame after a reset
- self.check_features_from_cam[i].append(the_features_from_cam)
- self.last_frame_middle_point[i] = self.middle_point
- else:
- this_face_index = self.track_link() # later frames must map to the face index used in the first frame
- self.check_features_from_cam[this_face_index].append(the_features_from_cam)
-
- elif self.check_times == 5:
- # Combine the collected vectors, then measure the distance to every
- # person in the library.
- features_after_filter = self.middle_filter(self.check_features_from_cam[i])
- self.check_features_from_cam[i] = []
- for person in range(config.num_of_person_in_lib):
- e_distance = self.calculate_EuclideanDistance(self.all_features[person],
- features_after_filter) # takes almost no time
-
- self.all_e_distance[i].append(e_distance)
-
- # The closest library entry is accepted only below the threshold.
- if min(self.all_e_distance[i]) < config.recognition_threshold:
- self.person_name[i] = self.all_name[self.all_e_distance[i].index(min(self.all_e_distance[i]))]
- cv.putText(self.image, self.person_name[i],
- (int((left + right) / 2) - 50, bottom + 20),
- cv.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255))
- else:
- self.person_name[i] = "Unknown"
-
- print("预测结果为:{}, 与库中各人脸的欧氏距离为:{}".format(self.person_name[i], self.all_e_distance[i]))
-
- else:
- # No new features are computed here: look up the face index via
- # track_link and draw the name already stored for it.
- this_face_index = self.track_link()
- #print(this_face_index, self.person_name)
- cv.putText(self.image, self.person_name[this_face_index], (int((left + right) / 2) - 50, bottom + 20),
- cv.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255))
- self.check_times += 1
-
- # Clear the stored distances of every face slot.
- for j in range(self.available_max_face_num):
- self.all_e_distance[j] = []
-
- self.key_scan(key)
-
- self.get_fps()
- cv.namedWindow('camera', 0)
- cv.imshow('camera', self.image)
-
- self.camera.release()
- cv.destroyAllWindows()
具体识别效果如下图所示
单张人脸
多张人脸
由于库中没有薛之谦的人脸数据,故识别出来为Unknown。
实例给的是直接读取图片,观赏效果会比较好。也可以摄像头读取图像进行识别,但若每次都进行特征向量提取,则会浪费大量时间从而导致帧率过低。原项目作者是根据前后帧的人脸数量是否发生变化来判断是否进行特征提取的,若人脸数量发生变化,则对每张人脸进行特征提取;否则就只进行人脸检测+人脸跟踪。这样就省掉了后续帧不必要的特征提取,提高了帧率。
实际效果如下图所示(13帧左右)
一般情况下的帧率(15帧左右)
我的代码放在consolas-K/dlib_faceRecognition: 使用dlib进行人脸识别 (github.com),原项目作者的代码在参考资料中。
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。