
Dataset Generation for Face Landmark Detection and Head Pose Estimation


This post continues the previous one, Dataset Preparation for Face Landmark Detection and Head Pose Estimation.

1. Use OpenFace to generate the face landmarks and head pose; see the command-line documentation: https://github.com/TadasBaltrusaitis/OpenFace/wiki/Command-line-arguments

build/bin/FaceLandmarkImg.exe -fdir "images path"

Detailed usage is covered in the referenced blog post. FaceLandmarkImg produces one csv file per image (one row per detected face) containing a large amount of per-face information; only the 68 landmarks and the head pose are needed here.
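The script in step 3 pulls these values out of the csv by fixed column index (item[293..295] for the pose, item[296 + i] for the landmark coordinates). The same values can also be read by column name; below is a minimal sketch, assuming the usual OpenFace header names x_0..x_67, y_0..y_67 and pose_Rx/pose_Ry/pose_Rz (the read_openface_csv helper is only illustrative and is not used by the scripts that follow):

```python
import csv
import numpy as np

def read_openface_csv(csv_file):
    """Read the 68 landmarks and the head pose (radians) for every face in one
    OpenFace csv file. Assumes the standard column names x_0..x_67, y_0..y_67,
    pose_Rx, pose_Ry, pose_Rz."""
    faces = []
    with open(csv_file, 'r') as f:
        # OpenFace headers may contain leading spaces, so skip them
        reader = csv.DictReader(f, skipinitialspace=True)
        for row in reader:
            landmarks = np.array([[float(row['x_%d' % i]), float(row['y_%d' % i])]
                                  for i in range(68)])        # (68, 2) pixel coordinates
            pose = np.array([float(row['pose_Rx']),
                             float(row['pose_Ry']),
                             float(row['pose_Rz'])])          # pitch, yaw, roll in radians
            faces.append((landmarks, pose))
    return faces
```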

2. Use a face detection network to generate face detection results. Each image yields a txt file in which every line gives the top-left and bottom-right corners of one detected face (xmin ymin xmax ymax); detailed usage is again covered in the referenced blog post.

3. Combine the OpenFace output with the face detection results to generate the dataset label file, one line per face, in the format:

face_path x1 y1 x2 y2 x3 y3 ...... x68 y68 pitch yaw roll

Run the script Generate_labels.py:

```python
import sys, os
import cv2
import numpy as np
import csv
import argparse
import math
from shutil import copyfile


# Determine whether a matrix is a valid rotation matrix
def isRotationMatrix(R):
    Rt = np.transpose(R)
    shouldBeIdentity = np.dot(Rt, R)
    I = np.identity(3, dtype=R.dtype)
    n = np.linalg.norm(I - shouldBeIdentity)
    return n < 1e-6


# Rotation vector to rotation matrix (Rodrigues' formula)
def vec2matrix(rvec):
    theta = np.linalg.norm(rvec)
    r = rvec / theta
    R_ = np.array([[0, -r[2][0], r[1][0]],
                   [r[2][0], 0, -r[0][0]],
                   [-r[1][0], r[0][0], 0]])
    R = np.cos(theta) * np.eye(3) + (1 - np.cos(theta)) * r * r.T + np.sin(theta) * R_
    return R


# Rotation matrix to Euler angles (degrees)
def rotationMatrixToAngles(R):
    assert (isRotationMatrix(R))
    sy = math.sqrt(R[0, 0] * R[0, 0] + R[1, 0] * R[1, 0])
    singular = sy < 1e-6
    if not singular:
        x = math.atan2(R[2, 1], R[2, 2])
        y = math.atan2(-R[2, 0], sy)
        z = math.atan2(R[1, 0], R[0, 0])
    else:
        x = math.atan2(-R[1, 2], R[1, 1])
        y = math.atan2(-R[2, 0], sy)
        z = 0
    x = x * 180.0 / math.pi
    y = y * 180.0 / math.pi
    z = z * 180.0 / math.pi
    return np.array([-1.0 * x + 10, y, z])


def compute_iou(rec1, rec2):
    """
    Compute the IoU of two rectangles.
    :param rec1: (y0, x0, y1, x1), i.e. (top, left, bottom, right)
    :param rec2: (y0, x0, y1, x1)
    :return: scalar IoU value
    """
    # area of each rectangle
    S_rec1 = (rec1[2] - rec1[0]) * (rec1[3] - rec1[1])
    S_rec2 = (rec2[2] - rec2[0]) * (rec2[3] - rec2[1])
    sum_area = S_rec1 + S_rec2
    # edges of the intersection rectangle
    left_line = max(rec1[1], rec2[1])
    right_line = min(rec1[3], rec2[3])
    top_line = max(rec1[0], rec2[0])
    bottom_line = min(rec1[2], rec2[2])
    # check whether the rectangles intersect at all
    if left_line >= right_line or top_line >= bottom_line:
        return 0
    else:
        intersect = (right_line - left_line) * (bottom_line - top_line)
        return (intersect / (sum_area - intersect)) * 1.0


def Generate_labels(img_path, csv_path, txt_path, face_path, label_path):
    """
    :param img_path: path to the 300W images (and their .pts files)
    :param csv_path: path to the OpenFace csv results
    :param txt_path: path to the face detection txt results
    :param face_path: path where the cropped faces are saved
    :param label_path: path where the generated label file is saved
    :return:
    """
    img_list = os.listdir(img_path)
    fp_label = open(os.path.join(label_path, '300w_label.txt'), 'w')
    for img_file in img_list:
        print('now dealing with ' + str(img_file))
        # process each image in turn
        filename, extension = os.path.splitext(img_file)
        extension = extension[1:]
        if extension == 'jpg' or extension == 'png':
            img = cv2.imread(os.path.join(img_path, img_file))
            # read and organize the OpenFace result; each entry of 'csv_result' has the format:
            # [
            #   [point0_x, point0_y],
            #   [point1_x, point1_y],
            #   ......
            #   [point67_x, point67_y],
            #   [pose_x, pose_y, pose_z],
            #   [xmin_csv, ymin_csv, xmax_csv, ymax_csv]
            # ]
            if not os.path.exists(os.path.join(csv_path, filename + '.csv')):
                continue
            csv_file = open(os.path.join(csv_path, filename + '.csv'), 'r')
            reader = csv.reader(csv_file)
            csv_result = []
            for item in reader:
                csv_result_temp = []
                xmin_csv = 50000
                ymin_csv = 50000
                xmax_csv = 0
                ymax_csv = 0
                if reader.line_num == 1:  # skip the header row
                    continue
                for i in range(68):
                    csv_result_temp.append([float(item[296 + i]), float(item[296 + i + 68])])
                    xmin_csv = min(xmin_csv, float(item[296 + i]))
                    xmax_csv = max(xmax_csv, float(item[296 + i]))
                    ymin_csv = min(ymin_csv, float(item[296 + i + 68]))
                    ymax_csv = max(ymax_csv, float(item[296 + i + 68]))
                csv_result_temp.append([item[293], item[294], item[295]])
                csv_result_temp.append([xmin_csv, ymin_csv, xmax_csv, ymax_csv])
                csv_result.append(csv_result_temp)
            csv_file.close()
            # read and organize the 300W .pts annotation; 'pts_result' has the format:
            # [
            #   [point0_x, point0_y],
            #   ......
            #   [point67_x, point67_y],
            #   [xmin_pts, ymin_pts, xmax_pts, ymax_pts]
            # ]
            fp_pts = open(os.path.join(img_path, filename + '.pts'), 'r')
            lines = fp_pts.readlines()
            lines = lines[3:71]
            xmin_pts = 50000
            ymin_pts = 50000
            xmax_pts = 0
            ymax_pts = 0
            pts_result = []
            for line in lines:
                S = line.split(' ')
                point_x = float(S[0])
                point_y = float(S[1])
                xmin_pts = min(xmin_pts, point_x)
                ymin_pts = min(ymin_pts, point_y)
                xmax_pts = max(xmax_pts, point_x)
                ymax_pts = max(ymax_pts, point_y)
                pts_result.append([point_x, point_y])
            pts_result.append([xmin_pts, ymin_pts, xmax_pts, ymax_pts])
            fp_pts.close()
            face_pose = []
            face_bbox = []
            iou_max = 0
            # pick the OpenFace face whose landmark bbox overlaps the .pts bbox (IoU >= 0.5)
            for i in range(len(csv_result)):
                iou = compute_iou((ymin_pts, xmin_pts, ymax_pts, xmax_pts),
                                  (csv_result[i][69][1], csv_result[i][69][0],
                                   csv_result[i][69][3], csv_result[i][69][2]))
                if iou > iou_max and iou >= 0.5:
                    face_pose = csv_result[i][68][0:]
            if not face_pose:
                continue
            # read the face detection result (txt) and get the face bbox, in square form
            fp_txt = open(os.path.join(txt_path, filename + '.txt'), 'r')
            lines = fp_txt.readlines()
            iou_max = 0
            for line in lines:
                S = line.strip().split(' ')
                xmin_txt = float(S[0])
                ymin_txt = float(S[1])
                xmax_txt = float(S[2])
                ymax_txt = float(S[3])
                iou = compute_iou((ymin_pts, xmin_pts, ymax_pts, xmax_pts),
                                  (ymin_txt, xmin_txt, ymax_txt, ymax_txt))
                if iou > iou_max and iou >= 0.7:
                    face_bbox.append([xmin_txt, ymin_txt, xmax_txt, ymax_txt])
            fp_txt.close()
            # determine whether a detector box was matched
            if face_bbox:
                width = xmax_txt - xmin_txt
                height = ymax_txt - ymin_txt
                adjust_value = 1.0 * (height - width) / 2.0
                xmin = int(round(max(0, xmin_txt - adjust_value)))
                ymin = int(round(ymin_txt))
                xmax = int(round(min(img.shape[1], xmin + height)))
                ymax = int(round(ymax_txt))
                print(xmax - xmin, ymax - ymin)
            else:
                width = xmax_pts - xmin_pts
                height = ymax_pts - ymin_pts
                long_size = max(width, height)
                adjust_value_x = 1.0 * (long_size - width) / 2.0
                adjust_value_y = 1.0 * (long_size - height) / 2.0
                xmin = int(round(max(0, xmin_pts - adjust_value_x)))
                xmax = int(round(min(img.shape[1], xmin + long_size)))
                ymin = int(round(max(0, ymin_pts - adjust_value_y)))
                ymax = int(round(min(img.shape[0], ymin + long_size)))
                print(xmax - xmin, ymax - ymin)
            # crop the face from the original image
            img_face = img[ymin:ymax, xmin:xmax, :]
            cv2.imwrite(os.path.join(face_path, filename + '.jpg'), img_face)
            # write the path of the cropped face to the label file
            fp_label.write(os.path.join(face_path, filename + '.jpg') + ' ')
            # fp_label.write('/home/OpenFace/300w_face/' + filename + '.jpg' + ' ')
            # write the point coordinates, relative to the crop
            for i in range(68):
                fp_label.write(str(pts_result[i][0] - xmin) + ' ' + str(pts_result[i][1] - ymin) + ' ')
            # write the head pose (OpenFace gives radians; convert to degrees)
            '''
            rotation_vector = []
            rotation_vector.append([float(face_pose[0])])
            rotation_vector.append([float(face_pose[1])])
            rotation_vector.append([float(face_pose[2])])
            R = vec2matrix(rotation_vector)
            headpose = rotationMatrixToAngles(R)
            '''
            headpose = []
            headpose.append(float(face_pose[0]) / math.pi * 180)
            headpose.append(float(face_pose[1]) / math.pi * 180)
            headpose.append(float(face_pose[2]) / math.pi * 180)
            fp_label.write(str(headpose[0]) + ' ' + str(headpose[1]) + ' ' + str(headpose[2]) + '\n')
    fp_label.close()


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--img_path", type=str, default='img_path',
                        help="the path to the 300w dataset")
    parser.add_argument("--csv_path", type=str, default='csv_path',
                        help="the path to the result of OpenFace")
    parser.add_argument("--txt_path", type=str, default='txt_path',
                        help="the path to the face detection results")
    parser.add_argument("--face_path", type=str, default='face_path_test',
                        help="the path where cropped faces are saved")
    parser.add_argument("--label_path", type=str, default='label_path_test',
                        help="the path where the label file is saved")
    opt = parser.parse_args()
    Generate_labels(opt.img_path, opt.csv_path, opt.txt_path, opt.face_path, opt.label_path)
```
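A typical invocation (the paths are placeholders for wherever the 300W images with their .pts files, the OpenFace csv results, and the detector txt results actually live):

python Generate_labels.py --img_path 300W/images --csv_path csv_path --txt_path txt_path --face_path 300w_face --label_path label_path

This crops each matched face into face_path and writes a single 300w_label.txt under label_path in the format shown above.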

4. Following https://github.com/lsy17096535/face-landmark/blob/master/train/mainloop.py on GitHub, the dataset is preprocessed as follows when generating the hdf5 file (a sketch of the inverse mapping follows the list):

- image (the face crop)
  1. resize(img, (60, 60)).astype('f4')
  2. cvtColor(BGR2GRAY), then reshape to (1, 60, 60)
  3. m, s = cv2.meanStdDev(image)
  4. image = (image - m) / (1.e-6 + s)
- landmarks (the 68 keypoints)
  1. multiply by the scale factors rx = 60 / face crop width, ry = 60 / face crop height
  2. normalize by dividing by 60
- poses (the head pose)
  1. normalize by dividing by 50
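The inverse of this normalization is needed whenever the stored labels (or a trained network's outputs) have to be interpreted in pixels and degrees again. A minimal sketch of that inverse, using the factors listed above (the denormalize helper is illustrative and not part of the scripts):

```python
import numpy as np

def denormalize(landmarks_136, pose_3, face_w, face_h):
    """Map a normalized 136-dim landmark vector and 3-dim pose vector back to
    pixel coordinates in the original face crop and to degrees.
    Forward transform above: x -> x * (60 / face_w) / 60, pose -> pose / 50."""
    pts = np.asarray(landmarks_136, dtype='float32').reshape(68, 2)
    pts[:, 0] *= face_w                                   # undo the x normalization
    pts[:, 1] *= face_h                                   # undo the y normalization
    angles = np.asarray(pose_3, dtype='float32') * 50.0   # pitch, yaw, roll in degrees
    return pts, angles
```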

Run the script Generate_hdf5.py:

```python
import sys, os
import cv2
import numpy as np
from shutil import copyfile

caffe_root = 'caffe/'                      # adjust to your Caffe installation
sys.path.insert(0, caffe_root + 'python')  # make pycaffe importable
import caffe                               # not used directly below, but verifies the pycaffe environment
import h5py
import argparse

IMAGE_SIZE = 60  # fixed size for all images


def Generate_hdf5(train_txt_file, train_h5_file, train_h5_list_file):
    """
    Generate hdf5 data based on the txt label.
    :param train_txt_file: label in txt format (imgpath x1 y1 x2 y2 ...... x68 y68 pitch yaw roll)
    :param train_h5_file: h5 file to be saved
    :param train_h5_list_file: list file in txt format to be saved
    :return:
    """
    with open(train_txt_file, 'r') as T:
        lines = T.readlines()
    HD5Images = np.zeros([len(lines), 1, IMAGE_SIZE, IMAGE_SIZE], dtype='float32')
    HD5Landmarks = np.zeros([len(lines), 136], dtype='float32')
    HD5Poses = np.zeros([len(lines), 3], dtype='float32')
    for i, l in enumerate(lines):
        sp = l.split(' ')
        print(sp[0])
        img = cv2.imread(sp[0])
        height, width = img.shape[0], img.shape[1]
        rx, ry = 1.0 * IMAGE_SIZE / width, 1.0 * IMAGE_SIZE / height
        res = cv2.resize(img, (IMAGE_SIZE, IMAGE_SIZE), interpolation=cv2.INTER_CUBIC)
        image = res.astype('f4')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY).reshape(1, IMAGE_SIZE, IMAGE_SIZE)
        m, s = cv2.meanStdDev(image)
        image = (image - m) / (1.e-6 + s)
        HD5Images[i, :] = image
        label_ = np.zeros([136], dtype='float32')
        pose_ = np.zeros([3], dtype='float32')
        for j in range(136):
            # even index -> x coordinate (scale by rx), odd index -> y coordinate (scale by ry)
            if (j + 1) % 2:
                scale_factor = rx
            else:
                scale_factor = ry
            label_[j] = float(sp[j + 1]) * float(scale_factor)
            label_[j] = label_[j] / (1.0 * IMAGE_SIZE)
        HD5Landmarks[i, :] = label_[:]
        # print(HD5Landmarks[i, :])
        for j in range(3):
            normalize_factor = 50
            pose_[j] = float(sp[j + 1 + 136]) / float(normalize_factor)
        HD5Poses[i, :] = pose_[:]
    with h5py.File(train_h5_file, 'w') as H:
        H.create_dataset('data', data=HD5Images)
        H.create_dataset('label', data=HD5Landmarks)
        H.create_dataset('pose', data=HD5Poses)
    with open(train_h5_list_file, 'w') as L:
        L.write(train_h5_file)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--train_txt_file", type=str, default="label_path/300w_label.txt",
                        help="path to label in txt format")
    parser.add_argument("--train_h5_file", type=str, default="test.h5",
                        help="path to generated h5 file")
    parser.add_argument("--train_h5_list_file", type=str, default="test_h5_list.txt",
                        help="path to generated h5 list file")
    opt = parser.parse_args()
    Generate_hdf5(opt.train_txt_file, opt.train_h5_file, opt.train_h5_list_file)
```
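A typical invocation, pointing at the label file produced in step 3 (the output names are just examples):

python Generate_hdf5.py --train_txt_file label_path/300w_label.txt --train_h5_file train.h5 --train_h5_list_file train_h5_list.txt

The list file simply contains the path of the generated h5 file; it is the file that the source parameter of a Caffe HDF5Data layer points to during training.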

5. Visualize the hdf5 file to check that it is correct.

Run the script visual_h5.py:

```python
import sys, os
import cv2
import h5py
import numpy as np

f = h5py.File('../data/300w_ori/train.h5', 'r')
data = f['data']
label = f['label']
pose = f['pose']
num = data.shape[0]
for i in range(1, 100):
    # print(data.shape)
    img = np.array(data[i, 0, :, :] * 255)
    image = np.zeros([img.shape[0], img.shape[1], 3], dtype='float32')
    image[:, :, 0] = img[:]
    image[:, :, 1] = img[:]
    image[:, :, 2] = img[:]
    print(image.shape)
    landmark = np.array(label[i, :])
    print(landmark)
    # print(landmark.shape)
    cv2.imwrite(os.path.join('../data/300w_ori/result', str(i) + '.jpg'), image)
    img = cv2.imread(os.path.join('../data/300w_ori/result', str(i) + '.jpg'))
    for j in range(68):
        # landmarks were normalized, so scale back to the 60x60 image
        x = int(round(landmark[2 * j] * 60))
        y = int(round(landmark[2 * j + 1] * 60))
        # print(x, y)
        cv2.circle(img, (x, y), 1, (0, 0, 255), 1)
    Euler = np.array(pose[i, :])  # the pose channel is read here but not drawn
    # print(Euler.shape)
    cv2.imwrite(os.path.join('../data/300w_ori/result', str(i) + '.jpg'), img)
f.close()
```
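visual_h5.py only checks the landmark channel. The pose channel can be sanity-checked in the same way by undoing the /50 normalization; a small sketch, assuming the same file layout as above:

```python
import h5py
import numpy as np

# Quick check of the pose channel: undo the /50 normalization and print the
# Euler angles (in degrees) for the first few samples.
with h5py.File('../data/300w_ori/train.h5', 'r') as f:
    pose = np.array(f['pose'][:10]) * 50.0   # pitch, yaw, roll in degrees
for i, (pitch, yaw, roll) in enumerate(pose):
    print('%d.jpg  pitch %.1f  yaw %.1f  roll %.1f' % (i, pitch, yaw, roll))
```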

 
