1. 借助于OpenFace生成人脸关键点和头部姿态,命令行参数说明见:https://github.com/TadasBaltrusaitis/OpenFace/wiki/Command-line-arguments
build/bin/FaceLandmarkImg.exe -fdir "images path"
2. 借助于人脸检测网络生成人脸检测结果,每张图片可得到一个结果文件(下面的脚本按与图片同名的txt文件读取:每行对应一张人脸,格式为以空格分隔的 x1 y1 x2 y2),里面记录了人脸的左上角和右下角坐标,具体使用可参考博客
face_path x1 y1 x2 y2 x3 y3 ...... x68 y68 pitch yaw roll
- import sys, os
- import cv2
- import numpy as np
- import csv
- import argparse
- import math
- from shutil import copyfile
def isRotationMatrix(R):
    """Check whether ``R`` is orthonormal.

    The test passes when the norm of ``I - R^T R`` is below 1e-6. Only
    orthonormality is checked; the sign of the determinant is not.

    :param R: 3x3 numpy array.
    :return: truthy when ``R^T R`` is the identity within tolerance.
    """
    gram = np.dot(R.T, R)
    deviation = np.linalg.norm(np.identity(3, dtype=R.dtype) - gram)
    return deviation < 1e-6
def vec2matrix(rvec):
    """Convert a rotation vector to a 3x3 rotation matrix.

    Applies Rodrigues' formula
    ``R = cos(t) * I + (1 - cos(t)) * r r^T + sin(t) * [r]_x``,
    where ``t`` is the norm of ``rvec`` and ``r = rvec / t`` is the unit axis.

    :param rvec: three components of the rotation vector; a 3x1 column
                 (array or nested list) or any other 3-element layout.
    :return: 3x3 numpy array.
    """
    # Accept nested lists as well as arrays, and normalise to a 3x1 column.
    rvec = np.asarray(rvec, dtype=np.float64).reshape(3, 1)
    theta = np.linalg.norm(rvec)
    if theta < np.finfo(np.float64).eps:
        # A zero vector has no axis; dividing by theta would produce NaNs.
        return np.eye(3)
    r = rvec / theta
    rx, ry, rz = r[:, 0]
    skew = np.array([[0.0, -rz, ry],
                     [rz, 0.0, -rx],
                     [-ry, rx, 0.0]])
    cos_t = np.cos(theta)
    return cos_t * np.eye(3) + (1 - cos_t) * np.dot(r, r.T) + np.sin(theta) * skew
def rotationMatrixToAngles(R):
    """Convert a rotation matrix to three Euler angles in degrees.

    :param R: 3x3 numpy array accepted by ``isRotationMatrix``.
    :return: numpy array ``[-x + 10, y, z]`` where ``x``, ``y``, ``z`` are the
             angles (degrees) extracted from ``R``.
    :raises AssertionError: if ``R`` fails the ``isRotationMatrix`` check
                            (the assert is skipped under ``python -O``).
    """
    assert isRotationMatrix(R)

    sy = math.sqrt(R[0, 0] * R[0, 0] + R[1, 0] * R[1, 0])
    # The second angle uses the same expression in both branches.
    y = math.atan2(-R[2, 0], sy)
    if sy < 1e-6:
        # Near-singular case: the third angle is fixed to zero.
        x = math.atan2(-R[1, 2], R[1, 1])
        z = 0
    else:
        x = math.atan2(R[2, 1], R[2, 2])
        z = math.atan2(R[1, 0], R[0, 0])

    x_deg, y_deg, z_deg = [angle * 180.0 / math.pi for angle in (x, y, z)]
    # NOTE(review): the first angle is negated and shifted by +10 degrees;
    # presumably a dataset-specific sign/offset convention - confirm.
    return np.array([-1.0 * x_deg + 10, y_deg, z_deg])
def compute_iou(rec1, rec2):
    """
    Intersection-over-union of two axis-aligned rectangles.
    :param rec1: (y0, x0, y1, x1), i.e. (top, left, bottom, right)
    :param rec2: (y0, x0, y1, x1)
    :return: scalar IoU; 0 when the rectangles do not overlap
    """
    area_1 = (rec1[2] - rec1[0]) * (rec1[3] - rec1[1])
    area_2 = (rec2[2] - rec2[0]) * (rec2[3] - rec2[1])

    # Edges of the candidate intersection rectangle.
    top = max(rec1[0], rec2[0])
    left = max(rec1[1], rec2[1])
    bottom = min(rec1[2], rec2[2])
    right = min(rec1[3], rec2[3])

    # Touching edges count as no overlap.
    if right <= left or bottom <= top:
        return 0

    overlap = (right - left) * (bottom - top)
    return (overlap / (area_1 + area_2 - overlap)) * 1.0
def _read_openface_faces(csv_file_path):
    """Parse an OpenFace result CSV into one ``(pose, bbox)`` pair per data row.

    ``pose`` is the three raw string values of columns 293-295 and ``bbox`` is
    ``[xmin, ymin, xmax, ymax]`` of the 68 landmarks stored in columns 296-363
    (x) and 364-431 (y).
    """
    first_x = 296
    first_y = first_x + 68
    faces = []
    with open(csv_file_path, 'r') as csv_file:
        reader = csv.reader(csv_file)
        next(reader, None)  # the first line is skipped (presumably the column header)
        for row in reader:
            if len(row) < first_y + 68:
                continue  # blank or truncated row
            xs = [float(value) for value in row[first_x:first_y]]
            ys = [float(value) for value in row[first_y:first_y + 68]]
            faces.append((row[293:296], [min(xs), min(ys), max(xs), max(ys)]))
    return faces


def _read_pts_landmarks(pts_file_path):
    """Read the 68 ground-truth ``[x, y]`` points from a ``.pts`` file."""
    with open(pts_file_path, 'r') as fp_pts:
        # The coordinates are taken from lines 4..71; the first three are skipped.
        lines = fp_pts.readlines()[3:71]
    points = []
    for line in lines:
        fields = line.split()
        points.append([float(fields[0]), float(fields[1])])
    if len(points) != 68:
        raise ValueError('expected 68 landmarks in ' + pts_file_path
                         + ', found ' + str(len(points)))
    return points


def _read_detection_boxes(txt_file_path):
    """Read ``[xmin, ymin, xmax, ymax]`` boxes, one per non-blank line."""
    boxes = []
    with open(txt_file_path, 'r') as fp_txt:
        for line in fp_txt:
            # split() tolerates a missing final newline, which slicing off the
            # last character does not.
            fields = line.split()
            if len(fields) < 4:
                continue
            boxes.append([float(value) for value in fields[:4]])
    return boxes


def _best_overlap_index(reference, candidates, min_iou):
    """Return the index of the candidate with the highest IoU against
    ``reference`` among those reaching ``min_iou``, or ``None``.

    All boxes are ``[xmin, ymin, xmax, ymax]``.
    """
    ref = (reference[1], reference[0], reference[3], reference[2])
    best_index = None
    best_iou = 0
    for index, box in enumerate(candidates):
        iou = compute_iou(ref, (box[1], box[0], box[3], box[2]))
        if iou >= min_iou and iou > best_iou:
            best_index, best_iou = index, iou
    return best_index


def _square_crop_box(img_shape, pts_box, det_box):
    """Compute the integer crop ``(xmin, ymin, xmax, ymax)`` inside the image.

    With a detection box, its vertical extent is kept and the horizontal
    extent is widened symmetrically to the box height. Without one, a square
    centred on the landmark box is used. Both are clamped to the image.
    """
    img_h, img_w = img_shape[0], img_shape[1]
    if det_box is not None:
        xmin_txt, ymin_txt, xmax_txt, ymax_txt = det_box
        width = xmax_txt - xmin_txt
        height = ymax_txt - ymin_txt
        adjust_value = 1.0 * (height - width) / 2.0
        xmin = int(round(max(0, xmin_txt - adjust_value)))
        # Clamp vertically too: a negative start index would wrap the slice.
        ymin = int(round(max(0, ymin_txt)))
        xmax = int(round(min(img_w, xmin + height)))
        ymax = int(round(min(img_h, ymax_txt)))
    else:
        xmin_pts, ymin_pts, xmax_pts, ymax_pts = pts_box
        width = xmax_pts - xmin_pts
        height = ymax_pts - ymin_pts
        long_size = max(width, height)
        adjust_value_x = 1.0 * (long_size - width) / 2.0
        adjust_value_y = 1.0 * (long_size - height) / 2.0
        xmin = int(round(max(0, xmin_pts - adjust_value_x)))
        xmax = int(round(min(img_w, xmin + long_size)))
        ymin = int(round(max(0, ymin_pts - adjust_value_y)))
        ymax = int(round(min(img_h, ymin + long_size)))
    return xmin, ymin, xmax, ymax


def Generate_labels(img_path, csv_path, txt_path, face_path, label_path):
    """
    Crop one face per image and write the landmark / head-pose label file.

    For every ``.jpg`` / ``.png`` in ``img_path`` that has an OpenFace CSV:
    the ground-truth landmarks are read from the ``.pts`` file next to the
    image, the head pose is taken from the OpenFace face that overlaps the
    landmark box best (IoU >= 0.5), and the crop comes from the best
    overlapping detection box (IoU >= 0.7), or from a square around the
    landmarks when no detection qualifies. Images without a CSV or without a
    matching OpenFace face are skipped.

    Each output line is
    ``face_path x1 y1 ... x68 y68 pose0 pose1 pose2``, with landmarks relative
    to the crop's top-left corner and the pose values converted from radians
    to degrees.

    :param img_path: directory with the images and their ``.pts`` files
    :param csv_path: directory with the OpenFace ``<name>.csv`` results
    :param txt_path: directory with the face detector ``<name>.txt`` results
    :param face_path: directory the cropped faces are written to
    :param label_path: directory ``300w_label.txt`` is written to
    :return: None
    """
    # cv2.imwrite only returns False when the target directory is missing, so
    # create the output directories up front.
    for out_dir in (face_path, label_path):
        if out_dir and not os.path.isdir(out_dir):
            os.makedirs(out_dir)

    with open(os.path.join(label_path, '300w_label.txt'), 'w') as fp_label:
        # Sorted so the label file order does not depend on the file system.
        for img_file in sorted(os.listdir(img_path)):
            print('now is dealing with '+str(img_file))
            filename, extension = os.path.splitext(img_file)
            if extension[1:] not in ('jpg', 'png'):
                continue
            csv_file_path = os.path.join(csv_path, filename + '.csv')
            if not os.path.exists(csv_file_path):
                continue
            img = cv2.imread(os.path.join(img_path, img_file))
            if img is None:
                print('cannot read image ' + str(img_file) + ', skipped')
                continue

            faces = _read_openface_faces(csv_file_path)
            points = _read_pts_landmarks(os.path.join(img_path, filename + '.pts'))
            xs = [point[0] for point in points]
            ys = [point[1] for point in points]
            pts_box = [min(xs), min(ys), max(xs), max(ys)]

            # Head pose: the OpenFace face that best matches the annotation.
            pose_index = _best_overlap_index(
                pts_box, [bbox for _, bbox in faces], 0.5)
            if pose_index is None:
                continue
            face_pose = faces[pose_index][0]

            # Crop: use the coordinates of the detection that actually
            # matched, not of whichever line was read last.
            detections = _read_detection_boxes(
                os.path.join(txt_path, filename + '.txt'))
            det_index = _best_overlap_index(pts_box, detections, 0.7)
            det_box = detections[det_index] if det_index is not None else None

            xmin, ymin, xmax, ymax = _square_crop_box(img.shape, pts_box, det_box)
            print(xmax-xmin, ymax-ymin)

            img_face = img[ymin:ymax, xmin:xmax, :]
            if img_face.size == 0:
                print('empty crop for ' + str(img_file) + ', skipped')
                continue
            face_file = os.path.join(face_path, filename + '.jpg')
            if not cv2.imwrite(face_file, img_face):
                print('cannot write ' + face_file + ', skipped')
                continue

            fields = [face_file]
            for point_x, point_y in points:
                fields.append(str(point_x - xmin))
                fields.append(str(point_y - ymin))
            for angle in face_pose:
                fields.append(str(float(angle)/math.pi*180))
            fp_label.write(' '.join(fields) + '\n')
if __name__ == '__main__':
    # Command-line entry point: collect the five directories and run the
    # label generation. Every option falls back to a placeholder default.
    cli = argparse.ArgumentParser()
    for flag, fallback, description in (
            ("--img_path", 'img_path', "the path to the 300w dataset"),
            ("--csv_path", 'csv_path', "the path to the result of OpenFace"),
            ("--txt_path", 'txt_path', "the path to the face detect"),
            ("--face_path", 'face_path_test', "the path to the crop face saved"),
            ("--label_path", 'label_path_test', "the path to the label of 300w dataset"),
    ):
        cli.add_argument(flag, type=str, default=fallback, help=description)
    cli_args = cli.parse_args()
    Generate_labels(
        cli_args.img_path,
        cli_args.csv_path,
        cli_args.txt_path,
        cli_args.face_path,
        cli_args.label_path,
    )

- image 人脸
- 1. resize(img,(60,60)).astype('f4')
- 2. cvtColor(BGR2GRAY).reshape(1,60,60)
- 3. m,s = cv2.meanStdDev(image)
- 4. image = (image - m) / (1.e-6 + s)
- landmarks 关键点
- 1. 乘以scale系数 rx=60/face img width ry=60/face img height
- 2. 归一化 /60
- poses 头部姿态
- 1. 归一化 /50
- import sys,os
- import cv2
- import numpy as np
- from shutil import copyfile
- caffe_root = 'caffe/python/'
- sys.path.insert(0, caffe_root + 'python')
- import caffe
- import h5py
- import argparse
- IMAGE_SIZE = 60 #fixed size to all images
def Generate_hdf5(train_txt_file, train_h5_file, train_h5_list_file):
    """
    Generate hdf5 format based on the txt label

    Each non-blank label line yields one sample:

    * ``data``  - the image resized to IMAGE_SIZE x IMAGE_SIZE, converted to
      grayscale and standardised as ``(pixel - mean) / (std + 1e-6)``;
      stored with shape ``(N, 1, IMAGE_SIZE, IMAGE_SIZE)``.
    * ``label`` - the 136 landmark coordinates divided by the image width (x)
      or height (y); shape ``(N, 136)``.
    * ``pose``  - the three pose values divided by 50; shape ``(N, 3)``.

    :param train_txt_file: label in txt format (imgpath x1 y1 x2 y2 ...... x68 y68 pitch yaw roll)
    :param train_h5_file: h5 file to be saved
    :param train_h5_list_file: list file in txt format to be saved
    :return:
    :raises ValueError: if a label line has fewer than 140 fields
    :raises IOError: if an image listed in the label file cannot be read
    """
    with open(train_txt_file, 'r') as label_file:
        # Blank lines (e.g. a trailing newline) carry no sample.
        samples = [line.split() for line in label_file if line.strip()]

    count = len(samples)
    HD5Images = np.zeros([count, 1, IMAGE_SIZE, IMAGE_SIZE], dtype='float32')
    HD5Landmarks = np.zeros([count, 136], dtype='float32')
    HD5Poses = np.zeros([count, 3], dtype='float32')

    for i, sp in enumerate(samples):
        if len(sp) < 1 + 136 + 3:
            raise ValueError('label line ' + str(i + 1) + ' has '
                             + str(len(sp)) + ' fields, expected 140')
        print(sp[0])
        img = cv2.imread(sp[0])
        if img is None:
            raise IOError('cannot read image: ' + sp[0])
        height, width = img.shape[0], img.shape[1]

        # interpolation is passed by keyword; extra positional arguments
        # would land in the dst / fx slots of cv2.resize.
        res = cv2.resize(img, (IMAGE_SIZE, IMAGE_SIZE), interpolation=cv2.INTER_CUBIC)
        gray = cv2.cvtColor(res.astype('f4'), cv2.COLOR_BGR2GRAY)
        # Take the statistics on the 2-D image: a (1, H, W) array may be read
        # by OpenCV as a 1 x H image with W channels.
        m, s = cv2.meanStdDev(gray)
        gray = (gray - m[0, 0]) / (1.e-6 + s[0, 0])
        HD5Images[i, 0, :, :] = gray

        # Scaling to the resized image (x * IMAGE_SIZE / width) and then
        # dividing by IMAGE_SIZE reduces to x / width (y / height).
        coords = np.array([float(value) for value in sp[1:137]],
                          dtype='float64').reshape(68, 2)
        coords[:, 0] /= width
        coords[:, 1] /= height
        HD5Landmarks[i, :] = coords.reshape(136)

        normalize_factor = 50
        HD5Poses[i, :] = [float(value) / float(normalize_factor)
                          for value in sp[137:140]]

    with h5py.File(train_h5_file, 'w') as h5_out:
        h5_out.create_dataset('data', data=HD5Images)
        h5_out.create_dataset('label', data=HD5Landmarks)
        h5_out.create_dataset('pose', data=HD5Poses)
    with open(train_h5_list_file, 'w') as list_out:
        list_out.write(train_h5_file)
if __name__ == '__main__':
    # Command-line entry point for the txt -> hdf5 conversion.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--train_txt_file",
        type=str,
        default="label_path/300w_label.txt",
        help="path to label in txt format",
    )
    cli.add_argument(
        "--train_h5_file",
        type=str,
        default="test.h5",
        help="path to generated h5 file",
    )
    cli.add_argument(
        "--train_h5_list_file",
        type=str,
        default="test_h5_list.txt",
        help="path to generated he list file",
    )
    cli_args = cli.parse_args()
    Generate_hdf5(
        cli_args.train_txt_file,
        cli_args.train_h5_file,
        cli_args.train_h5_list_file,
    )

5. 可视化hdf5文件是否正确
- import sys,os
- import cv2
- import h5py
- import numpy as np
# Visual sanity check of a generated HDF5 file: dump samples as JPEGs with
# the stored landmarks drawn on top.
result_dir = '../data/300w_ori/result'
# cv2.imwrite only returns False when the directory is missing.
if not os.path.isdir(result_dir):
    os.makedirs(result_dir)

with h5py.File('../data/300w_ori/train.h5', 'r') as f:
    data = f['data']
    label = f['label']
    # Not drawn; looked up so that a file lacking the dataset fails here.
    pose = f['pose']
    num = data.shape[0]
    # Bounded by the number of samples so small files do not raise.
    # NOTE(review): the loop starts at 1, so sample 0 is never dumped -
    # confirm whether that is intended.
    for i in range(1, min(num, 100)):
        gray = np.array(data[i, 0, :, :], dtype='float32')
        # The generator script in these notes stores standardised pixels
        # ((x - mean) / std), so stretch the value range to 0..255 instead of
        # multiplying by 255, which saturates when written as an 8-bit image.
        low = float(gray.min())
        span = float(gray.max()) - low
        if span > 0:
            gray = (gray - low) * (255.0 / span)
        else:
            gray = np.zeros_like(gray)
        image = np.dstack([gray.astype('uint8')] * 3)
        print(image.shape)
        landmark = np.array(label[i, :])
        print(landmark)
        # Landmarks are stored as fractions of the image size.
        height, width = image.shape[0], image.shape[1]
        for j in range(68):
            x = int(round(float(landmark[2*j]) * width))
            y = int(round(float(landmark[2*j+1]) * height))
            cv2.circle(image, (x, y), 1, (0, 0, 255), 1)
        cv2.imwrite(os.path.join(result_dir, str(i)+'.jpg'), image)

