  1. if __name__ == '__main__':
  2. main(parse_arguments(sys.argv[1:]))


  1. def main(args):
  2. sleep(random.random())
  3. #如果还没有输出文件夹,则创建
  4. output_dir = os.path.expanduser(args.output_dir)
  5. if not os.path.exists(output_dir):
  6. os.makedirs(output_dir)
  7. #在日志目录的文本文件中存储一些Git修订信息
  8. # Store some git revision info in a text file in the log directory
  9. src_path,_ = os.path.split(os.path.realpath(__file__))
  10. #在output_dir文件夹下创建revision_info.txt文件,里面存的是执行该命令时的参数信息
  11. #当前使用的tensorflow版本,git hash,git diff
  12. facenet.store_revision_info(src_path, output_dir, ' '.join(sys.argv))
  13. # 获取数据集下所有人名和其人名目录下是所有图片,
  14. # 放到ImageClass类中,再将类存到dataset列表里
  15. dataset = facenet.get_dataset(args.input_dir)
  16. print('Creating networks and loading parameters')


  1. with tf.Graph().as_default():
  2. #设置Session的GPU参数,每条线程分配多少显存
  3. gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction)
  4. sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
  5. with sess.as_default():
  6. #获取P-Net,R-Net,O-Net网络
  7. pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)


  1. minsize = 20 # minimum size of face
  2. threshold = [ 0.6, 0.7, 0.7 ] # three steps's threshold
  3. factor = 0.709 # scale factor
  4. # Add a random key to the filename to allow alignment using multiple processes
  5. # 获取一个随机数,用于创建下面的文件名
  6. random_key = np.random.randint(0, high=99999)
  7. # 将图片和求得的相应的Bbox保存到bounding_boxes_XXXXX.txt文件里
  8. bounding_boxes_filename = os.path.join(output_dir, 'bounding_boxes_%05d.txt' % random_key)



  1. with open(bounding_boxes_filename, "w") as text_file:
  2. #处理图片的总数量
  3. nrof_images_total = 0
  4. nrof_successfully_aligned = 0
  5. #是否对所有图片进行洗牌
  6. if args.random_order:
  7. random.shuffle(dataset)
  8. for cls in dataset:
  9. output_class_dir = os.path.join(output_dir, cls.name)
  10. #如果目的文件夹里还没有相应的人名的文件夹,则创建相应文件夹
  11. if not os.path.exists(output_class_dir):
  12. os.makedirs(output_class_dir)
  13. if args.random_order:
  14. random.shuffle(cls.image_paths)
  15. for image_path in cls.image_paths:
  16. nrof_images_total += 1
  17. # 对齐后的图片文件名
  18. filename = os.path.splitext(os.path.split(image_path)[1])[0]
  19. output_filename = os.path.join(output_class_dir, filename+'.png')
  20. print(image_path)
  21. if not os.path.exists(output_filename):
  22. try:
  23. #读取图片文件
  24. img = misc.imread(image_path)
  25. except (IOError, ValueError, IndexError) as e:
  26. errorMessage = '{}: {}'.format(image_path, e)
  27. print(errorMessage)
  28. else:
  29. if img.ndim<2:
  30. print('Unable to align "%s"' % image_path)
  31. text_file.write('%s\n' % (output_filename))
  32. continue
  33. if img.ndim == 2:
  34. img = facenet.to_rgb(img)
  35. img = img[:,:,0:3]


  1. #检测人脸,bounding_boxes可能包含多张人脸框数据,
  2. # 一张人脸框有5个数据,第一和第二个数据表示框左上角坐标,第三个第四个数据表示框右下角坐标,
  3. #最后一个数据应该是可信度
  4. bounding_boxes, _ = align.detect_face.detect_face(img, minsize, pnet, rnet, onet, threshold, factor)
  5. #获得的人脸数量
  6. nrof_faces = bounding_boxes.shape[0]
  7. if nrof_faces>0:
  8. det = bounding_boxes[:,0:4]
  9. det_arr = []
  10. #原图片大小
  11. img_size = np.asarray(img.shape)[0:2]
  12. if nrof_faces>1:
  13. if args.detect_multiple_faces:
  14. # 如果要检测多张人脸的话
  15. for i in range(nrof_faces):
  16. det_arr.append(np.squeeze(det[i]))
  17. else:
  18. #即使有多张人脸,也只要一张人脸就够了
  19. #获取人脸框的大小
  20. bounding_box_size = (det[:,2]-det[:,0])*(det[:,3]-det[:,1])
  21. #原图片中心坐标
  22. img_center = img_size / 2
  23. #求人脸框中心点相对于图片中心点的偏移,
  24. #(det[:,0]+det[:,2])/2和(det[:,1]+det[:,3])/2组成的坐标其实就是人脸框中心点
  25. offsets = np.vstack([ (det[:,0]+det[:,2])/2-img_center[1], (det[:,1]+det[:,3])/2-img_center[0] ])
  26. #求人脸框中心到图片中心偏移的平方和
  27. #假设offsets=[[ 4.20016056 145.02849352 -134.53862838] [ -22.14250919 -26.74770141 -30.76835772]]
  28. #则offset_dist_squared=[ 507.93206189 21748.70346425 19047.33436466]
  29. offset_dist_squared = np.sum(np.power(offsets,2.0),0)
  30. # 用人脸框像素大小减去偏移平方和的两倍,得到的结果哪个大就选哪个人脸框
  31. # 其实就是综合考虑了人脸框的位置和大小,优先选择框大,又靠近图片中心的人脸框
  32. index = np.argmax(bounding_box_size-offset_dist_squared*2.0) # some extra weight on the centering
  33. det_arr.append(det[index,:])
  34. else:
  35. #只有一个人脸框的话,那就没得选了
  36. det_arr.append(np.squeeze(det))


  1. for i, det in enumerate(det_arr):
  2. det = np.squeeze(det)
  3. bb = np.zeros(4, dtype=np.int32)
  4. #边界框周围的裁剪边缘,就是我们这里要裁剪的人脸框要比MTCNN获取的人脸框大一点,
  5. #至于大多少,就由margin参数决定了
  6. bb[0] = np.maximum(det[0]-args.margin/2, 0)
  7. bb[1] = np.maximum(det[1]-args.margin/2, 0)
  8. bb[2] = np.minimum(det[2]+args.margin/2, img_size[1])
  9. bb[3] = np.minimum(det[3]+args.margin/2, img_size[0])
  10. #裁剪人脸框,再缩放
  11. cropped = img[bb[1]:bb[3],bb[0]:bb[2],:]
  12. scaled = misc.imresize(cropped, (args.image_size, args.image_size), interp='bilinear')
  13. nrof_successfully_aligned += 1
  14. filename_base, file_extension = os.path.splitext(output_filename)
  15. if args.detect_multiple_faces:
  16. output_filename_n = "{}_{}{}".format(filename_base, i, file_extension)
  17. else:
  18. output_filename_n = "{}{}".format(filename_base, file_extension)
  19. #保存图片
  20. misc.imsave(output_filename_n, scaled)
  21. #记录信息到bounding_boxes_XXXXX.txt文件里
  22. text_file.write('%s %d %d %d %d\n' % (output_filename_n, bb[0], bb[1], bb[2], bb[3]))




  1. #创建MTCNN网络
  2. #关于MTCNN网络,参考博客:https://blog.csdn.net/rookie_wei/article/details/81676177
  3. def create_mtcnn(sess, model_path):
  4. if not model_path:
  5. model_path,_ = os.path.split(os.path.realpath(__file__))
  6. with tf.variable_scope('pnet'):
  7. #P-Net网络的输入,输入的宽高不限
  8. data = tf.placeholder(tf.float32, (None,None,None,3), 'input')
  9. pnet = PNet({'data':data})
  10. pnet.load(os.path.join(model_path, 'det1.npy'), sess)
  11. with tf.variable_scope('rnet'):
  12. # R-Net网络的输入是24*24*3
  13. data = tf.placeholder(tf.float32, (None,24,24,3), 'input')
  14. rnet = RNet({'data':data})
  15. rnet.load(os.path.join(model_path, 'det2.npy'), sess)
  16. with tf.variable_scope('onet'):
  17. # O-Net网络的输入是48*48*3
  18. data = tf.placeholder(tf.float32, (None,48,48,3), 'input')
  19. onet = ONet({'data':data})
  20. onet.load(os.path.join(model_path, 'det3.npy'), sess)
  21. #返回两个参数,第一个参数是人脸框,第二个参数是是否人脸的概率
  22. pnet_fun = lambda img : sess.run(('pnet/conv4-2/BiasAdd:0', 'pnet/prob1:0'), feed_dict={'pnet/input:0':img})
  23. # 返回两个参数,第一个参数是人脸框,第二个参数是是否人脸的概率
  24. rnet_fun = lambda img : sess.run(('rnet/conv5-2/conv5-2:0', 'rnet/prob1:0'), feed_dict={'rnet/input:0':img})
  25. # 返回三个参数,第一个参数是人脸框,第二个参数是是否人脸的概率,第三个参数是5个关键点坐标
  26. onet_fun = lambda img : sess.run(('onet/conv6-2/conv6-2:0', 'onet/conv6-3/conv6-3:0', 'onet/prob1:0'), feed_dict={'onet/input:0':img})
  27. return pnet_fun, rnet_fun, onet_fun



  1. #P-Net网络
  2. class PNet(Network):
  3. def setup(self):
  4. (self.feed('data') #pylint: disable=no-value-for-parameter, no-member
  5. # 第一层卷积核大小为3*3,输出通道为10
  6. .conv(3, 3, 10, 1, 1, padding='VALID', relu=False, name='conv1')
  7. .prelu(name='PReLU1')
  8. .max_pool(2, 2, 2, 2, name='pool1')
  9. # 第二层卷积核大小也为3*3,输出通道为16
  10. .conv(3, 3, 16, 1, 1, padding='VALID', relu=False, name='conv2')
  11. .prelu(name='PReLU2')
  12. # 第三层卷积核大小也为3*3,输出通道为32
  13. .conv(3, 3, 32, 1, 1, padding='VALID', relu=False, name='conv3')
  14. .prelu(name='PReLU3')
  15. # 这里应该就是face classification的输出
  16. .conv(1, 1, 2, 1, 1, relu=False, name='conv4-1')
  17. .softmax(3,name='prob1'))
  18. #这里应该是bounding box regression的输出
  19. (self.feed('PReLU3') #pylint: disable=no-value-for-parameter
  20. .conv(1, 1, 4, 1, 1, relu=False, name='conv4-2'))


  1. class Network(object):
  2. def __init__(self, inputs, trainable=True):
  3. # The input nodes for this network
  4. self.inputs = inputs
  5. # The current list of terminal nodes
  6. self.terminals = []
  7. # Mapping from layer names to layers
  8. self.layers = dict(inputs)
  9. # If true, the resulting variables are set as trainable
  10. self.trainable = trainable
  11. #设置神经网络,子类实现
  12. self.setup()
  13. #设置神经网络,由子类实现
  14. def setup(self):
  15. """Construct the network. """
  16. raise NotImplementedError('Must be implemented by the subclass.')



  1. #加载已经训练好的网络的weights数据
  2. def load(self, data_path, session, ignore_missing=False):
  3. """Load network weights.
  4. data_path: The path to the numpy-serialized network weights
  5. session: The current TensorFlow session
  6. ignore_missing: If true, serialized weights for missing layers are ignored.
  7. """
  8. data_dict = np.load(data_path, encoding='latin1').item() #pylint: disable=no-member
  9. for op_name in data_dict:
  10. with tf.variable_scope(op_name, reuse=True):
  11. for param_name, data in iteritems(data_dict[op_name]):
  12. try:
  13. var = tf.get_variable(param_name)
  14. session.run(var.assign(data))
  15. except ValueError:
  16. if not ignore_missing:
  17. raise


  1. pnet.load(os.path.join(model_path, 'det1.npy'), sess)
  2. rnet.load(os.path.join(model_path, 'det2.npy'), sess)
  3. onet.load(os.path.join(model_path, 'det3.npy'), sess)



  1. #通过替换终端节点为下一个操作设置输入。参数可以是层名称,也可以是实际层。
  2. def feed(self, *args):
  3. """Set the input(s) for the next operation by replacing the terminal nodes.
  4. The arguments can be either layer names or the actual layers.
  5. """
  6. assert len(args) != 0
  7. self.terminals = []
  8. for fed_layer in args:
  9. if isinstance(fed_layer, string_types):
  10. try:
  11. fed_layer = self.layers[fed_layer]
  12. except KeyError:
  13. raise KeyError('Unknown layer name fed: %s' % fed_layer)
  14. self.terminals.append(fed_layer)
  15. return self


第三层网络输出后,再经过一个1*1*4的卷积层,得到bounding box regression。所以feed函数其实就是用来获取网络节点的:想获取哪个网络节点,就传入那个节点的名字即可。而self.feed('data')中的data就是create_mtcnn函数中传入的占位符,也就是输入图片的数据。


  1. #卷积层
  2. @layer
  3. def conv(self,
  4. inp,
  5. k_h,
  6. k_w,
  7. c_o,
  8. s_h,
  9. s_w,
  10. name,
  11. relu=True,
  12. padding='SAME',
  13. group=1,
  14. biased=True):
  15. # Verify that the padding is acceptable
  16. self.validate_padding(padding)
  17. # Get the number of channels in the input
  18. c_i = int(inp.get_shape()[-1])
  19. # Verify that the grouping parameter is valid
  20. assert c_i % group == 0
  21. assert c_o % group == 0
  22. # Convolution for a given input and kernel
  23. convolve = lambda i, k: tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding)
  24. with tf.variable_scope(name) as scope:
  25. kernel = self.make_var('weights', shape=[k_h, k_w, c_i // group, c_o])
  26. # This is the common-case. Convolve the input without any further complications.
  27. output = convolve(inp, kernel)
  28. # Add the biases
  29. if biased:
  30. biases = self.make_var('biases', [c_o])
  31. output = tf.nn.bias_add(output, biases)
  32. if relu:
  33. # ReLU non-linearity
  34. output = tf.nn.relu(output, name=scope.name)
  35. return output
  36. #prelu激活函数
  37. @layer
  38. def prelu(self, inp, name):
  39. with tf.variable_scope(name):
  40. i = int(inp.get_shape()[-1])
  41. alpha = self.make_var('alpha', shape=(i,))
  42. output = tf.nn.relu(inp) + tf.multiply(alpha, -tf.nn.relu(-inp))
  43. return output
  44. #池化层
  45. @layer
  46. def max_pool(self, inp, k_h, k_w, s_h, s_w, name, padding='SAME'):
  47. self.validate_padding(padding)
  48. return tf.nn.max_pool(inp,
  49. ksize=[1, k_h, k_w, 1],
  50. strides=[1, s_h, s_w, 1],
  51. padding=padding,
  52. name=name)
  53. #全连接层
  54. @layer
  55. def fc(self, inp, num_out, name, relu=True):
  56. with tf.variable_scope(name):
  57. input_shape = inp.get_shape()
  58. if input_shape.ndims == 4:
  59. # The input is spatial. Vectorize it first.
  60. dim = 1
  61. for d in input_shape[1:].as_list():
  62. dim *= int(d)
  63. feed_in = tf.reshape(inp, [-1, dim])
  64. else:
  65. feed_in, dim = (inp, input_shape[-1].value)
  66. weights = self.make_var('weights', shape=[dim, num_out])
  67. biases = self.make_var('biases', [num_out])
  68. op = tf.nn.relu_layer if relu else tf.nn.xw_plus_b
  69. fc = op(feed_in, weights, biases, name=name)
  70. return fc
  71. """
  72. Multi dimensional softmax,
  73. refer to https://github.com/tensorflow/tensorflow/issues/210
  74. compute softmax along the dimension of target
  75. the native softmax only supports batch_size x dimension
  76. """
  77. @layer
  78. def softmax(self, target, axis, name=None):
  79. max_axis = tf.reduce_max(target, axis, keepdims=True)
  80. target_exp = tf.exp(target-max_axis)
  81. normalize = tf.reduce_sum(target_exp, axis, keepdims=True)
  82. softmax = tf.div(target_exp, normalize, name)
  83. return softmax


  1. #R-Net网络
  2. class RNet(Network):
  3. def setup(self):
  4. (self.feed('data') #pylint: disable=no-value-for-parameter, no-member
  5. #第一层卷积核大小为3*3,输出通道为28
  6. .conv(3, 3, 28, 1, 1, padding='VALID', relu=False, name='conv1')
  7. .prelu(name='prelu1')
  8. .max_pool(3, 3, 2, 2, name='pool1')
  9. # 第二层卷积核大小为3*3,输出通道为48
  10. .conv(3, 3, 48, 1, 1, padding='VALID', relu=False, name='conv2')
  11. .prelu(name='prelu2')
  12. .max_pool(3, 3, 2, 2, padding='VALID', name='pool2')
  13. # 第三层卷积核大小为2*2,输出通道为64
  14. .conv(2, 2, 64, 1, 1, padding='VALID', relu=False, name='conv3')
  15. .prelu(name='prelu3')
  16. # 第四层全连接网络,输出为128
  17. .fc(128, relu=False, name='conv4')
  18. .prelu(name='prelu4')
  19. # 全连接层,这里是face classification的输出,输出为2
  20. .fc(2, relu=False, name='conv5-1')
  21. .softmax(1,name='prob1'))
  22. # 全连接层,这里是bounding box regression的输出,输出为4
  23. (self.feed('prelu4') #pylint: disable=no-value-for-parameter
  24. .fc(4, relu=False, name='conv5-2'))
  25. #O-Net网络
  26. class ONet(Network):
  27. def setup(self):
  28. (self.feed('data') #pylint: disable=no-value-for-parameter, no-member
  29. # 第一层卷积核大小为3*3,输出通道为32
  30. .conv(3, 3, 32, 1, 1, padding='VALID', relu=False, name='conv1')
  31. .prelu(name='prelu1')
  32. .max_pool(3, 3, 2, 2, name='pool1')
  33. # 第二层卷积核大小为3*3,输出通道为64
  34. .conv(3, 3, 64, 1, 1, padding='VALID', relu=False, name='conv2')
  35. .prelu(name='prelu2')
  36. .max_pool(3, 3, 2, 2, padding='VALID', name='pool2')
  37. # 第三层卷积核大小为3*3,输出通道为64
  38. .conv(3, 3, 64, 1, 1, padding='VALID', relu=False, name='conv3')
  39. .prelu(name='prelu3')
  40. .max_pool(2, 2, 2, 2, name='pool3')
  41. # 第四层卷积核大小为2*2,输出通道为128
  42. .conv(2, 2, 128, 1, 1, padding='VALID', relu=False, name='conv4')
  43. .prelu(name='prelu4')
  44. # 全连接层,输出为256
  45. .fc(256, relu=False, name='conv5')
  46. .prelu(name='prelu5')
  47. # 全连接层,这里是face classification的输出,输出为2
  48. .fc(2, relu=False, name='conv6-1')
  49. .softmax(1, name='prob1'))
  50. # 全连接层,这里是bounding box regression的输出,输出为4
  51. (self.feed('prelu5') #pylint: disable=no-value-for-parameter
  52. .fc(4, relu=False, name='conv6-2'))
  53. # 全连接层,这里是Facial landmark localization的输出,输出为10
  54. (self.feed('prelu5') #pylint: disable=no-value-for-parameter
  55. .fc(10, relu=False, name='conv6-3'))




  1. #检测人脸,返回人脸框和五个关键点的坐标
  2. def detect_face(img, minsize, pnet, rnet, onet, threshold, factor):
  3. """Detects faces in an image, and returns bounding boxes and points for them.
  4. img: input image
  5. minsize: minimum faces' size
  6. pnet, rnet, onet: caffemodel
  7. threshold: threshold=[th1, th2, th3], th1-3 are three steps's threshold
  8. factor: the factor used to create a scaling pyramid of face sizes to detect in the image.
  9. """
  10. factor_count=0
  11. total_boxes=np.empty((0,9))
  12. points=np.empty(0)
  13. #获取输入的图片的宽高
  14. h=img.shape[0]
  15. w=img.shape[1]
  16. #宽/高,谁小取谁
  17. minl=np.amin([h, w])
  18. m=12.0/minsize
  19. minl=minl*m
  20. # create scale pyramid
  21. #创建比例金字塔
  22. scales=[]
  23. while minl>=12:
  24. scales += [m*np.power(factor, factor_count)]
  25. minl = minl*factor
  26. factor_count += 1


  1. #imap:框是人脸的可信度
  2. #reg:所有人脸框
  3. #scale:图片缩减比例
  4. #t:阈值
  5. def generateBoundingBox(imap, reg, scale, t):
  6. """Use heatmap to generate bounding boxes"""
  7. stride=2
  8. cellsize=12
  9. imap = np.transpose(imap)
  10. #获取x1,y1,x2,y2的坐标
  11. dx1 = np.transpose(reg[:,:,0])
  12. dy1 = np.transpose(reg[:,:,1])
  13. dx2 = np.transpose(reg[:,:,2])
  14. dy2 = np.transpose(reg[:,:,3])
  15. #获取可信度大于阈值的人脸框的坐标
  16. y, x = np.where(imap >= t)
  17. #只有一个符合的情况
  18. if y.shape[0]==1:
  19. dx1 = np.flipud(dx1)
  20. dy1 = np.flipud(dy1)
  21. dx2 = np.flipud(dx2)
  22. dy2 = np.flipud(dy2)
  23. #筛选出符合条件的框
  24. score = imap[(y,x)]
  25. reg = np.transpose(np.vstack([ dx1[(y,x)], dy1[(y,x)], dx2[(y,x)], dy2[(y,x)] ]))
  26. if reg.size==0:
  27. reg = np.empty((0,3))
  28. #还原尺度
  29. bb = np.transpose(np.vstack([y,x]))
  30. q1 = np.fix((stride*bb+1)/scale)
  31. q2 = np.fix((stride*bb+cellsize-1+1)/scale)
  32. # shape(None, 9)
  33. boundingbox = np.hstack([q1, q2, np.expand_dims(score,1), reg])
  34. return boundingbox, reg


  1. # function pick = nms(boxes,threshold,type)
  2. # 非极大值抑制,去掉重复的检测框
  3. def nms(boxes, threshold, method):
  4. if boxes.size==0:
  5. return np.empty((0,3))
  6. #框
  7. x1 = boxes[:,0]
  8. y1 = boxes[:,1]
  9. x2 = boxes[:,2]
  10. y2 = boxes[:,3]
  11. #得分值,即可信度
  12. s = boxes[:,4]
  13. area = (x2-x1+1) * (y2-y1+1)
  14. #排序,从小到大,返回的是坐标
  15. I = np.argsort(s)
  16. pick = np.zeros_like(s, dtype=np.int16)
  17. counter = 0
  18. while I.size>0:
  19. i = I[-1]
  20. pick[counter] = i
  21. counter += 1
  22. idx = I[0:-1]
  23. xx1 = np.maximum(x1[i], x1[idx])
  24. yy1 = np.maximum(y1[i], y1[idx])
  25. xx2 = np.minimum(x2[i], x2[idx])
  26. yy2 = np.minimum(y2[i], y2[idx])
  27. w = np.maximum(0.0, xx2-xx1+1)
  28. h = np.maximum(0.0, yy2-yy1+1)
  29. inter = w * h
  30. if method is 'Min':
  31. o = inter / np.minimum(area[i], area[idx])
  32. else:
  33. o = inter / (area[i] + area[idx] - inter)
  34. I = I[np.where(o<=threshold)]
  35. pick = pick[0:counter]
  36. return pick


  1. if boxes.size>0 and pick.size>0:
  2. boxes = boxes[pick,:]
  3. total_boxes = np.append(total_boxes, boxes, axis=0)



  1. numbox = total_boxes.shape[0]
  2. if numbox>0:
  3. # 再经过nms筛选掉一些可靠度更低的人脸框
  4. pick = nms(total_boxes.copy(), 0.7, 'Union')
  5. total_boxes = total_boxes[pick,:]
  6. #获取每个人脸框的宽高
  7. regw = total_boxes[:,2]-total_boxes[:,0]
  8. regh = total_boxes[:,3]-total_boxes[:,1]
  9. # 对人脸框坐标做一些处理,使得人脸框更紧凑
  10. qq1 = total_boxes[:,0]+total_boxes[:,5]*regw
  11. qq2 = total_boxes[:,1]+total_boxes[:,6]*regh
  12. qq3 = total_boxes[:,2]+total_boxes[:,7]*regw
  13. qq4 = total_boxes[:,3]+total_boxes[:,8]*regh
  14. total_boxes = np.transpose(np.vstack([qq1, qq2, qq3, qq4, total_boxes[:,4]]))
  15. total_boxes = rerec(total_boxes.copy())
  16. total_boxes[:,0:4] = np.fix(total_boxes[:,0:4]).astype(np.int32)
  17. dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h)


  1. #第二步,经过R-Net网络
  2. numbox = total_boxes.shape[0]
  3. if numbox>0:
  4. # second stage
  5. tempimg = np.zeros((24,24,3,numbox))
  6. for k in range(0,numbox):
  7. tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3))
  8. tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:]
  9. if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0:
  10. #R-Net输入大小为24*24,所以要进行缩放
  11. tempimg[:,:,:,k] = imresample(tmp, (24, 24))
  12. else:
  13. return np.empty()
  14. tempimg = (tempimg-127.5)*0.0078125
  15. tempimg1 = np.transpose(tempimg, (3,1,0,2))
  16. #经过R-Net网络
  17. out = rnet(tempimg1)
  18. out0 = np.transpose(out[0])
  19. out1 = np.transpose(out[1])
  20. score = out1[1,:]
  21. ipass = np.where(score>threshold[1])
  22. total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)])
  23. mv = out0[:,ipass[0]]
  24. if total_boxes.shape[0]>0:
  25. pick = nms(total_boxes, 0.7, 'Union')
  26. total_boxes = total_boxes[pick,:]
  27. total_boxes = bbreg(total_boxes.copy(), np.transpose(mv[:,pick]))
  28. total_boxes = rerec(total_boxes.copy())




  1. #第三步,经过O-Net网络
  2. numbox = total_boxes.shape[0]
  3. if numbox>0:
  4. # third stage
  5. total_boxes = np.fix(total_boxes).astype(np.int32)
  6. dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h)
  7. tempimg = np.zeros((48,48,3,numbox))
  8. for k in range(0,numbox):
  9. tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3))
  10. tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:]
  11. if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0:
  12. # O-Net输入大小为48*48,所以要进行缩放
  13. tempimg[:,:,:,k] = imresample(tmp, (48, 48))
  14. else:
  15. return np.empty()
  16. tempimg = (tempimg-127.5)*0.0078125
  17. tempimg1 = np.transpose(tempimg, (3,1,0,2))
  18. # 经过O-Net网络
  19. out = onet(tempimg1)
  20. out0 = np.transpose(out[0])
  21. out1 = np.transpose(out[1])
  22. out2 = np.transpose(out[2])
  23. score = out2[1,:]
  24. points = out1
  25. ipass = np.where(score>threshold[2])
  26. points = points[:,ipass[0]]
  27. total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)])
  28. mv = out0[:,ipass[0]]
  29. w = total_boxes[:,2]-total_boxes[:,0]+1
  30. h = total_boxes[:,3]-total_boxes[:,1]+1
  31. points[0:5,:] = np.tile(w,(5, 1))*points[0:5,:] + np.tile(total_boxes[:,0],(5, 1))-1
  32. points[5:10,:] = np.tile(h,(5, 1))*points[5:10,:] + np.tile(total_boxes[:,1],(5, 1))-1
  33. if total_boxes.shape[0]>0:
  34. total_boxes = bbreg(total_boxes.copy(), np.transpose(mv))
  35. pick = nms(total_boxes.copy(), 0.7, 'Min')
  36. total_boxes = total_boxes[pick,:]
  37. points = points[:,pick]


  1. #显示人脸框和关键点
  2. for i in range(len(total_boxes)):
  3. x1 = total_boxes[:, 0]
  4. y1 = total_boxes[:, 1]
  5. x2 = total_boxes[:, 2]
  6. y2 = total_boxes[:, 3]
  7. print('lll', x1[i], y1[i], x2[i], y2[i])
  8. plt.gca().add_patch(
  9. plt.Rectangle((x1[i], y1[i]), x2[i] - x1[i], y2[i] - y1[i], edgecolor='r', facecolor='none'))
  10. plt.scatter(points[0], points[5], c='red')
  11. plt.scatter(points[1], points[6], c='red')
  12. plt.scatter(points[2], points[7], c='red')
  13. plt.scatter(points[3], points[8], c='red')
  14. plt.scatter(points[4], points[9], c='red')
  15. plt.imshow(scale_img)
  16. plt.show()
  17. exit()












