1. 检测的评估函数

  # reference: https://github.com/eriklindernoren/PyTorch-YOLOv3/blob/f917503ffe4a21d2b1148d8cb13b89b834517d76/utils/utils.py
  def ap_per_class(tp, conf, pred_cls, target_cls):
      """Compute precision, recall, AP and F1 for every annotated class.

      Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.

      # Arguments
          tp: true-positive flag of every detection. Documented as a list,
              but it is indexed with an index array and with boolean masks,
              so a NumPy array is presumably expected -- confirm with callers.
          conf: confidence of every detection, documented as lying in [0, 1].
          pred_cls: predicted class of every detection.
          target_cls: class of every annotated (ground-truth) object.

      # Returns
          A tuple (p, r, ap, f1, unique_classes). p and r hold, per class,
          the last point of its precision / recall curve; ap holds the value
          returned by compute_ap (0 when a class has no detections); f1 is the
          harmonic mean of p and r; unique_classes is cast to int32.
      """
      # Rank all detections by descending confidence.
      ranking = np.argsort(-conf)
      tp, conf, pred_cls = tp[ranking], conf[ranking], pred_cls[ranking]

      # Only classes that occur in the annotations are evaluated.
      unique_classes = np.unique(target_cls)

      ap, p, r = [], [], []
      for c in tqdm.tqdm(unique_classes, desc="Computing AP"):
          mask = pred_cls == c
          n_gt = (target_cls == c).sum()  # annotated objects of class c
          n_p = mask.sum()  # detections predicted as class c

          if n_p == 0 and n_gt == 0:
              continue
          if n_p == 0 or n_gt == 0:
              ap.append(0)
              r.append(0)
              p.append(0)
              continue

          # Running totals of true and false positives along the ranking.
          hits = tp[mask]
          tpc = hits.cumsum()
          fpc = (1 - hits).cumsum()

          # Recall = TP / (TP + FN); TP + FN is the number of annotations.
          recall_curve = tpc / (n_gt + 1e-16)
          # Precision = TP / (TP + FP); TP + FP is the number of detections.
          precision_curve = tpc / (tpc + fpc)

          r.append(recall_curve[-1])
          p.append(precision_curve[-1])
          ap.append(compute_ap(recall_curve, precision_curve))

      # F1 score: harmonic mean of precision and recall; the epsilon keeps
      # the division defined when both are zero.
      p, r, ap = np.array(p), np.array(r), np.array(ap)
      f1 = 2 * p * r / (p + r + 1e-16)
      return p, r, ap, f1, unique_classes.astype("int32")

2. batch预测数据的统计

(1) 数据加载。对一个batch的图像,将非极大值抑制后得到的outputs与人工标注框targets进行比对,即可统计出TP(预测正确的目标)。这里使用的是保存在statistics_data.pt中的一个batch的数据,加载后输出outputs与targets的shape。

  # Load the saved statistics of one batch: the detections that survived
  # non-maximum suppression ("outputs") and the annotations ("targets").
  # NOTE(review): torch.load unpickles the file, so only load trusted files.
  batch_statistics = torch.load('statistics_data.pt', map_location='cpu')
  outputs, targets = batch_statistics['outputs'], batch_statistics['targets']
  # One shape per entry of outputs, then the shape of the targets tensor.
  print('outputs_shape: ', [x.shape for x in outputs])
  print('targets_size: ', targets.shape, end='\n\n')



(2) 统计TP。当某幅图像的某个预测框与targets中的真实框的IoU大于某个阈值,则表示该预测框能够作为targets中真实框的预测值。

  # Mark which predictions of a single image are true positives.
  # `output` holds the predictions of image `i`; every prediction starts out
  # as "not a true positive".
  true_positives = np.zeros(output.shape[0])
  # Keep the rows of `targets` whose first column equals i and drop that
  # column: what remains is the class index followed by the box coordinates.
  annotations = targets[targets[:, 0] == i][:, 1:]
  if len(annotations):
      target_labels, target_boxes = annotations[:, 0], annotations[:, 1:]
      print('target_boxes: ', target_boxes, ',target_labels: ', target_labels)
      detected_boxes = []
      # Columns 0-3 of a prediction are its box, the last column is its class.
      for pred_i, (pred_box, pred_cls) in enumerate(zip(output[:, :4], output[:, -1])):
          # Stop once every annotated box has been matched.
          if len(detected_boxes) == len(target_boxes):
              break
          # Skip predictions whose class is not annotated in this image.
          if pred_cls not in target_labels:
              continue
          # Annotated box with the highest IoU for this prediction.
          iou, box_index = bbox_iou(pred_box.unsqueeze(0), target_boxes).max(0)
          print('pred_{}'.format(pred_i), iou, ',box_index: ', box_index)
          # A true positive needs enough overlap with a box nobody claimed yet.
          if iou >= iou_threshold and box_index not in detected_boxes:
              true_positives[pred_i] = 1
              detected_boxes.append(box_index)


对batch中的每幅图像重复上述统计,得到每幅图像的[true_positives, pred_confs, pred_cls]:


  # Collect [true_positives, confidences, predicted classes] for every image
  # of the batch.
  batch_metrics = []
  for i, output in enumerate(outputs):
      # An image without any surviving detection may be stored as None
      # (assumption based on the referenced NMS implementation -- confirm).
      # It contributes no predictions, so skip it instead of failing on
      # `output.shape`.
      if output is None:
          continue
      # Every prediction starts out as "not a true positive".
      true_positives = np.zeros(output.shape[0])
      # Annotations of image i: class index followed by the box coordinates.
      annotations = targets[targets[:, 0] == i][:, 1:]
      if len(annotations):
          target_labels = annotations[:, 0]
          target_boxes = annotations[:, 1:]
          detected_boxes = []
          for pred_i, (pred_box, pred_cls) in enumerate(zip(output[:, :4], output[:, -1])):
              # Stop once every annotated box has been matched.
              if len(detected_boxes) == len(target_boxes):
                  break
              # Skip predictions whose class is not annotated in this image.
              if pred_cls not in target_labels:
                  continue
              # Annotated box with the highest IoU for this prediction.
              iou, box_index = bbox_iou(pred_box.unsqueeze(0), target_boxes).max(0)
              if iou >= iou_threshold and box_index not in detected_boxes:
                  true_positives[pred_i] = 1
                  detected_boxes.append(box_index)
      # Recorded for every image, including images without annotations, so
      # that their predictions still count (as false positives).
      # Per image: TP flags, predicted confidences, predicted class indices.
      batch_metrics.append([true_positives, output[:, 4], output[:, -1]])

3. Recall、Precision、F1、AP、mAP计算

(1) 获取batch统计的结果与排序


batch_metrics的结构如下:

[[true_positives1, pred_confs1, pred_cls1],

[true_positives2, pred_confs2, pred_cls2],

[true_positives3, pred_confs3, pred_cls3],

......]


通过解包重组: list(zip(*batch_metrics)),然后得到如下:

[(true_positives1, true_positives2, ......),

(pred_confs1, pred_confs2, ......),

(pred_cls1, pred_cls2, ......) ]

  # Unpack the statistics of one batch: regroup the per-image triples by kind
  # (TP flags, confidences, classes) and join each group into one array.
  true_positives, pred_confs, pred_cls = [np.concatenate(x, 0) for x in zip(*batch_metrics)]

  # Sort tp, conf and cls by descending confidence. The sorted confidences
  # are bound to `pred_confs` so that all three arrays stay aligned.
  idx = np.argsort(-pred_confs)
  tp, pred_confs, pred_cls = true_positives[idx], pred_confs[idx], pred_cls[idx]

(2) 评估一个batch的预测结果。通常情况是通过第2步的统计,统计出整个验证图片库的[true_positives, pred_confs, pred_cls]数据,然后再计算相应的评估值。

a. 召回率、精确度曲线

  # Classes that occur in the annotations.
  unique_cls = np.unique(target_cls)
  # Build the precision/recall curve of every annotated class.
  ap, p, r = [], [], []
  for num, c in enumerate(unique_cls):
      idx = pred_cls == c
      n_gt = (target_cls == c).sum().numpy()  # annotated objects of class c
      n_p = idx.sum()  # predictions of class c
      print('n_gt: ', n_gt, ',n_p: ', n_p)

      if n_p == 0 and n_gt == 0:
          continue
      if n_p == 0 or n_gt == 0:
          ap.append(0)
          r.append(0)
          p.append(0)
          continue

      # Running totals of true and false positives along the ranking.
      hits = tp[idx]
      tpc = hits.cumsum()
      fpc = (1 - hits).cumsum()

      # Recall = TP / (TP + FN); precision = TP / (TP + FP).
      recall_curve = tpc / (n_gt + 1e-16)
      precision_curve = tpc / (tpc + fpc)
      r.append(recall_curve[-1])
      p.append(precision_curve[-1])
      print('recall_curve: ', recall_curve, '\nprecision_curve: ', precision_curve)


b. AP计算



  # AP computation, i.e. the body of the evaluation helper compute_ap.
  # Pad both curves with sentinel end points.
  mrec = np.concatenate(([0.0], recall_curve, [1.0]))
  mpre = np.concatenate(([1.0], precision_curve, [0.0]))
  # Walk from right to left so that every precision value becomes the
  # maximum of itself and everything to its right.
  for k in reversed(range(mpre.size - 1)):
      mpre[k] = np.maximum(mpre[k], mpre[k + 1])
  # Positions at which the recall value changes.
  idx = np.where(mrec[1:] != mrec[:-1])[0]
  # Area under the curve: sum(delta_recall * precision).
  ap_c = np.sum((mrec[idx + 1] - mrec[idx]) * mpre[idx + 1])

c. 计算F1

# p and r are collected as Python lists above; convert them so the formula is
# evaluated element-wise (multiplying two lists raises TypeError).
p, r = np.asarray(p), np.asarray(r)
# Harmonic mean of precision and recall; the epsilon keeps the division
# defined when both are zero.
f1 = 2 * p * r / (p + r + 1e-16)

d. 总体代码

  # Evaluate the predictions of one batch.
  # Regroup the per-image triples by kind and join each group into one array.
  true_positives, pred_confs, pred_cls = [np.concatenate(x, 0) for x in zip(*batch_metrics)]
  # Column 1 of targets is the annotated class index.
  target_cls = targets[:, 1]
  # Sort tp, conf and cls by descending confidence; the sorted confidences
  # are bound to `pred_confs` so that all three arrays stay aligned.
  order = np.argsort(-pred_confs)
  tp, pred_confs, pred_cls = true_positives[order], pred_confs[order], pred_cls[order]
  # Classes that occur in the annotations.
  unique_cls = np.unique(target_cls)
  # Build the precision/recall curve of every class and compute its AP.
  ap, p, r = [], [], []
  for c in unique_cls:
      mask = pred_cls == c
      n_gt = (target_cls == c).sum().numpy()  # annotated objects of class c
      n_p = mask.sum()  # predictions of class c
      if n_p == 0 and n_gt == 0:
          # Nothing is appended here, so the per-class lists would no longer
          # line up with unique_cls; c is taken from target_cls, so this
          # branch is not expected to run.
          continue
      if n_p == 0 or n_gt == 0:
          ap.append(0)
          r.append(0)
          p.append(0)
          continue

      # Running totals of true and false positives along the ranking.
      hits = tp[mask]
      tpc = hits.cumsum()
      fpc = (1 - hits).cumsum()
      # Recall = TP / (TP + FN); precision = TP / (TP + FP).
      recall_curve = tpc / (n_gt + 1e-16)
      precision_curve = tpc / (tpc + fpc)
      r.append(recall_curve[-1])
      p.append(precision_curve[-1])

      # AP: pad the curves with sentinel end points, ...
      mrec = np.concatenate(([0.0], recall_curve, [1.0]))
      mpre = np.concatenate(([1.0], precision_curve, [0.0]))
      # ... replace every precision by the maximum of itself and everything
      # to its right (running maximum taken from the right end), ...
      mpre = np.maximum.accumulate(mpre[::-1])[::-1]
      # ... and sum delta_recall * precision where the recall changes.
      steps = np.where(mrec[1:] != mrec[:-1])[0]
      ap.append(np.sum((mrec[steps + 1] - mrec[steps]) * mpre[steps + 1]))

  # Per-class results for the whole batch.
  p, r, ap = np.array(p), np.array(r), np.array(ap)
  f1 = 2 * p * r / (p + r + 1e-16)
  # One row per class: [class, recall, precision, F1, AP].
  str_list = ['%d \t %.3f \t %.3f \t %.3f \t %.3f' % (x[0], x[1], x[2], x[3], x[4])
              for x in np.array([unique_cls, r, p, f1, ap]).T]
  print('cls_num | recall | precision | F1 | AP')
  for x in str_list:
      print(x)
  print('mAP:', np.mean(ap))


