相信刚上手yolov5的小伙伴,对train.py和val.py 中的评价指标很疑惑吧。


1.混淆矩阵(confusion matrix)



(1) 真实值是正确的,预测值也是正确的。称为TP

(2) 真实值是错误的,预测值也是错误的。称为TN

(3) 真实值是正确的,预测值是错误的。称为FN

(4) 真实值是错误的,预测值是正确的。称为FP







 以上三个当然是越高越好。一般yolov5给出的混淆矩阵都是经过归一化的(按列归一化,即按真实类别归一化),对角线上的值越大越好。


官方文档为:kaanakan/object_detection_confusion_matrix: Python class for calculating confusion matrix for object detection task (github.com)




confusion_matrix = ConfusionMatrix(nc=nc)


  class ConfusionMatrix:
      """Accumulate a confusion matrix for an object-detection task.

      Updated version of https://github.com/kaanakan/object_detection_confusion_matrix

      The matrix has ``nc + 1`` rows and columns: one per class plus one extra
      slot at index ``nc``.
      """

      def __init__(self, nc, conf=0.25, iou_thres=0.45):
          """Create an all-zero (nc + 1) x (nc + 1) matrix and store the thresholds.

          Args:
              nc: Number of classes.
              conf: Confidence threshold kept on the instance.
              iou_thres: IoU threshold kept on the instance.
          """
          self.matrix = np.zeros((nc + 1, nc + 1))
          self.nc = nc  # number of classes
          self.conf = conf
          self.iou_thres = iou_thres


  1. if plots:
  2. confusion_matrix.process_batch(predn, labelsn)

 plots为run的参数,默认为True。当plots为True时,调用ConfusionMatrix类中的process_batch函数。

  def process_batch(self, detections, labels):
      """Update the confusion matrix with the detections and labels of one batch.

      Boxes are expected to be in (x1, y1, x2, y2) format.

      Arguments:
          detections (Array[N, 6]), x1, y1, x2, y2, conf, class
          labels (Array[M, 5]), class, x1, y1, x2, y2
      Returns:
          None, updates confusion matrix accordingly
      """
      # Keep only the detections whose confidence exceeds the configured threshold.
      detections = detections[detections[:, 4] > self.conf]
      gt_classes = labels[:, 0].int()  # class id of every ground-truth box
      detection_classes = detections[:, 5].int()  # class id of every kept detection
      iou = box_iou(labels[:, 1:], detections[:, :4])  # IoU of every (label, detection) pair

      # Candidate pairs (label index, detection index) whose IoU exceeds the threshold.
      x = torch.where(iou > self.iou_thres)
      if x[0].shape[0]:
          # Rows of `matches` are [label index, detection index, IoU].
          matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()
          if x[0].shape[0] > 1:
              # Sort by IoU (descending) and keep the first row per detection,
              # then repeat to keep the first row per label, so that each
              # detection and each label appears in at most one match.
              matches = matches[matches[:, 2].argsort()[::-1]]
              matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
              matches = matches[matches[:, 2].argsort()[::-1]]
              matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
      else:
          matches = np.zeros((0, 3))

      n = matches.shape[0] > 0  # whether at least one match survived
      # m0: matched label indices, m1: matched detection indices.
      # Plain int (not int16) so indices above 32767 cannot wrap around.
      m0, m1, _ = matches.transpose().astype(int)
      for i, gc in enumerate(gt_classes):
          j = m0 == i
          if n and sum(j) == 1:
              # Label matched: row = predicted class, column = true class.
              self.matrix[detection_classes[m1[j]], gc] += 1
          else:
              # Label without a matching detection: counted in the extra row.
              self.matrix[self.nc, gc] += 1

      # Runs even when nothing matched, so that unmatched detections are
      # counted for images without any match as well.
      for i, dc in enumerate(detection_classes):
          if not any(m1 == i):
              # Detection without a matching label: counted in the extra column.
              self.matrix[dc, self.nc] += 1


  1. if plots:
  2. confusion_matrix.plot(save_dir=save_dir, names=list(names.values()))
  3. callbacks.run('on_val_end')


  def plot(self, normalize=True, save_dir='', names=()):
      """Draw the confusion matrix as a heatmap and save it as ``confusion_matrix.png``.

      Plotting is best-effort: any failure is reported with a printed warning
      instead of being raised.

      Args:
          normalize: When true, divide every column by its sum before plotting.
          save_dir: Directory in which ``confusion_matrix.png`` is written.
          names: Sequence of class names; used as tick labels only when its
              length equals ``self.nc`` and is below 99.
      """
      fig = None
      try:
          import seaborn as sn

          array = self.matrix / ((self.matrix.sum(0).reshape(1, -1) + 1E-6) if normalize else 1)  # normalize columns
          array[array < 0.005] = np.nan  # don't annotate (would appear as 0.00)

          fig = plt.figure(figsize=(12, 9), tight_layout=True)
          sn.set(font_scale=1.0 if self.nc < 50 else 0.8)  # for label size
          names = list(names)  # tuples (the default type) cannot be concatenated with a list
          labels = (0 < len(names) < 99) and len(names) == self.nc  # apply names to ticklabels
          with warnings.catch_warnings():
              warnings.simplefilter('ignore')  # suppress empty matrix RuntimeWarning: All-NaN slice encountered
              sn.heatmap(array, annot=self.nc < 30, annot_kws={"size": 8}, cmap='Blues', fmt='.2f', square=True,
                         xticklabels=names + ['background FP'] if labels else "auto",
                         yticklabels=names + ['background FN'] if labels else "auto").set_facecolor((1, 1, 1))
          fig.axes[0].set_xlabel('True')
          fig.axes[0].set_ylabel('Predicted')
          fig.savefig(Path(save_dir) / 'confusion_matrix.png', dpi=250)
      except Exception as e:
          print(f'WARNING: ConfusionMatrix plot failure: {e}')
      finally:
          # Close exactly the figure created here, also when plotting failed.
          if fig is not None:
              plt.close(fig)


 部分代码参考:【YOLOV5-5.x 源码解读】metrics.py_yolov5 metrics.py_满船清梦压星河HK的博客-CSDN博客


当想要同时控制风险 ( recall ) 和成本 ( precision )怎么办?那就用 F1 Score 。















AP(average precision):平均精度。虽然叫平均精度,但它衡量的是模型在单个类别上的检测精度;它也不是对precision直接求平均,而是计算PR曲线与坐标轴围成的面积。一个模型的AP越大,说明PR曲线与坐标轴围成的面积越大,Precision与Recall在整体上也相对较高。

mAP(mean average precision):对所有类别的AP求平均值。AP反映的是对单个类别精度的判断,mAP反映的是对所有类别精度的判断,当然越接近1越好啦。平时我们说的某一目标检测算法的准确率达到了多少,这个准确率一般就是指mAP。





部分参考:YOLO 模型的评估指标——IOU、Precision、Recall、F1-score、mAP_yolo评价指标_G.E.N.的博客-CSDN博客



  1. s = ('%20s' + '%11s' * 6) % ('Class', 'Images', 'Labels', 'P', 'R', 'mAP@.5', 'mAP@.5:.95')
  2. dt, p, r, f1, mp, mr, map50, map = [0.0, 0.0, 0.0], 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 #初始化


  1. if len(stats) and stats[0].any():
  2. p, r, ap, f1, ap_class = ap_per_class(*stats, plot=plots, save_dir=save_dir, names=names) #Plot precision-recall curve at mAP@0.5
  3. ap50, ap = ap[:, 0], ap.mean(1) # AP@0.5, AP@0.5:0.95
  4. mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
  5. nt = np.bincount(stats[3].astype(np.int64), minlength=nc)


  1. #tp:整个数据集所有图片中所有预测框在每一个iou条件下(0.5~0.95)10个是否是TP
  2. # conf:整个数据集所有图片的所有预测框的置信度
  3. # pred_cls:预测类别
  4. # plot:是否演示
  def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='.', names=()):
      """Compute per-class precision, recall, average precision and F1.

      Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.

      # Arguments
          tp:  True positives (nparray, nx1 or nx10).
          conf:  Objectness value from 0-1 (nparray).
          pred_cls:  Predicted object classes (nparray).
          target_cls:  True object classes (nparray).
          plot:  Plot precision-recall curve at mAP@0.5
          save_dir:  Plot save directory
          names:  Class names, either a dict {class id: name} or a sequence
              indexed by class id.
      # Returns
          A tuple (p, r, ap, f1, classes): p, r and f1 hold one value per class,
          read from the 1000-point curves at the confidence where the mean F1
          curve peaks; ap has shape (number of classes, tp.shape[1]) and is
          filled by compute_ap; classes are the unique target classes as int32.
      """
      # Rank all predictions by confidence, highest first.
      order = np.argsort(-conf)
      tp, conf, pred_cls = tp[order], conf[order], pred_cls[order]

      # AP is computed per class, so work on the distinct target classes.
      unique_classes = np.unique(target_cls)
      nc = unique_classes.shape[0]  # number of classes present in the targets

      # Create Precision-Recall curve and compute AP for each class
      px, py = np.linspace(0, 1, 1000), []  # for plotting
      ap, p, r = np.zeros((nc, tp.shape[1])), np.zeros((nc, 1000)), np.zeros((nc, 1000))
      for ci, c in enumerate(unique_classes):
          is_c = pred_cls == c  # mask of the predictions assigned to class c
          n_l = (target_cls == c).sum()  # number of labels
          n_p = is_c.sum()  # number of predictions
          if n_p == 0 or n_l == 0:
              continue  # nothing to evaluate; this class keeps its zero rows

          # Accumulate FPs and TPs
          fpc = (1 - tp[is_c]).cumsum(0)
          tpc = tp[is_c].cumsum(0)

          # Recall
          recall = tpc / (n_l + 1e-16)  # recall curve
          r[ci] = np.interp(-px, -conf[is_c], recall[:, 0], left=0)  # negative x, xp because xp decreases

          # Precision
          precision = tpc / (tpc + fpc)  # precision curve
          p[ci] = np.interp(-px, -conf[is_c], precision[:, 0], left=1)  # p at pr_score

          # AP from recall-precision curve, once per column of tp
          for j in range(tp.shape[1]):
              ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j])
              if plot and j == 0:
                  py.append(np.interp(px, mrec, mpre))  # precision at mAP@0.5

      # Compute F1 (harmonic mean of precision and recall)
      f1 = 2 * p * r / (p + r + 1e-16)

      # Keep only the names of classes that have data, re-keyed 0..k-1.
      # A plain sequence (including the empty default) is read as index -> name.
      id_name_pairs = names.items() if isinstance(names, dict) else enumerate(names)
      names = dict(enumerate(v for k, v in id_name_pairs if k in unique_classes))
      if plot:
          plot_pr_curve(px, py, ap, Path(save_dir) / 'PR_curve.png', names)
          plot_mc_curve(px, f1, Path(save_dir) / 'F1_curve.png', names, ylabel='F1')
          plot_mc_curve(px, p, Path(save_dir) / 'P_curve.png', names, ylabel='Precision')
          plot_mc_curve(px, r, Path(save_dir) / 'R_curve.png', names, ylabel='Recall')

      best = f1.mean(0).argmax()  # max F1 index
      return p[:, best], r[:, best], ap, f1[:, best], unique_classes.astype('int32')



  1. fi = fitness(np.array(results).reshape(1, -1)) # weighted combination of [P, R, mAP@.5, mAP@.5-.95]
  2. if fi > best_fitness:
  3. best_fitness = fi
  4. log_vals = list(mloss) + list(results) + lr
  5. callbacks.run('on_fit_epoch_end', log_vals, epoch, best_fitness, fi)



  def fitness(x):
      """Return the model fitness as a weighted combination of metrics.

      Args:
          x: 2-D array whose first four columns are [P, R, mAP@0.5, mAP@0.5:0.95];
              any further columns are ignored.

      Returns:
          One fitness value per row of ``x``.
      """
      w = [0.0, 0.0, 0.1, 0.9]  # weights for [P, R, mAP@0.5, mAP@0.5:0.95]
      return (x[:, :4] * w).sum(1)

 w 就是这几个指标的权重占比:P 和 R 的权重为 0,mAP@0.5 的权重为 0.1,mAP@0.5:0.95 的权重为 0.9。



