当前位置:   article > 正文

Yolov1 源码讲解 detect.py_yolov1 keras源码

yolov1 keras源码

讲完了训练部分 接下来是检测部分

惯例看看结构

VOC_CLASS_BGR是不同类别应该用什么颜色画框,便于区分,比如A用红色,B用绿色,这样不同类别的框在图中不容易混在一起

画框框

  def visualize_boxes(image_bgr, boxes, class_names, probs, name_bgr_dict=None, line_thickness=2):
      """ Draw detection boxes and "<class> <prob>" labels on a copy of an image.

      Args:
          image_bgr: (numpy array) image in BGR channel order; it is copied, not modified.
          boxes: (list of tuple) box corners like [((x1, y1), (x2, y2)), ...] in pixel coordinates.
          class_names: (list of str) class name of each box.
          probs: (list of float) probability of each box, printed with two decimals.
          name_bgr_dict: (dict) maps class name -> BGR color tuple. Defaults to VOC_CLASS_BGR.
          line_thickness: (int) thickness in pixels of the box outline.
      Returns:
          (numpy array) copy of `image_bgr` with all boxes and labels drawn on it.
      """
      if name_bgr_dict is None:
          name_bgr_dict = VOC_CLASS_BGR

      # One font setting shared by the size measurement and the drawing, so that the
      # filled label background always matches the rendered text.
      font_scale, font_thickness = 0.4, 1
      # Color used for class names that are missing from the color table.
      fallback_bgr = (0, 255, 0)

      image_boxes = image_bgr.copy()  # draw on a copy so the caller's image stays untouched.
      for box, class_name, prob in zip(boxes, class_names, probs):
          # Draw box on the image.
          left_top, right_bottom = box
          left, top = int(left_top[0]), int(left_top[1])
          right, bottom = int(right_bottom[0]), int(right_bottom[1])
          bgr = name_bgr_dict.get(class_name, fallback_bgr)
          cv2.rectangle(image_boxes, (left, top), (right, bottom), bgr, thickness=line_thickness)

          # Draw text on the image: a filled rectangle anchored at the box's top-left
          # corner, with the label written inside it in white.
          text = '%s %.2f' % (class_name, prob)
          (text_w, text_h), baseline = cv2.getTextSize(
              text, cv2.FONT_HERSHEY_SIMPLEX, fontScale=font_scale, thickness=font_thickness)
          label_right_bottom = (left + text_w + 2 * line_thickness,
                                top + text_h + baseline + 2 * line_thickness)
          cv2.rectangle(image_boxes, (left, top), label_right_bottom, bgr, -1)
          # putText expects the bottom-left corner of the text (its baseline origin).
          cv2.putText(image_boxes, text, (left + line_thickness, top + text_h + line_thickness),
                      cv2.FONT_HERSHEY_SIMPLEX, fontScale=font_scale, color=(255, 255, 255),
                      thickness=font_thickness, lineType=8)
      return image_boxes

传入计算好的box和class名和可能性的值 这里遍历画出来 此处四个值两个坐标全为以图片真实像素大小的值,不再是归一化

取出左上,右下坐标 cv2画出来,从name_bgr_dict中取出 本class应该对应的什么颜色 然后画框

接着绘制文本:以框的左上角为起点,向右下方画一个填充的小矩形,并在其中写入种类名和概率, 就像这样

解析YOLODetector类

  def __init__(self,
               model_path, class_name_list=None, mean_rgb=(122.67891434, 116.66876762, 104.00698793),
               conf_thresh=0.1, prob_thresh=0.1, nms_thresh=0.5,
               gpu_id=0):
      """ Load the YOLO network onto the GPU and store the detection settings.

      Args:
          model_path: (str) path to a state dict that can be loaded into `resnet50()`.
          class_name_list: (list of str) class names indexed by predicted label.
              Defaults to the keys of VOC_CLASS_BGR.
          mean_rgb: (sequence of 3 floats) per-channel RGB mean subtracted from the input image.
          conf_thresh: (float) box confidence threshold, stored as `self.conf_thresh`.
          prob_thresh: (float) threshold on confidence x class score, stored as `self.prob_thresh`.
          nms_thresh: (float) IoU threshold for non maximum suppression, stored as `self.nms_thresh`.
          gpu_id: (int) value written to the CUDA_VISIBLE_DEVICES environment variable.
      Raises:
          AssertionError: if CUDA is not available, if the class list does not have
              C entries, or if `mean_rgb` does not have exactly 3 values.
      """
      # NOTE(review): presumably this only has an effect if CUDA has not been
      # initialized yet in this process -- confirm.
      os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id)
      use_gpu = torch.cuda.is_available()
      assert use_gpu, 'Current implementation does not support CPU mode. Enable CUDA.'

      # Load YOLO model.
      print("Loading YOLO model...")
      self.yolo = resnet50()  # freshly constructed network; its weights are overwritten below.
      # Load to CPU first so the checkpoint does not depend on the device it was saved from.
      sd = torch.load(model_path, map_location='cpu')
      self.yolo.load_state_dict(sd)  # restore the trained weights.
      self.yolo.cuda()
      print("Done loading!")
      self.yolo.eval()

      self.S = 7
      self.B = 2
      self.C = 20

      # Use the caller's class list if given, otherwise the class names of the color table.
      self.class_name_list = class_name_list if (class_name_list is not None) else list(VOC_CLASS_BGR.keys())
      assert len(self.class_name_list) == self.C

      self.mean = np.array(mean_rgb, dtype=np.float32)
      assert self.mean.shape == (3,)

      self.conf_thresh = conf_thresh
      self.prob_thresh = prob_thresh
      self.nms_thresh = nms_thresh

      self.to_tensor = transforms.ToTensor()

      # Warm up: run a few forward passes on a dummy input. This does not touch the
      # weights. Gradients are disabled because the outputs are discarded.
      dummy_input = torch.zeros((1, 3, 448, 448)).cuda()
      with torch.no_grad():
          for _ in range(3):
              self.yolo(dummy_input)

yolo初始化模型,并读取训练好的model_path位置的权重,放入gpu

用dummy_input先做几次前向推理,为gpu热身:它并不会初始化或改变权重。第一次在gpu上运行网络时通常会有CUDA初始化、显存分配等一次性开销,提前空跑几遍,可以避免这些开销落到后面第一张真实图片的检测上

  def detect(self, image_bgr, image_size=448):
      """ Detect objects from given image.

      Args:
          image_bgr: (numpy array) input image in BGR order, sized [h, w, 3].
          image_size: (int) image width and height to which input image is resized.
      Returns:
          boxes_detected: (list of tuple) box corner list like [((x1, y1), (x2, y2))_obj1, ...]
              as Python floats, re-scaled for original input image size.
          class_names_detected: (list of str) list of class name for each detected box.
          probs_detected: (list of float) list of probability(=confidence x class_score) for each detected box.
      """
      h, w, _ = image_bgr.shape

      # Preprocess: resize, BGR -> RGB, subtract the channel mean, scale by 1/255.
      img = cv2.resize(image_bgr, dsize=(image_size, image_size), interpolation=cv2.INTER_LINEAR)
      img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # assuming the model is trained with RGB images.
      img = (img - self.mean) / 255.0
      img = self.to_tensor(img)  # [image_size, image_size, 3] -> [3, image_size, image_size]
      img = img[None, :, :, :]  # add the batch dimension -> [1, 3, image_size, image_size]
      img = img.cuda()

      # Inference only, so no gradients are needed.
      with torch.no_grad():
          pred_tensor = self.yolo(img)
      pred_tensor = pred_tensor.detach().cpu()
      pred_tensor = pred_tensor.squeeze(0)  # squeeze batch dimension.

      # Get detected boxes, labels, confidences, class-scores.
      boxes_normalized_all, class_labels_all, confidences_all, class_scores_all = self.decode(pred_tensor)
      if boxes_normalized_all.size(0) == 0:
          return [], [], []  # if no box found, return empty lists.

      # Apply non maximum suppression separately to the boxes of each class.
      boxes_normalized, class_labels, probs = [], [], []
      for class_label in range(len(self.class_name_list)):
          mask = (class_labels_all == class_label)
          if torch.sum(mask) == 0:
              continue  # if no box found, skip that class.

          boxes_normalized_masked = boxes_normalized_all[mask]
          class_labels_masked = class_labels_all[mask]
          confidences_masked = confidences_all[mask]
          class_scores_masked = class_scores_all[mask]

          # NOTE(review): boxes are ranked by confidence only, while the reported
          # probability is confidence x class_score -- confirm this is intended.
          ids = self.nms(boxes_normalized_masked, confidences_masked)

          boxes_normalized.append(boxes_normalized_masked[ids])
          class_labels.append(class_labels_masked[ids])
          probs.append(confidences_masked[ids] * class_scores_masked[ids])

      boxes_normalized = torch.cat(boxes_normalized, 0)
      class_labels = torch.cat(class_labels, 0)
      probs = torch.cat(probs, 0)

      # Postprocess: convert tensors to plain Python values in the output format.
      boxes_detected, class_names_detected, probs_detected = [], [], []
      for box_normalized, class_label, prob in zip(boxes_normalized, class_labels, probs):
          x1, y1, x2, y2 = box_normalized.tolist()
          # Un-normalize x with the original image width and y with its height.
          boxes_detected.append(((w * x1, h * y1), (w * x2, h * y2)))
          class_names_detected.append(self.class_name_list[int(class_label)])
          probs_detected.append(float(prob))

      return boxes_detected, class_names_detected, probs_detected

detect检测主函数:

因为opencv中读取出来是bgr,与rgb不一样 这是由于计算机视觉历史原因引起。后面cvt转成rgb

将图片uint8值先减去各通道的均值(作者用的是voc2007中的均值),再除以255做缩放。减均值之后数据大致以0为中心;除以255只是把取值范围缩小到大约[-0.5, 0.6],并不会让方差变成1,也不代表数据符合正态分布

网络中输入大小应为(batch_size,3,448,448) 此处一张图片只有前3维,用None补一维度,放进网络输出预测detect时候的预测值 ,同时此时不需要计算梯度,提前设好no_grad 不然浪费机器计算性能。之后得到输出张量,并去掉第一维batch维

使用解码器decode解析网络预测输出的张量pred_tensor  decode部分下面再讲 先看输出结果

得到4组预测出来的bbox 条件概率(假设含有物体条件下的该类别的概率)最高都是同一类别14 看txt文件可知是person 也就是人

 接着以20个类别为循环条件开始循环:每次循环用mask掩码标出预测类别等于本次循环类别的框(对应位置为true),然后只处理这些框;如果本类别一个框都没有就跳过

找出所有属于同一类别的框进行nms:按分数从高到低依次选框,凡是与已选中的框iou超过阈值nms_thresh的框,都视作重复检测(重叠)而清除

nms完事之后得到都不互相重叠的框的索引。加入汇总的list中。list再按行叠成张量便于后面返回给画框的部分。

最后的循环是按输出格式 重新格式化数据。

传入画框的地方需要的是以图片真实像素为单位的左上、右下两个坐标(共四个值),不再是归一化的值。所以此处把归一化的x乘以原图宽度w、y乘以原图高度h,变成实际大小

decode部分

  def decode(self, pred_tensor):
      """ Decode tensor into box coordinates, class labels, confidences and class scores.

      For every grid cell the most likely class is taken. Each of the B boxes of the
      cell is kept only if its confidence exceeds `self.conf_thresh` and its
      probability (= confidence x class score) is at least `self.prob_thresh`.

      Args:
          pred_tensor: (tensor) tensor to decode sized [S, S, 5 x B + C], 5=(x, y, w, h, conf)
      Returns:
          boxes: (tensor) [[x1, y1, x2, y2]_obj1, ...]. Normalized w.r.t. image width/height
              (values are not clipped), sized [n_boxes, 4].
          labels: (tensor) class labels for each detected box, sized [n_boxes,].
          confidences: (tensor) objectness confidences for each detected box, sized [n_boxes,].
          class_scores: (tensor) scores for most likely class for each detected box, sized [n_boxes,].
      """
      S, B = self.S, self.B
      cell_size = 1.0 / float(S)

      # Confidence of every box, gathered from channels 4, 9, ... -> [S, S, B].
      conf_all = torch.stack([pred_tensor[:, :, 5 * b + 4] for b in range(B)], 2)
      # The original version built this mask but never applied it, so `conf_thresh`
      # had no effect; it is now used to filter boxes below.
      conf_mask = conf_all > self.conf_thresh  # [S, S, B]

      boxes, labels, confidences, class_scores = [], [], [], []
      # TBM, further optimization may be possible by replacing the following for-loops with tensor operations.
      for i in range(S):  # for x-dimension.
          for j in range(S):  # for y-dimension.
              # Best class score and its label for grid cell [j, i].
              class_score, class_label = torch.max(pred_tensor[j, i, 5 * B:], 0)

              for b in range(B):  # iterate over the B predicted boxes of this cell.
                  if not bool(conf_mask[j, i, b]):
                      continue
                  box_conf = conf_all[j, i, b]
                  if float(box_conf * class_score) < self.prob_thresh:
                      continue  # below the probability threshold: skip this box.

                  # Compute box corner (x1, y1, x2, y2) from tensor.
                  box = pred_tensor[j, i, 5 * b: 5 * b + 4]
                  # Top-left corner of this cell, normalized by the image size.
                  cell_origin = box.new_tensor([i, j]) * cell_size
                  # The center is stored relative to the cell; convert to image-normalized.
                  center = box[:2] * cell_size + cell_origin
                  half_wh = 0.5 * box[2:]  # width/height are already image-normalized.
                  box_xyxy = torch.cat((center - half_wh, center + half_wh), 0)  # (x1, y1, x2, y2)

                  # Append result to the lists.
                  boxes.append(box_xyxy)
                  labels.append(class_label)
                  confidences.append(box_conf)
                  class_scores.append(class_score)

      if not boxes:
          # If no box found, return empty tensors.
          return (torch.zeros((0, 4)), torch.zeros(0, dtype=torch.long),
                  torch.zeros(0), torch.zeros(0))

      return (torch.stack(boxes, 0),  # [n_boxes, 4]
              torch.stack(labels, 0),  # [n_boxes, ]
              torch.stack(confidences, 0),  # [n_boxes, ]
              torch.stack(class_scores, 0))  # [n_boxes, ]

按照预测的张量返回四组数据 分别是box 标签(属于的类的下标) 置信度 类别条件概率

pred_tensor[:, :, 4]取出第一个框置信度,后面再加一维用来叠第二个框的置信度

原始代码中虽然算出了conf_mask,但后面并没有用它做筛选,也就是说conf_thresh在原实现里不起作用,真正起筛选作用的只有下面的prob_thresh

按照网格(grid cell,共S×S个,不是像素)进行循环,取出每个网格对应的最大类别的下标和概率值

坐标计算顺序是:取出box四个值(cx,cy,w,h),box中的cx、cy是框中心相对该grid cell左上角的偏移,并以cell大小归一化;w、h则已经是相对整张图片归一化的宽高。这里把它们转换成相对图片归一化的左上、右下坐标值。

其中第三重循环遍历每个网格的2(B)个框,概率小于阈值prob_thresh的框直接跳过(continue),不会被取出。此处的概率是conf * class_score(置信度*类别条件概率,也就是此处有物体的概率*假如有物体的时候该类别的概率)

若概率符合要求我们取出这个框的四个数据, 加到四组汇总数据中。

四组list分别叠成张量形式返回

nms部分

  def nms(self, boxes, scores):
      """ Apply non maximum suppression.

      Boxes are visited from the highest score to the lowest. Each visited box is
      kept, and every remaining box whose IoU with it is higher than
      `self.nms_thresh` is discarded.

      Args:
          boxes: (tensor) box corners [[x1, y1, x2, y2], ...], sized [n, 4].
          scores: (tensor) score of each box, sized [n,].
      Returns:
          (LongTensor) indices into `boxes` of the kept boxes, in descending score order.
      """
      threshold = self.nms_thresh
      if boxes.numel() == 0:
          return torch.zeros(0, dtype=torch.long)

      x1 = boxes[:, 0]  # [n,]
      y1 = boxes[:, 1]  # [n,]
      x2 = boxes[:, 2]  # [n,]
      y2 = boxes[:, 3]  # [n,]
      areas = (x2 - x1) * (y2 - y1)  # [n,]

      _, order = scores.sort(0, descending=True)  # [n,]
      keep = []
      while order.numel() > 0:
          # The best remaining box is always kept.
          i = int(order[0])
          keep.append(i)

          rest = order[1:]  # [m-1, ] candidates still to be compared against box `i`.
          if rest.numel() == 0:
              break  # no box left to suppress.

          # Corners of the intersection between box `i` and each remaining box.
          inter_x1 = x1[rest].clamp(min=float(x1[i]))
          inter_y1 = y1[rest].clamp(min=float(y1[i]))
          inter_x2 = x2[rest].clamp(max=float(x2[i]))
          inter_y2 = y2[rest].clamp(max=float(y2[i]))
          # A negative extent means the boxes do not overlap; clamp it to 0.
          inter_w = (inter_x2 - inter_x1).clamp(min=0)
          inter_h = (inter_y2 - inter_y1).clamp(min=0)

          inters = inter_w * inter_h  # [m-1, ]
          unions = areas[i] + areas[rest] - inters  # [m-1, ]
          # Guard against 0/0 (zero-area boxes), which would otherwise give NaN.
          ious = inters / unions.clamp(min=1e-12)  # [m-1, ]

          # Keep only the boxes whose IoU is not higher than the threshold.
          order = rest[ious <= threshold]

      return torch.LongTensor(keep)

nms比较硬核,不懂需要先看李沐的13.4. 锚框 — 动手学深度学习 2.0.0 documentation 不过实现方式有些许不同

分别取出boxes中相对于图片归一化的左上右下坐标值。算出框面积

以分数按从大到小排序。注意detect里传给nms的分数是框的置信度confidences,而不是类别条件概率。inter的x1 y1 x2 y2分别对应当前最高分的框与其余各框的交集矩形的左上角和右下角(图像坐标系中y轴向下,所以(x1, y1)是左上角),

以分数做顺序基准,每次以第一个(下标0)

检查x1,与下标为0的x1做比较,若小于下标为0的x1则自动填充为下标为0的x1

检查y1,与下标为0的y1做比较,若小于下标为0的y1则自动填充为下标为0的y1

检查x2,与下标为0的x2做比较,若大于下标为0的x2则自动填充为下标为0的x2

检查y2,与下标为0的y2做比较,若大于下标为0的y2则自动填充为下标为0的y2

动手画图更容易懂

于是可以算出交集的w和h。如果两个框不重合、没有交集,w和h至少有一个会为负,就会被填充为0,于是iou必定为0,表示无交集、不可能重叠,这样的框会被保留。其余iou不为0的框与阈值比较:不超过阈值的保留下来,作为下一轮的比较对象;超过阈值的被认为与当前基准框重叠而丢弃。同时比较基准放入ids,它与筛选后保留下来的比较对象都没有过大的重叠

就这样每次与其他剩余的框比较,筛选出所有互相不重叠或者iou阈值不足以认为是重叠的框返回回detect中处理

 

  1. import torch
  2. from torch.autograd import Variable
  3. import torchvision.transforms as transforms
  4. import os
  5. import cv2
  6. import numpy as np
  7. from resnet_yolo import resnet50
  # VOC class names and the BGR color used to draw each class.
  # The key order matters: the detector's default class list is built from these
  # keys, so position N here is the name reported for predicted label N.
  VOC_CLASS_BGR = {
      'aeroplane': (128, 0, 0),
      'bicycle': (0, 128, 0),
      'bird': (128, 128, 0),
      'boat': (0, 0, 128),
      'bottle': (128, 0, 128),
      'bus': (0, 128, 128),
      'car': (128, 128, 128),
      'cat': (64, 0, 0),
      'chair': (192, 0, 0),
      'cow': (64, 128, 0),
      'diningtable': (192, 128, 0),
      'dog': (64, 0, 128),
      'horse': (192, 0, 128),
      'motorbike': (64, 128, 128),
      'person': (192, 128, 128),
      'pottedplant': (0, 64, 0),
      'sheep': (128, 64, 0),
      'sofa': (0, 192, 0),
      'train': (128, 192, 0),
      'tvmonitor': (0, 64, 128),
  }
  def visualize_boxes(image_bgr, boxes, class_names, probs, name_bgr_dict=None, line_thickness=2):
      """ Draw detection boxes and "<class> <prob>" labels on a copy of an image.

      Args:
          image_bgr: (numpy array) image in BGR channel order; it is copied, not modified.
          boxes: (list of tuple) box corners like [((x1, y1), (x2, y2)), ...] in pixel coordinates.
          class_names: (list of str) class name of each box.
          probs: (list of float) probability of each box, printed with two decimals.
          name_bgr_dict: (dict) maps class name -> BGR color tuple. Defaults to VOC_CLASS_BGR.
          line_thickness: (int) thickness in pixels of the box outline.
      Returns:
          (numpy array) copy of `image_bgr` with all boxes and labels drawn on it.
      """
      if name_bgr_dict is None:
          name_bgr_dict = VOC_CLASS_BGR

      # One font setting shared by the size measurement and the drawing, so that the
      # filled label background always matches the rendered text.
      font_scale, font_thickness = 0.4, 1
      # Color used for class names that are missing from the color table.
      fallback_bgr = (0, 255, 0)

      image_boxes = image_bgr.copy()  # draw on a copy so the caller's image stays untouched.
      for box, class_name, prob in zip(boxes, class_names, probs):
          # Draw box on the image.
          left_top, right_bottom = box
          left, top = int(left_top[0]), int(left_top[1])
          right, bottom = int(right_bottom[0]), int(right_bottom[1])
          bgr = name_bgr_dict.get(class_name, fallback_bgr)
          cv2.rectangle(image_boxes, (left, top), (right, bottom), bgr, thickness=line_thickness)

          # Draw text on the image: a filled rectangle anchored at the box's top-left
          # corner, with the label written inside it in white.
          text = '%s %.2f' % (class_name, prob)
          (text_w, text_h), baseline = cv2.getTextSize(
              text, cv2.FONT_HERSHEY_SIMPLEX, fontScale=font_scale, thickness=font_thickness)
          label_right_bottom = (left + text_w + 2 * line_thickness,
                                top + text_h + baseline + 2 * line_thickness)
          cv2.rectangle(image_boxes, (left, top), label_right_bottom, bgr, -1)
          # putText expects the bottom-left corner of the text (its baseline origin).
          cv2.putText(image_boxes, text, (left + line_thickness, top + text_h + line_thickness),
                      cv2.FONT_HERSHEY_SIMPLEX, fontScale=font_scale, color=(255, 255, 255),
                      thickness=font_thickness, lineType=8)
      return image_boxes
  class YOLODetector:
      """ YOLO object detector: loads a trained network and turns images into boxes.

      `detect` runs the network on one image, `decode` converts the raw output tensor
      into candidate boxes, and `nms` removes overlapping boxes of the same class.
      """

      def __init__(self,
                   model_path, class_name_list=None, mean_rgb=(122.67891434, 116.66876762, 104.00698793),
                   conf_thresh=0.1, prob_thresh=0.1, nms_thresh=0.5,
                   gpu_id=0):
          """ Load the YOLO network onto the GPU and store the detection settings.

          Args:
              model_path: (str) path to a state dict that can be loaded into `resnet50()`.
              class_name_list: (list of str) class names indexed by predicted label.
                  Defaults to the keys of VOC_CLASS_BGR.
              mean_rgb: (sequence of 3 floats) per-channel RGB mean subtracted from the input image.
              conf_thresh: (float) boxes whose confidence is not above this value are dropped.
              prob_thresh: (float) boxes whose confidence x class score is below this value are dropped.
              nms_thresh: (float) IoU threshold for non maximum suppression.
              gpu_id: (int) value written to the CUDA_VISIBLE_DEVICES environment variable.
          Raises:
              AssertionError: if CUDA is not available, if the class list does not have
                  C entries, or if `mean_rgb` does not have exactly 3 values.
          """
          # NOTE(review): presumably this only has an effect if CUDA has not been
          # initialized yet in this process -- confirm.
          os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id)
          use_gpu = torch.cuda.is_available()
          assert use_gpu, 'Current implementation does not support CPU mode. Enable CUDA.'

          # Load YOLO model.
          print("Loading YOLO model...")
          self.yolo = resnet50()  # freshly constructed network; its weights are overwritten below.
          # Load to CPU first so the checkpoint does not depend on the device it was saved from.
          sd = torch.load(model_path, map_location='cpu')
          self.yolo.load_state_dict(sd)  # restore the trained weights.
          self.yolo.cuda()
          print("Done loading!")
          self.yolo.eval()

          self.S = 7
          self.B = 2
          self.C = 20

          # Use the caller's class list if given, otherwise the class names of the color table.
          self.class_name_list = class_name_list if (class_name_list is not None) else list(VOC_CLASS_BGR.keys())
          assert len(self.class_name_list) == self.C

          self.mean = np.array(mean_rgb, dtype=np.float32)
          assert self.mean.shape == (3,)

          self.conf_thresh = conf_thresh
          self.prob_thresh = prob_thresh
          self.nms_thresh = nms_thresh

          self.to_tensor = transforms.ToTensor()

          # Warm up: run a few forward passes on a dummy input. This does not touch the
          # weights. Gradients are disabled because the outputs are discarded.
          dummy_input = torch.zeros((1, 3, 448, 448)).cuda()
          with torch.no_grad():
              for _ in range(3):
                  self.yolo(dummy_input)

      def detect(self, image_bgr, image_size=448):
          """ Detect objects from given image.

          Args:
              image_bgr: (numpy array) input image in BGR order, sized [h, w, 3].
              image_size: (int) image width and height to which input image is resized.
          Returns:
              boxes_detected: (list of tuple) box corner list like [((x1, y1), (x2, y2))_obj1, ...]
                  as Python floats, re-scaled for original input image size.
              class_names_detected: (list of str) list of class name for each detected box.
              probs_detected: (list of float) list of probability(=confidence x class_score) for each detected box.
          """
          h, w, _ = image_bgr.shape

          # Preprocess: resize, BGR -> RGB, subtract the channel mean, scale by 1/255.
          img = cv2.resize(image_bgr, dsize=(image_size, image_size), interpolation=cv2.INTER_LINEAR)
          img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # assuming the model is trained with RGB images.
          img = (img - self.mean) / 255.0
          img = self.to_tensor(img)  # [image_size, image_size, 3] -> [3, image_size, image_size]
          img = img[None, :, :, :]  # add the batch dimension -> [1, 3, image_size, image_size]
          img = img.cuda()

          # Inference only, so no gradients are needed.
          with torch.no_grad():
              pred_tensor = self.yolo(img)
          pred_tensor = pred_tensor.detach().cpu()
          pred_tensor = pred_tensor.squeeze(0)  # squeeze batch dimension.

          # Get detected boxes, labels, confidences, class-scores.
          boxes_normalized_all, class_labels_all, confidences_all, class_scores_all = self.decode(pred_tensor)
          if boxes_normalized_all.size(0) == 0:
              return [], [], []  # if no box found, return empty lists.

          # Apply non maximum suppression separately to the boxes of each class.
          boxes_normalized, class_labels, probs = [], [], []
          for class_label in range(len(self.class_name_list)):
              mask = (class_labels_all == class_label)
              if torch.sum(mask) == 0:
                  continue  # if no box found, skip that class.

              boxes_normalized_masked = boxes_normalized_all[mask]
              class_labels_masked = class_labels_all[mask]
              confidences_masked = confidences_all[mask]
              class_scores_masked = class_scores_all[mask]

              # NOTE(review): boxes are ranked by confidence only, while the reported
              # probability is confidence x class_score -- confirm this is intended.
              ids = self.nms(boxes_normalized_masked, confidences_masked)

              boxes_normalized.append(boxes_normalized_masked[ids])
              class_labels.append(class_labels_masked[ids])
              probs.append(confidences_masked[ids] * class_scores_masked[ids])

          boxes_normalized = torch.cat(boxes_normalized, 0)
          class_labels = torch.cat(class_labels, 0)
          probs = torch.cat(probs, 0)

          # Postprocess: convert tensors to plain Python values in the output format.
          boxes_detected, class_names_detected, probs_detected = [], [], []
          for box_normalized, class_label, prob in zip(boxes_normalized, class_labels, probs):
              x1, y1, x2, y2 = box_normalized.tolist()
              # Un-normalize x with the original image width and y with its height.
              boxes_detected.append(((w * x1, h * y1), (w * x2, h * y2)))
              class_names_detected.append(self.class_name_list[int(class_label)])
              probs_detected.append(float(prob))

          return boxes_detected, class_names_detected, probs_detected

      def decode(self, pred_tensor):
          """ Decode tensor into box coordinates, class labels, confidences and class scores.

          For every grid cell the most likely class is taken. Each of the B boxes of the
          cell is kept only if its confidence exceeds `self.conf_thresh` and its
          probability (= confidence x class score) is at least `self.prob_thresh`.

          Args:
              pred_tensor: (tensor) tensor to decode sized [S, S, 5 x B + C], 5=(x, y, w, h, conf)
          Returns:
              boxes: (tensor) [[x1, y1, x2, y2]_obj1, ...]. Normalized w.r.t. image width/height
                  (values are not clipped), sized [n_boxes, 4].
              labels: (tensor) class labels for each detected box, sized [n_boxes,].
              confidences: (tensor) objectness confidences for each detected box, sized [n_boxes,].
              class_scores: (tensor) scores for most likely class for each detected box, sized [n_boxes,].
          """
          S, B = self.S, self.B
          cell_size = 1.0 / float(S)

          # Confidence of every box, gathered from channels 4, 9, ... -> [S, S, B].
          conf_all = torch.stack([pred_tensor[:, :, 5 * b + 4] for b in range(B)], 2)
          # The original version built this mask but never applied it, so `conf_thresh`
          # had no effect; it is now used to filter boxes below.
          conf_mask = conf_all > self.conf_thresh  # [S, S, B]

          boxes, labels, confidences, class_scores = [], [], [], []
          # TBM, further optimization may be possible by replacing the following for-loops with tensor operations.
          for i in range(S):  # for x-dimension.
              for j in range(S):  # for y-dimension.
                  # Best class score and its label for grid cell [j, i].
                  class_score, class_label = torch.max(pred_tensor[j, i, 5 * B:], 0)

                  for b in range(B):  # iterate over the B predicted boxes of this cell.
                      if not bool(conf_mask[j, i, b]):
                          continue
                      box_conf = conf_all[j, i, b]
                      if float(box_conf * class_score) < self.prob_thresh:
                          continue  # below the probability threshold: skip this box.

                      # Compute box corner (x1, y1, x2, y2) from tensor.
                      box = pred_tensor[j, i, 5 * b: 5 * b + 4]
                      # Top-left corner of this cell, normalized by the image size.
                      cell_origin = box.new_tensor([i, j]) * cell_size
                      # The center is stored relative to the cell; convert to image-normalized.
                      center = box[:2] * cell_size + cell_origin
                      half_wh = 0.5 * box[2:]  # width/height are already image-normalized.
                      box_xyxy = torch.cat((center - half_wh, center + half_wh), 0)  # (x1, y1, x2, y2)

                      # Append result to the lists.
                      boxes.append(box_xyxy)
                      labels.append(class_label)
                      confidences.append(box_conf)
                      class_scores.append(class_score)

          if not boxes:
              # If no box found, return empty tensors.
              return (torch.zeros((0, 4)), torch.zeros(0, dtype=torch.long),
                      torch.zeros(0), torch.zeros(0))

          return (torch.stack(boxes, 0),  # [n_boxes, 4]
                  torch.stack(labels, 0),  # [n_boxes, ]
                  torch.stack(confidences, 0),  # [n_boxes, ]
                  torch.stack(class_scores, 0))  # [n_boxes, ]

      def nms(self, boxes, scores):
          """ Apply non maximum suppression.

          Boxes are visited from the highest score to the lowest. Each visited box is
          kept, and every remaining box whose IoU with it is higher than
          `self.nms_thresh` is discarded.

          Args:
              boxes: (tensor) box corners [[x1, y1, x2, y2], ...], sized [n, 4].
              scores: (tensor) score of each box, sized [n,].
          Returns:
              (LongTensor) indices into `boxes` of the kept boxes, in descending score order.
          """
          threshold = self.nms_thresh
          if boxes.numel() == 0:
              return torch.zeros(0, dtype=torch.long)

          x1 = boxes[:, 0]  # [n,]
          y1 = boxes[:, 1]  # [n,]
          x2 = boxes[:, 2]  # [n,]
          y2 = boxes[:, 3]  # [n,]
          areas = (x2 - x1) * (y2 - y1)  # [n,]

          _, order = scores.sort(0, descending=True)  # [n,]
          keep = []
          while order.numel() > 0:
              # The best remaining box is always kept.
              i = int(order[0])
              keep.append(i)

              rest = order[1:]  # [m-1, ] candidates still to be compared against box `i`.
              if rest.numel() == 0:
                  break  # no box left to suppress.

              # Corners of the intersection between box `i` and each remaining box.
              inter_x1 = x1[rest].clamp(min=float(x1[i]))
              inter_y1 = y1[rest].clamp(min=float(y1[i]))
              inter_x2 = x2[rest].clamp(max=float(x2[i]))
              inter_y2 = y2[rest].clamp(max=float(y2[i]))
              # A negative extent means the boxes do not overlap; clamp it to 0.
              inter_w = (inter_x2 - inter_x1).clamp(min=0)
              inter_h = (inter_y2 - inter_y1).clamp(min=0)

              inters = inter_w * inter_h  # [m-1, ]
              unions = areas[i] + areas[rest] - inters  # [m-1, ]
              # Guard against 0/0 (zero-area boxes), which would otherwise give NaN.
              ious = inters / unions.clamp(min=1e-12)  # [m-1, ]

              # Keep only the boxes whose IoU is not higher than the threshold.
              order = rest[ious <= threshold]

          return torch.LongTensor(keep)
  if __name__ == '__main__':
      # Example run: detect objects in one image and save the visualization.

      # Paths to input/output images.
      image_path = '000369.jpg'
      out_path = 'result.png'

      # Path to the yolo weight.
      model_path = 'weights/model_best.pth'

      # GPU device on which yolo is loaded.
      gpu_id = 0

      # Load model.
      yolo = YOLODetector(model_path, gpu_id=gpu_id, conf_thresh=0.15, prob_thresh=0.45, nms_thresh=0.35)

      # Load image. OpenCV returns the pixels in BGR channel order, and returns None
      # instead of raising when the file cannot be read.
      image = cv2.imread(image_path)
      if image is None:
          raise FileNotFoundError('Failed to read image: %s' % image_path)

      # Detect objects.
      boxes, class_names, probs = yolo.detect(image)

      # Visualize.
      image_boxes = visualize_boxes(image, boxes, class_names, probs)

      # Output detection result as an image; imwrite reports failure via its return value.
      if not cv2.imwrite(out_path, image_boxes):
          raise IOError('Failed to write image: %s' % out_path)

声明:本文内容由网友自发贡献,转载请注明出处:【wpsshop】
推荐阅读
相关标签
  

闽ICP备14008679号