
Outputting Grad-CAM Heatmaps from the Latest YOLOv5

1. Outputting heatmaps from YOLOv5

The YOLOv5 project on GitHub is updated constantly, so the old way of adding Grad-CAM heatmaps no longer works. To make sure the heatmaps can still be produced on the current code base, this post describes the whole procedure in detail.

2. Steps

--- Modify:
    --- models/yolo.py (the forward function of the Detect class)

--- Add:
    --- models/gradcam.py
    --- models/yolo_v5_object_detector.py
    --- main_gradcam.py
 

Step 1: modify the yolo code as follows (example).

In models/yolo.py, edit the forward function of the Detect class and add the following four statements (marked # added --- 1 to 4 in the listing below):

    logits_ = []
    logits = x[i][..., 5:]
    logits_.append(logits.view(bs, -1, self.no - 5))
    return x if self.training else (torch.cat(z, 1), torch.cat(logits_, 1), x)

In context, the modified forward looks like this:
def forward(self, x):
    z = []  # inference output
    logits_ = []  # added --- 1
    for i in range(self.nl):
        x[i] = self.m[i](x[i])  # conv
        bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
        x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()

        if not self.training:  # inference
            if self.dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]:
                self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)

            logits = x[i][..., 5:]  # added --- 2 (raw class scores, before sigmoid)

            if isinstance(self, Segment):  # (boxes + masks)
                xy, wh, conf, mask = x[i].split((2, 2, self.nc + 1, self.no - self.nc - 5), 4)
                xy = (xy.sigmoid() * 2 + self.grid[i]) * self.stride[i]  # xy
                wh = (wh.sigmoid() * 2) ** 2 * self.anchor_grid[i]  # wh
                y = torch.cat((xy, wh, conf.sigmoid(), mask), 4)
            else:  # Detect (boxes only)
                xy, wh, conf = x[i].sigmoid().split((2, 2, self.nc + 1), 4)
                xy = (xy * 2 + self.grid[i]) * self.stride[i]  # xy
                wh = (wh * 2) ** 2 * self.anchor_grid[i]  # wh
                y = torch.cat((xy, wh, conf), 4)
            z.append(y.view(bs, self.na * nx * ny, self.no))
            logits_.append(logits.view(bs, -1, self.no - 5))  # added --- 3

    # original: return x if self.training else (torch.cat(z, 1),) if self.export else (torch.cat(z, 1), x)
    return x if self.training else (torch.cat(z, 1), torch.cat(logits_, 1), x)  # added --- 4
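
With these four additions, the model returns a three-element tuple in eval mode: the decoded predictions, the concatenated raw class scores, and the per-layer feature maps. A quick sanity check (a minimal sketch: the yolov5s.pt checkpoint is a placeholder, and the printed shapes assume the 80-class COCO model at 640x640):

    # sanity check for the modified Detect.forward (checkpoint path is a placeholder)
    import torch
    from models.experimental import attempt_load

    model = attempt_load('yolov5s.pt', device='cpu', inplace=False, fuse=False)
    model.eval()
    preds, logits, raw = model(torch.zeros(1, 3, 640, 640))
    print(preds.shape)   # torch.Size([1, 25200, 85]): boxes + obj conf + class conf
    print(logits.shape)  # torch.Size([1, 25200, 80]): raw class scores (pre-sigmoid)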

Step 2: add a gradcam.py file under the models directory.

The code is as follows:

import time
import torch
import torch.nn.functional as F


def find_yolo_layer(model, layer_name):
    """Find yolov5 layer to calculate GradCAM and GradCAM++

    Args:
        model: yolov5 model.
        layer_name (str): the name of layer with its hierarchical information.

    Return:
        target_layer: found layer
    """
    hierarchy = layer_name.split('_')
    target_layer = model.model._modules[hierarchy[0]]
    for h in hierarchy[1:]:
        target_layer = target_layer._modules[h]
    return target_layer


class YOLOV5GradCAM:

    # initialization: locate the target layer and register the hooks
    def __init__(self, model, layer_name, img_size=(640, 640)):
        self.model = model
        self.gradients = dict()
        self.activations = dict()

        def backward_hook(module, grad_input, grad_output):
            self.gradients['value'] = grad_output[0]
            return None

        def forward_hook(module, input, output):
            self.activations['value'] = output
            return None

        target_layer = find_yolo_layer(self.model, layer_name)
        # cache the target layer's output (activations) and its gradient
        target_layer.register_forward_hook(forward_hook)
        target_layer.register_full_backward_hook(backward_hook)
        device = 'cuda' if next(self.model.model.parameters()).is_cuda else 'cpu'
        # one dummy forward pass so the hooks are exercised once
        self.model(torch.zeros(1, 3, *img_size, device=device))

    def forward(self, input_img, class_idx=True):
        """
        Args:
            input_img: input image with shape of (1, 3, H, W)
        Return:
            mask: saliency map of the same spatial dimension with input
            logit: model output
            preds: the object predictions
        """
        saliency_maps = []
        b, c, h, w = input_img.size()
        preds, logits = self.model(input_img)
        for logit, cls, cls_name in zip(logits[0], preds[1][0], preds[2][0]):
            if class_idx:
                score = logit[cls]
            else:
                score = logit.max()
            self.model.zero_grad()
            tic = time.time()
            # backpropagate the class score to get gradients at the target layer
            score.backward(retain_graph=True)
            print(f"[INFO] {cls_name}, model-backward took: ", round(time.time() - tic, 4), 'seconds')
            gradients = self.gradients['value']
            activations = self.activations['value']
            b, k, u, v = gradients.size()
            alpha = gradients.view(b, k, -1).mean(2)
            weights = alpha.view(b, k, 1, 1)
            # workaround (see the note at the end of this post): if the hooked
            # gradients carry fewer channels than the activations, tile the
            # weights along the channel dimension until the shapes match
            while weights.shape[1] < activations.shape[1]:
                weights = torch.cat((weights, weights), 1)
            saliency_map = (weights * activations).sum(1, keepdim=True)
            saliency_map = F.relu(saliency_map)
            saliency_map = F.interpolate(saliency_map, size=(h, w), mode='bilinear', align_corners=False)
            saliency_map_min, saliency_map_max = saliency_map.min(), saliency_map.max()
            saliency_map = (saliency_map - saliency_map_min).div(saliency_map_max - saliency_map_min).data
            saliency_maps.append(saliency_map)
        return saliency_maps, logits, preds

    def __call__(self, input_img):
        return self.forward(input_img)


class YOLOV5GradCAMPP(YOLOV5GradCAM):

    def __init__(self, model, layer_name, img_size=(640, 640)):
        super(YOLOV5GradCAMPP, self).__init__(model, layer_name, img_size)

    def forward(self, input_img, class_idx=True):
        saliency_maps = []
        b, c, h, w = input_img.size()
        tic = time.time()
        preds, logits = self.model(input_img)
        print("[INFO] model-forward took: ", round(time.time() - tic, 4), 'seconds')
        for logit, cls, cls_name in zip(logits[0], preds[1][0], preds[2][0]):
            if class_idx:
                score = logit[cls]
            else:
                score = logit.max()
            self.model.zero_grad()
            tic = time.time()
            # backpropagate the class score to get gradients at the target layer
            score.backward(retain_graph=True)
            print(f"[INFO] {cls_name}, model-backward took: ", round(time.time() - tic, 4), 'seconds')
            gradients = self.gradients['value']  # dS/dA
            activations = self.activations['value']  # A
            b, k, u, v = gradients.size()
            alpha_num = gradients.pow(2)
            alpha_denom = gradients.pow(2).mul(2) + \
                activations.mul(gradients.pow(3)).view(b, k, u * v).sum(-1, keepdim=True).view(b, k, 1, 1)
            # torch.where(condition, x, y): take x where the condition holds, else y
            alpha_denom = torch.where(alpha_denom != 0.0, alpha_denom, torch.ones_like(alpha_denom))
            alpha = alpha_num.div(alpha_denom + 1e-7)
            positive_gradients = F.relu(score.exp() * gradients)  # ReLU(dY/dA) == ReLU(exp(S)*dS/dA)
            weights = (alpha * positive_gradients).view(b, k, u * v).sum(-1).view(b, k, 1, 1)
            saliency_map = (weights * activations).sum(1, keepdim=True)
            saliency_map = F.relu(saliency_map)
            saliency_map = F.interpolate(saliency_map, size=(h, w), mode='bilinear', align_corners=False)
            saliency_map_min, saliency_map_max = saliency_map.min(), saliency_map.max()
            saliency_map = (saliency_map - saliency_map_min).div(saliency_map_max - saliency_map_min).data
            saliency_maps.append(saliency_map)
        return saliency_maps, logits, preds
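
The class works through two PyTorch hooks: the forward hook caches the target layer's activations A, and the full backward hook caches the gradient dS/dA when score.backward() runs. A toy illustration of that mechanism, independent of YOLOv5:

    # toy illustration of the hook mechanism used above (not YOLOv5-specific)
    import torch
    import torch.nn as nn

    store = {}
    layer = nn.Conv2d(3, 8, 3, padding=1)
    layer.register_forward_hook(lambda m, i, o: store.update(act=o))
    layer.register_full_backward_hook(lambda m, gi, go: store.update(grad=go[0]))

    x = torch.randn(1, 3, 16, 16, requires_grad=True)
    score = layer(x).sum()      # any scalar derived from the output
    score.backward()
    print(store['act'].shape)   # torch.Size([1, 8, 16, 16]) -- activations A
    print(store['grad'].shape)  # torch.Size([1, 8, 16, 16]) -- dS/dA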

Step 3: add the yolo_v5_object_detector.py file, also under the models directory.

The code is as follows:

import numpy as np
import torch
from models.experimental import attempt_load
from utils.general import xywh2xyxy
from utils.dataloaders import letterbox
import cv2
import time
import torchvision
import torch.nn as nn
from utils.metrics import box_iou


class YOLOV5TorchObjectDetector(nn.Module):

    def __init__(self,
                 model_weight,
                 device,
                 img_size,
                 names=None,
                 mode='eval',
                 confidence=0.45,
                 iou_thresh=0.45,
                 agnostic_nms=False):
        super(YOLOV5TorchObjectDetector, self).__init__()
        self.device = device
        self.img_size = img_size
        self.mode = mode
        self.confidence = confidence
        self.iou_thresh = iou_thresh
        self.agnostic = agnostic_nms
        self.model = attempt_load(model_weight, device=device, inplace=False, fuse=False)
        self.model.requires_grad_(True)
        self.model.to(device)
        if self.mode == 'train':
            self.model.train()
        else:
            self.model.eval()
        # fetch the names
        if names is None:
            self.names = ['your dataset classname']
        else:
            self.names = names
        # preventing cold start
        img = torch.zeros((1, 3, *self.img_size), device=device)
        self.model(img)

    @staticmethod
    def non_max_suppression(prediction, logits, conf_thres=0.3, iou_thres=0.45, classes=None, agnostic=False,
                            multi_label=False, labels=(), max_det=300):
        """Runs Non-Maximum Suppression (NMS) on inference and logits results

        Returns:
            list of detections, an (n, 6) tensor per image [xyxy, conf, cls],
            and the pruned input logits, an (n, number-of-classes) tensor per image
        """
        nc = prediction.shape[2] - 5  # number of classes
        xc = prediction[..., 4] > conf_thres  # candidates

        # Checks
        assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
        assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'

        # Settings
        min_wh, max_wh = 2, 4096  # (pixels) minimum and maximum box width and height
        max_nms = 30000  # maximum number of boxes into torchvision.ops.nms()
        time_limit = 10.0  # seconds to quit after
        redundant = True  # require redundant detections
        multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)
        merge = False  # use merge-NMS

        t = time.time()
        output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0]
        logits_output = [torch.zeros((0, nc), device=logits.device)] * logits.shape[0]
        for xi, (x, log_) in enumerate(zip(prediction, logits)):  # image index, image inference
            # Apply constraints
            # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0  # width-height
            x = x[xc[xi]]  # confidence
            log_ = log_[xc[xi]]

            # Cat apriori labels if autolabelling
            if labels and len(labels[xi]):
                l = labels[xi]
                v = torch.zeros((len(l), nc + 5), device=x.device)
                v[:, :4] = l[:, 1:5]  # box
                v[:, 4] = 1.0  # conf
                v[range(len(l)), l[:, 0].long() + 5] = 1.0  # cls
                x = torch.cat((x, v), 0)

            # If none remain process next image
            if not x.shape[0]:
                continue

            # Compute conf
            x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf

            # Box (center x, center y, width, height) to (x1, y1, x2, y2)
            box = xywh2xyxy(x[:, :4])

            # Detections matrix nx6 (xyxy, conf, cls)
            if multi_label:
                i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
                x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
            else:  # best class only
                conf, j = x[:, 5:].max(1, keepdim=True)
                x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]
                log_ = log_[conf.view(-1) > conf_thres]

            # Filter by class
            if classes is not None:
                x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]

            # Check shape
            n = x.shape[0]  # number of boxes
            if not n:  # no boxes
                continue
            elif n > max_nms:  # excess boxes
                x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # sort by confidence

            # Batched NMS
            c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
            boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
            i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
            if i.shape[0] > max_det:  # limit detections
                i = i[:max_det]
            if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
                # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
                iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
                weights = iou * scores[None]  # box weights
                x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes
                if redundant:
                    i = i[iou.sum(1) > 1]  # require redundancy

            output[xi] = x[i]
            logits_output[xi] = log_[i]
            assert log_[i].shape[0] == x[i].shape[0]
            if (time.time() - t) > time_limit:
                print(f'WARNING: NMS time limit {time_limit}s exceeded')
                break  # time limit exceeded
        return output, logits_output

    @staticmethod
    def yolo_resize(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True):
        return letterbox(img, new_shape=new_shape, color=color, auto=auto, scaleFill=scaleFill, scaleup=scaleup)

    def forward(self, img):
        prediction, logits, _ = self.model(img, augment=False)
        prediction, logits = self.non_max_suppression(prediction, logits, self.confidence, self.iou_thresh,
                                                      classes=None,
                                                      agnostic=self.agnostic)
        self.boxes, self.class_names, self.classes, self.confidences = [[[] for _ in range(img.shape[0])] for _ in
                                                                        range(4)]
        for i, det in enumerate(prediction):  # detections per image
            if len(det):
                for *xyxy, conf, cls in det:
                    # integer pixel coordinates
                    bbox = [int(b) for b in xyxy]
                    self.boxes[i].append(bbox)
                    self.confidences[i].append(round(conf.item(), 2))
                    cls = int(cls.item())
                    self.classes[i].append(cls)
                    if self.names is not None:
                        self.class_names[i].append(self.names[cls])
                    else:
                        self.class_names[i].append(cls)
        return [self.boxes, self.classes, self.class_names, self.confidences], logits

    def preprocessing(self, img):
        if len(img.shape) != 4:
            img = np.expand_dims(img, axis=0)
        im0 = img.astype(np.uint8)
        img = np.array([self.yolo_resize(im, new_shape=self.img_size)[0] for im in im0])
        img = img.transpose((0, 3, 1, 2))
        img = np.ascontiguousarray(img)
        img = torch.from_numpy(img).to(self.device)
        img = img / 255.0
        return img
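
Before wiring the wrapper into Grad-CAM, you can exercise it on its own. A minimal sketch (best.pt, test.jpg and the class names are placeholders for your own files):

    # standalone sketch: run the detector wrapper on one image (placeholder paths/names)
    import cv2
    from models.yolo_v5_object_detector import YOLOV5TorchObjectDetector

    model = YOLOV5TorchObjectDetector('best.pt', 'cpu', img_size=(640, 640), names=['0', '1'])
    img = cv2.imread('test.jpg')                     # BGR
    torch_img = model.preprocessing(img[..., ::-1])  # RGB, letterboxed, e.g. (1, 3, 480, 640)
    [boxes, classes, class_names, confidences], logits = model(torch_img)
    print(boxes[0])         # [[x1, y1, x2, y2], ...], one box per detection
    print(class_names[0])   # class name per detection
    print(logits[0].shape)  # (num_kept_boxes, nc): class logits that survived NMS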

Step 4: add the main program, main_gradcam.py.

The code is as follows:

import os
import random
import time
import argparse
import numpy as np
from models.gradcam import YOLOV5GradCAM, YOLOV5GradCAMPP
from models.yolo_v5_object_detector import YOLOV5TorchObjectDetector
import cv2

# class names of the dataset (replace with your own; see note 1 below)
names = ["0", "1"]  # class names

# the three layers feeding the Detect head (see note 2 below)
target_layers = ['model_18_cv3_act', 'model_21_cv3_act', 'model_24_cv3_act']

# Arguments
parser = argparse.ArgumentParser()
parser.add_argument('--model-path', type=str, default="D:/yolov5-master/yolov5-master/runs/train/exp31/weights/best.pt", help='Path to the model')
parser.add_argument('--img-path', type=str, default='D:/yolov5-master/yolov5-master/mydata2/images/test/Pic28.jpg', help='input image path')
parser.add_argument('--output-dir', type=str, default='./gracam/31outputs/', help='output dir')
parser.add_argument('--img-size', type=int, default=640, help="input image size")
parser.add_argument('--target-layer', type=str, default='model_17_cv3_act',
                    help='The layer hierarchical address to which gradcam will be applied,'
                         ' the names should be separated by underline')
parser.add_argument('--method', type=str, default='gradcam', help='gradcam or gradcampp')
parser.add_argument('--device', type=str, default='cpu', help='cuda or cpu')
parser.add_argument('--no_text_box', action='store_true',
                    help='do not show label and box on the heatmap')
args = parser.parse_args()


def get_res_img(bbox, mask, res_img):
    mask = mask.squeeze(0).mul(255).add_(0.5).clamp_(0, 255).permute(1, 2, 0).detach().cpu().numpy().astype(
        np.uint8)
    heatmap = cv2.applyColorMap(mask, cv2.COLORMAP_JET)
    # n_heatmat = (Box.fill_outer_box(heatmap, bbox) / 255).astype(np.float32)
    n_heatmat = (heatmap / 255).astype(np.float32)
    res_img = res_img / 255
    res_img = cv2.add(res_img, n_heatmat)
    res_img = (res_img / res_img.max())
    return res_img, n_heatmat


def plot_one_box(x, img, color=None, label=None, line_thickness=3):
    # cv2 quirk: it will not draw on an image converted from torch unless the
    # image is written to disk and read back
    cv2.imwrite('temp.jpg', (img * 255).astype(np.uint8))
    img = cv2.imread('temp.jpg')
    # Plots one bounding box on image img
    tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1  # line/font thickness
    color = color or [random.randint(0, 255) for _ in range(3)]
    c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
    cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
    if label:
        tf = max(tl - 1, 1)  # font thickness
        t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
        outside = c1[1] - t_size[1] - 3 >= 0  # label fits outside box, above
        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 if outside else c1[1] + t_size[1] + 3
        outsize_right = c2[0] - img.shape[:2][1] > 0  # label sticks out past the right edge
        c1 = c1[0] - (c2[0] - img.shape[:2][1]) if outsize_right else c1[0], c1[1]
        c2 = c2[0] - (c2[0] - img.shape[:2][1]) if outsize_right else c2[0], c2[1]
        cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA)  # filled
        cv2.putText(img, label, (c1[0], c1[1] - 2 if outside else c2[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf,
                    lineType=cv2.LINE_AA)
    return img


# run detection and Grad-CAM on a single image
def main(img_path):
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]
    device = args.device
    input_size = (args.img_size, args.img_size)
    # read the image (cv2 loads BGR)
    img = cv2.imread(img_path)
    print('[INFO] Loading the model')
    # instantiate the YOLOv5 wrapper to obtain detections
    model = YOLOV5TorchObjectDetector(args.model_path, device, img_size=input_size, names=names)
    # img[..., ::-1]: BGR --> RGB
    # (480, 640, 3) --> (1, 3, 480, 640)
    torch_img = model.preprocessing(img[..., ::-1])
    tic = time.time()
    # iterate over the three detection layers
    for target_layer in target_layers:
        # pick the grad-cam variant
        if args.method == 'gradcam':
            saliency_method = YOLOV5GradCAM(model=model, layer_name=target_layer, img_size=input_size)
        elif args.method == 'gradcampp':
            saliency_method = YOLOV5GradCAMPP(model=model, layer_name=target_layer, img_size=input_size)
        masks, logits, [boxes, _, class_names, conf] = saliency_method(torch_img)  # run inference + Grad-CAM
        result = torch_img.squeeze(0).mul(255).add_(0.5).clamp_(0, 255).permute(1, 2, 0).detach().cpu().numpy()
        result = result[..., ::-1]  # convert to bgr
        # output settings
        image_name = os.path.basename(img_path)  # image file name
        save_path = f'{args.output_dir}{image_name[:-4]}/{args.method}'
        if not os.path.exists(save_path):
            os.makedirs(save_path)
        print(f'[INFO] Saving the final image at {save_path}')
        # iterate over every detected object in the image
        for i, mask in enumerate(masks):
            res_img = result.copy()
            # position and class of the object
            bbox, cls_name = boxes[0][i], class_names[0][i]
            label = f'{cls_name} {conf[0][i]}'  # class + confidence score
            # overlay the object's heatmap
            res_img, heat_map = get_res_img(bbox, mask, res_img)
            res_img = plot_one_box(bbox, res_img, label=label, color=colors[int(names.index(cls_name))],
                                   line_thickness=3)
            # rescale back to the original image size
            res_img = cv2.resize(res_img, dsize=(img.shape[:-1][::-1]))
            output_path = f'{save_path}/{target_layer[6:8]}_{i}.jpg'
            cv2.imwrite(output_path, res_img)
            print(f'{target_layer[6:8]}_{i}.jpg done!!')
    print(f'Total time : {round(time.time() - tic, 4)} s')


if __name__ == '__main__':
    # if the image path is a directory, process every image inside it
    if os.path.isdir(args.img_path):
        img_list = os.listdir(args.img_path)
        print(img_list)
        for item in img_list:
            # build the full path of each image in the directory
            main(os.path.join(args.img_path, item))
    # single image
    else:
        main(args.img_path)
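
All arguments have defaults, so a typical run only overrides the paths (the values below are placeholders):

    python main_gradcam.py --model-path runs/train/exp31/weights/best.pt --img-path data/images/test --method gradcam --device cpu

When --img-path is a directory, every image in it is processed; the heatmaps for the three target layers are written under --output-dir, one subfolder per input image.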

Note 1: names must match the class names of your own dataset, in the same index order as your data yaml.
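
For example, with a hypothetical two-class dataset whose data yaml declares names: ['cat', 'dog'], the list at the top of main_gradcam.py must mirror that order:

    # hypothetical two-class example: the index order must match your data yaml
    names = ['cat', 'dog']  # class 0 -> 'cat', class 1 -> 'dog'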

Note 2: target_layers must match your own model yaml. In the model used here, the three layers feeding the Detect head are 18, 21 and 24; for the stock yolov5s.yaml they are typically 17, 20 and 23.
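
To list the valid layer names for your own checkpoint, print the module paths and join them with underscores, which is exactly how find_yolo_layer resolves them (a sketch; the checkpoint path is a placeholder):

    # sketch: list candidate target layers of a checkpoint (placeholder path)
    from models.experimental import attempt_load

    model = attempt_load('yolov5s.pt', device='cpu', fuse=False)
    for name, _ in model.named_modules():
        if name.endswith('cv3.act'):
            print(name.replace('.', '_'))  # e.g. model_17_cv3_act ... model_23_cv3_act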

Note: if the run fails with the following error:

    saliency_map = (weights * activations).sum(1, keepdim=True)
    RuntimeError: The size of tensor a (32) must match the size of tensor b (512) at non-singleton dimension 1

you need to modify gradcam.py where the Grad-CAM weights are built (around line 73 of the file): tile the weights along the channel dimension until they match the activations.
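
For reference, these are the lines already included in the gradcam.py listing above; they go in YOLOV5GradCAM.forward, right after weights = alpha.view(b, k, 1, 1):

    # tile the weights along the channel dimension until they match the
    # activations (the original fix doubled them a fixed four times with
    # `for i in range(4): weights = torch.cat((weights, weights), 1)`;
    # the loop below generalizes that and is a no-op when the shapes already match)
    while weights.shape[1] < activations.shape[1]:
        weights = torch.cat((weights, weights), 1)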


Summary

This post showed how to make the latest YOLOv5 code base output Grad-CAM heatmaps and walked through the whole procedure step by step. I hope it proves useful.
