
YOLOv5 Study Notes (10): GradCAM Heatmap Visualization

(Figure: an example GradCAM heatmap overlaid on a detection.)

1. Install pytorch-grad-cam

```bash
pip install grad-cam
```
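The pip package is named `grad-cam`, but the module it installs is `pytorch_grad_cam`, so you can verify the installation with a quick import. (Strictly speaking, the walkthrough below implements its own Grad-CAM classes and never imports this package, so this step is only needed if you also want the reference implementation available.)

```bash
python -c "import pytorch_grad_cam; print(pytorch_grad_cam.__file__)"
```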

2. Modify yolo.py

In yolo.py, replace the `def forward(self, x)` method of `class Detect(nn.Module)` with the code below.

Note: revert this change before training. The extra return value changes the head's output signature, which breaks the loss computation and raises an error. (A sketch of an opt-in alternative that avoids swapping files follows the code block.)

```python
def forward(self, x):
    z = []  # inference output
    logits_ = []  # modification 1: collect raw class logits
    for i in range(self.nl):
        x[i] = self.m[i](x[i])  # conv
        bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
        x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()

        if not self.training:  # inference
            if self.onnx_dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]:
                self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)

            logits = x[i][..., 5:]  # modification 2: class scores before sigmoid
            y = x[i].sigmoid()
            if self.inplace:
                y[..., 0:2] = (y[..., 0:2] * 2 - 0.5 + self.grid[i]) * self.stride[i]  # xy
                y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
            else:  # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953
                xy = (y[..., 0:2] * 2 - 0.5 + self.grid[i]) * self.stride[i]  # xy
                wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
                y = torch.cat((xy, wh, y[..., 4:]), -1)
            z.append(y.view(bs, -1, self.no))
            logits_.append(logits.view(bs, -1, self.no - 5))  # modification 3

    return x if self.training else (torch.cat(z, 1), torch.cat(logits_, 1), x)  # boxes+scores, class logits, raw maps
```
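If you would rather not keep two copies of yolo.py, one option is to gate the extra output behind an attribute. This is a minimal sketch of my own, not part of YOLOv5: the hypothetical `include_logits` flag defaults to off, so training and normal inference are unaffected; set `model.model[-1].include_logits = True` before building the Grad-CAM objects.

```python
# Sketch (assumption): only the return statement of the modified forward() changes.
if self.training:
    return x
if getattr(self, 'include_logits', False):  # hypothetical flag, off by default
    return torch.cat(z, 1), torch.cat(logits_, 1), x  # Grad-CAM variant
return torch.cat(z, 1), x  # stock YOLOv5 inference output
```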

3. Add files

Create the following two files in the same directory as yolo.py (the models/ directory):

  • gradcam.py
```python
import time

import torch
import torch.nn.functional as F


def find_yolo_layer(model, layer_name):
    """Find yolov5 layer to calculate GradCAM and GradCAM++

    Args:
        model: yolov5 model.
        layer_name (str): the name of layer with its hierarchical information.

    Return:
        target_layer: found layer
    """
    hierarchy = layer_name.split('_')
    target_layer = model.model._modules[hierarchy[0]]
    for h in hierarchy[1:]:
        target_layer = target_layer._modules[h]
    return target_layer


class YOLOV5GradCAM:
    # locate the target layer and register hooks on it
    def __init__(self, model, layer_name, img_size=(640, 640)):
        self.model = model
        self.gradients = dict()
        self.activations = dict()

        def backward_hook(module, grad_input, grad_output):
            self.gradients['value'] = grad_output[0]
            return None

        def forward_hook(module, input, output):
            self.activations['value'] = output
            return None

        target_layer = find_yolo_layer(self.model, layer_name)
        # the hooks record the target layer's activations (forward) and gradients (backward)
        target_layer.register_forward_hook(forward_hook)
        target_layer.register_full_backward_hook(backward_hook)
        device = 'cuda' if next(self.model.model.parameters()).is_cuda else 'cpu'
        self.model(torch.zeros(1, 3, *img_size, device=device))  # warm-up pass

    def forward(self, input_img, class_idx=True):
        """
        Args:
            input_img: input image with shape of (1, 3, H, W)
        Return:
            mask: saliency map of the same spatial dimension as the input
            logit: model output
            preds: the object predictions
        """
        saliency_maps = []
        b, c, h, w = input_img.size()
        preds, logits = self.model(input_img)
        for logit, cls, cls_name in zip(logits[0], preds[1][0], preds[2][0]):
            if class_idx:
                score = logit[cls]
            else:
                score = logit.max()
            self.model.zero_grad()
            tic = time.time()
            # backpropagate the class score to get gradients at the target layer
            score.backward(retain_graph=True)
            print(f"[INFO] {cls_name}, model-backward took: ", round(time.time() - tic, 4), 'seconds')
            gradients = self.gradients['value']
            activations = self.activations['value']
            b, k, u, v = gradients.size()
            alpha = gradients.view(b, k, -1).mean(2)
            weights = alpha.view(b, k, 1, 1)
            saliency_map = (weights * activations).sum(1, keepdim=True)
            saliency_map = F.relu(saliency_map)
            saliency_map = F.interpolate(saliency_map, size=(h, w), mode='bilinear', align_corners=False)
            saliency_map_min, saliency_map_max = saliency_map.min(), saliency_map.max()
            saliency_map = (saliency_map - saliency_map_min).div(saliency_map_max - saliency_map_min).data
            saliency_maps.append(saliency_map)
        return saliency_maps, logits, preds

    def __call__(self, input_img):
        return self.forward(input_img)


class YOLOV5GradCAMPP(YOLOV5GradCAM):
    def __init__(self, model, layer_name, img_size=(640, 640)):
        super(YOLOV5GradCAMPP, self).__init__(model, layer_name, img_size)

    def forward(self, input_img, class_idx=True):
        saliency_maps = []
        b, c, h, w = input_img.size()
        tic = time.time()
        preds, logits = self.model(input_img)
        print("[INFO] model-forward took: ", round(time.time() - tic, 4), 'seconds')
        for logit, cls, cls_name in zip(logits[0], preds[1][0], preds[2][0]):
            if class_idx:
                score = logit[cls]
            else:
                score = logit.max()
            self.model.zero_grad()
            tic = time.time()
            # backpropagate the class score to get gradients at the target layer
            score.backward(retain_graph=True)
            print(f"[INFO] {cls_name}, model-backward took: ", round(time.time() - tic, 4), 'seconds')
            gradients = self.gradients['value']  # dS/dA
            activations = self.activations['value']  # A
            b, k, u, v = gradients.size()
            alpha_num = gradients.pow(2)
            alpha_denom = gradients.pow(2).mul(2) + \
                activations.mul(gradients.pow(3)).view(b, k, u * v).sum(-1, keepdim=True).view(b, k, 1, 1)
            # torch.where(condition, x, y): pick x where the condition holds, else y
            alpha_denom = torch.where(alpha_denom != 0.0, alpha_denom, torch.ones_like(alpha_denom))
            alpha = alpha_num.div(alpha_denom + 1e-7)
            positive_gradients = F.relu(score.exp() * gradients)  # ReLU(dY/dA) == ReLU(exp(S)*dS/dA)
            weights = (alpha * positive_gradients).view(b, k, u * v).sum(-1).view(b, k, 1, 1)
            saliency_map = (weights * activations).sum(1, keepdim=True)
            saliency_map = F.relu(saliency_map)
            saliency_map = F.interpolate(saliency_map, size=(h, w), mode='bilinear', align_corners=False)
            saliency_map_min, saliency_map_max = saliency_map.min(), saliency_map.max()
            saliency_map = (saliency_map - saliency_map_min).div(saliency_map_max - saliency_map_min).data
            saliency_maps.append(saliency_map)
        return saliency_maps, logits, preds
```
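For reference, the weighting implemented in `YOLOV5GradCAM` is the standard Grad-CAM formulation: the class score $y^c$ of each detected box is backpropagated to the target layer, the gradients are global-average-pooled into per-channel weights $\alpha_k^c$, and the weighted activation maps $A^k$ are summed and rectified:

$$\alpha_k^c = \frac{1}{Z}\sum_{i}\sum_{j}\frac{\partial y^c}{\partial A_{ij}^k}, \qquad L^c_{\text{Grad-CAM}} = \operatorname{ReLU}\Big(\sum_k \alpha_k^c A^k\Big)$$

This corresponds directly to `alpha = gradients.view(b, k, -1).mean(2)` and `F.relu((weights * activations).sum(1, keepdim=True))` in the code. `YOLOV5GradCAMPP` keeps the same structure but replaces the plain average with the Grad-CAM++ pixel-wise coefficients and uses $\exp(y^c)$ as the score, which is what the `alpha_num`/`alpha_denom` terms compute.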
  • yolov5_object_detector.py
```python
import time

import cv2
import numpy as np
import torch
import torch.nn as nn
import torchvision

from models.experimental import attempt_load
from utils.datasets import letterbox
from utils.general import xywh2xyxy
from utils.metrics import box_iou


class YOLOV5TorchObjectDetector(nn.Module):
    def __init__(self,
                 model_weight,
                 device,
                 img_size,
                 names=None,
                 mode='eval',
                 confidence=0.45,
                 iou_thresh=0.45,
                 agnostic_nms=False):
        super(YOLOV5TorchObjectDetector, self).__init__()
        self.device = device
        self.model = None
        self.img_size = img_size
        self.mode = mode
        self.confidence = confidence
        self.iou_thresh = iou_thresh
        self.agnostic = agnostic_nms
        self.model = attempt_load(model_weight, map_location=device, inplace=False, fuse=False)
        self.model.requires_grad_(True)
        self.model.to(device)
        if self.mode == 'train':
            self.model.train()
        else:
            self.model.eval()
        # fetch the names
        if names is None:
            self.names = ['your dataset classname']
        else:
            self.names = names
        # preventing cold start
        img = torch.zeros((1, 3, *self.img_size), device=device)
        self.model(img)

    @staticmethod
    def non_max_suppression(prediction, logits, conf_thres=0.3, iou_thres=0.45, classes=None, agnostic=False,
                            multi_label=False, labels=(), max_det=300):
        """Runs Non-Maximum Suppression (NMS) on inference and logits results

        Returns:
            list of detections, one (n, 6) tensor per image [xyxy, conf, cls],
            and the pruned input logits (n, number-of-classes)
        """
        nc = prediction.shape[2] - 5  # number of classes
        xc = prediction[..., 4] > conf_thres  # candidates

        # Checks
        assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
        assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'

        # Settings
        min_wh, max_wh = 2, 4096  # (pixels) minimum and maximum box width and height
        max_nms = 30000  # maximum number of boxes into torchvision.ops.nms()
        time_limit = 10.0  # seconds to quit after
        redundant = True  # require redundant detections
        multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)
        merge = False  # use merge-NMS

        t = time.time()
        output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0]
        logits_output = [torch.zeros((0, nc), device=logits.device)] * logits.shape[0]
        for xi, (x, log_) in enumerate(zip(prediction, logits)):  # image index, image inference
            # Apply constraints
            # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0  # width-height
            x = x[xc[xi]]  # confidence
            log_ = log_[xc[xi]]
            # Cat apriori labels if autolabelling
            if labels and len(labels[xi]):
                l = labels[xi]
                v = torch.zeros((len(l), nc + 5), device=x.device)
                v[:, :4] = l[:, 1:5]  # box
                v[:, 4] = 1.0  # conf
                v[range(len(l)), l[:, 0].long() + 5] = 1.0  # cls
                x = torch.cat((x, v), 0)
            # If none remain process next image
            if not x.shape[0]:
                continue
            # Compute conf
            x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf
            # Box (center x, center y, width, height) to (x1, y1, x2, y2)
            box = xywh2xyxy(x[:, :4])
            # Detections matrix nx6 (xyxy, conf, cls)
            if multi_label:
                i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
                x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
            else:  # best class only
                conf, j = x[:, 5:].max(1, keepdim=True)
                x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]
                log_ = log_[conf.view(-1) > conf_thres]
            # Filter by class
            if classes is not None:
                x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]
            # Check shape
            n = x.shape[0]  # number of boxes
            if not n:  # no boxes
                continue
            elif n > max_nms:  # excess boxes
                x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # sort by confidence
            # Batched NMS
            c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
            boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
            i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
            if i.shape[0] > max_det:  # limit detections
                i = i[:max_det]
            if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
                # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
                iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
                weights = iou * scores[None]  # box weights
                x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes
                if redundant:
                    i = i[iou.sum(1) > 1]  # require redundancy
            output[xi] = x[i]
            logits_output[xi] = log_[i]
            assert log_[i].shape[0] == x[i].shape[0]
            if (time.time() - t) > time_limit:
                print(f'WARNING: NMS time limit {time_limit}s exceeded')
                break  # time limit exceeded
        return output, logits_output

    @staticmethod
    def yolo_resize(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True):
        return letterbox(img, new_shape=new_shape, color=color, auto=auto, scaleFill=scaleFill, scaleup=scaleup)

    def forward(self, img):
        prediction, logits, _ = self.model(img, augment=False)
        prediction, logits = self.non_max_suppression(prediction, logits, self.confidence, self.iou_thresh,
                                                      classes=None, agnostic=self.agnostic)
        self.boxes, self.class_names, self.classes, self.confidences = [[[] for _ in range(img.shape[0])]
                                                                        for _ in range(4)]
        for i, det in enumerate(prediction):  # detections per image
            if len(det):
                for *xyxy, conf, cls in det:
                    bbox = [int(b) for b in xyxy]  # box coordinates as ints
                    self.boxes[i].append(bbox)
                    self.confidences[i].append(round(conf.item(), 2))
                    cls = int(cls.item())
                    self.classes[i].append(cls)
                    if self.names is not None:
                        self.class_names[i].append(self.names[cls])
                    else:
                        self.class_names[i].append(cls)
        return [self.boxes, self.classes, self.class_names, self.confidences], logits

    def preprocessing(self, img):
        if len(img.shape) != 4:
            img = np.expand_dims(img, axis=0)
        im0 = img.astype(np.uint8)
        img = np.array([self.yolo_resize(im, new_shape=self.img_size)[0] for im in im0])
        img = img.transpose((0, 3, 1, 2))
        img = np.ascontiguousarray(img)
        img = torch.from_numpy(img).to(self.device)
        img = img / 255.0
        return img
```
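As a quick sanity check, the wrapper can be exercised on its own before wiring up Grad-CAM. A minimal sketch, assuming the yolo.py modification from Section 2 is already applied, that you use standard YOLOv5 weights (which store the class-name list on the checkpoint), and with placeholder paths:

```python
import cv2
from models.yolov5_object_detector import YOLOV5TorchObjectDetector

detector = YOLOV5TorchObjectDetector('weights/yolov5s.pt', 'cpu', img_size=(640, 640))
detector.names = detector.model.names  # reuse the names stored in the checkpoint

img = cv2.imread('data/images/bus.jpg')[..., ::-1]   # BGR -> RGB
torch_img = detector.preprocessing(img)              # -> (1, 3, 640, 640) float tensor in [0, 1]
[boxes, classes, class_names, confidences], logits = detector(torch_img)
print(class_names[0], confidences[0])                # detections for the first (and only) image
```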
  • main_gradcam.py (add this one in the same directory as train.py)
```python
import argparse
import os
import random
import time

import cv2
import numpy as np

from models.gradcam import YOLOV5GradCAM, YOLOV5GradCAMPP
from models.yolov5_object_detector import YOLOV5TorchObjectDetector

# dataset class names (COCO)
names = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
         'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
         'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
         'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
         'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
         'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
         'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
         'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
         'hair drier', 'toothbrush']  # class names

# the activation layers feeding the three detect heads of yolov5s
target_layers = ['model_17_cv3_act', 'model_20_cv3_act', 'model_23_cv3_act']

# Arguments
parser = argparse.ArgumentParser()
parser.add_argument('--model-path', type=str, default="weights/yolov5s.pt", help='Path to the model')
parser.add_argument('--img-path', type=str, default='data/images', help='input image path')
parser.add_argument('--output-dir', type=str, default='outputs/', help='output dir')
parser.add_argument('--img-size', type=int, default=640, help="input image size")
parser.add_argument('--target-layer', type=str, default='model_17_cv3_act',
                    help='The layer hierarchical address to which gradcam will applied,'
                         ' the names should be separated by underline')
parser.add_argument('--method', type=str, default='gradcam', help='gradcam method')
parser.add_argument('--device', type=str, default='cpu', help='cuda or cpu')
parser.add_argument('--no_text_box', action='store_true',
                    help='do not show label and box on the heatmap')
args = parser.parse_args()


def get_res_img(bbox, mask, res_img):
    mask = mask.squeeze(0).mul(255).add_(0.5).clamp_(0, 255).permute(1, 2, 0).detach().cpu().numpy().astype(np.uint8)
    heatmap = cv2.applyColorMap(mask, cv2.COLORMAP_JET)
    # n_heatmat = (Box.fill_outer_box(heatmap, bbox) / 255).astype(np.float32)
    n_heatmat = (heatmap / 255).astype(np.float32)
    res_img = res_img / 255
    res_img = cv2.add(res_img, n_heatmat)
    res_img = (res_img / res_img.max())
    return res_img, n_heatmat


def plot_one_box(x, img, color=None, label=None, line_thickness=3):
    # this is a bug in cv2. It does not put box on a converted image from torch unless it's buffered and read again!
    cv2.imwrite('temp.jpg', (img * 255).astype(np.uint8))
    img = cv2.imread('temp.jpg')

    # Plots one bounding box on image img
    tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1  # line/font thickness
    color = color or [random.randint(0, 255) for _ in range(3)]
    c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
    cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
    if label:
        tf = max(tl - 1, 1)  # font thickness
        t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
        outside = c1[1] - t_size[1] - 3 >= 0  # label fits above the box
        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 if outside else c1[1] + t_size[1] + 3
        outsize_right = c2[0] - img.shape[:2][1] > 0  # label sticks out of the image on the right
        c1 = c1[0] - (c2[0] - img.shape[:2][1]) if outsize_right else c1[0], c1[1]
        c2 = c2[0] - (c2[0] - img.shape[:2][1]) if outsize_right else c2[0], c2[1]
        cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA)  # filled
        cv2.putText(img, label, (c1[0], c1[1] - 2 if outside else c2[1] - 2), 0, tl / 3, [225, 255, 255],
                    thickness=tf, lineType=cv2.LINE_AA)
    return img


# run detection + Grad-CAM on a single image
def main(img_path):
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]
    device = args.device
    input_size = (args.img_size, args.img_size)
    # read the image (OpenCV loads BGR)
    img = cv2.imread(img_path)
    print('[INFO] Loading the model')
    # instantiate the YOLOv5 wrapper
    model = YOLOV5TorchObjectDetector(args.model_path, device, img_size=input_size, names=names)
    # img[..., ::-1]: BGR --> RGB
    # (480, 640, 3) --> (1, 3, 480, 640)
    torch_img = model.preprocessing(img[..., ::-1])
    tic = time.time()
    # iterate over the three target layers
    for target_layer in target_layers:
        # choose the Grad-CAM variant
        if args.method == 'gradcam':
            saliency_method = YOLOV5GradCAM(model=model, layer_name=target_layer, img_size=input_size)
        elif args.method == 'gradcampp':
            saliency_method = YOLOV5GradCAMPP(model=model, layer_name=target_layer, img_size=input_size)
        masks, logits, [boxes, _, class_names, conf] = saliency_method(torch_img)  # run CAM + detection
        result = torch_img.squeeze(0).mul(255).add_(0.5).clamp_(0, 255).permute(1, 2, 0).detach().cpu().numpy()
        result = result[..., ::-1]  # convert to BGR
        # output settings
        image_name = os.path.basename(img_path)
        save_path = f'{args.output_dir}{image_name[:-4]}/{args.method}'
        if not os.path.exists(save_path):
            os.makedirs(save_path)
        print(f'[INFO] Saving the final image at {save_path}')
        # one saliency map per detected object
        for i, mask in enumerate(masks):
            res_img = result.copy()
            # box and class of this object
            bbox, cls_name = boxes[0][i], class_names[0][i]
            label = f'{cls_name} {conf[0][i]}'  # class name + confidence
            # overlay the heatmap
            res_img, heat_map = get_res_img(bbox, mask, res_img)
            res_img = plot_one_box(bbox, res_img, label=label, color=colors[int(names.index(cls_name))],
                                   line_thickness=3)
            # resize back to the original image size
            res_img = cv2.resize(res_img, dsize=img.shape[:-1][::-1])
            output_path = f'{save_path}/{target_layer[6:8]}_{i}.jpg'
            cv2.imwrite(output_path, res_img)
            print(f'{target_layer[6:8]}_{i}.jpg done!!')
    print(f'Total time : {round(time.time() - tic, 4)} s')


if __name__ == '__main__':
    if os.path.isdir(args.img_path):  # a folder of images
        img_list = os.listdir(args.img_path)
        print(img_list)
        for item in img_list:
            # build the full path of each image in the folder
            main(os.path.join(args.img_path, item))
    else:  # a single image
        main(args.img_path)
```
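The `--target-layer` string is simply a module path inside the model with dots replaced by underscores: `find_yolo_layer` resolves `model_17_cv3_act` to `model.model[17].cv3.act`. If you are unsure which names are valid for your own network, a small sketch like this prints the candidate activation layers (paths are placeholders):

```python
from models.yolov5_object_detector import YOLOV5TorchObjectDetector

detector = YOLOV5TorchObjectDetector('weights/yolov5s.pt', 'cpu', img_size=(640, 640))
for name, _ in detector.model.named_modules():
    if name.endswith('.act'):             # activations are the usual Grad-CAM targets
        print(name.replace('.', '_'))     # e.g. model.17.cv3.act -> model_17_cv3_act
```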

4. Run

```bash
python main_gradcam.py --img-path data/uav2850/test_images1 --device cuda
```
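Given the save logic in `main()`, each input image gets its own folder under `outputs/`, with one heatmap per target layer and per detected object. For an input named, say, `bus.jpg` with the default settings, the layout is:

```text
outputs/
└── bus/
    └── gradcam/
        ├── 17_0.jpg   # layer model_17, object 0
        ├── 20_0.jpg   # layer model_20, object 0
        └── 23_0.jpg   # layer model_23, object 0
```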

5. Dual-input networks

For a dual-modality (two-stream) network, I modified the code as follows. The changes are mechanical: every forward pass now takes two image tensors, so the wrapper, the Grad-CAM classes, and the main script each gain a second image argument.
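The wrapper below calls `self.model(img, img2, augment=False)`, so this assumes your dual-stream DetectionModel already accepts two images and, with the Detect head modified as in Section 2, returns the same triple. As an illustrative sketch of the expected contract (the argument names are mine, not from the YOLOv5 codebase):

```python
# Expected call signature of the dual-stream model (illustrative):
prediction, logits, raw = model(img_modality1, img_modality2, augment=False)
# prediction: (bs, n_anchors, 5 + nc) boxes+scores, logits: (bs, n_anchors, nc)
```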

  • main_gradcam.py
```python
import argparse
import os
import random
import time

import cv2
import numpy as np

from models.gradcam import YOLOV5GradCAM, YOLOV5GradCAMPP
from models.yolov5_object_detector import YOLOV5TorchObjectDetector

# dataset class names
names = ['uav']  # class names

# target layer(s) to visualize
target_layers = ['model_14']

# Arguments
parser = argparse.ArgumentParser()
parser.add_argument('--model-path', type=str, default="weights/best.pt", help='Path to the model')
parser.add_argument('--img-path', type=str, default='data/images', help='input image path')
parser.add_argument('--img-path2', type=str, default='data/images2', help='input image path (second modality)')
parser.add_argument('--output-dir', type=str, default='outputs/', help='output dir')
parser.add_argument('--img-size', type=int, default=640, help="input image size")
parser.add_argument('--target-layer', type=str, default='model_32_cv3_act',
                    help='The layer hierarchical address to which gradcam will applied,'
                         ' the names should be separated by underline')
parser.add_argument('--method', type=str, default='gradcam', help='gradcam method')
parser.add_argument('--device', type=str, default='cpu', help='cuda or cpu')
parser.add_argument('--no_text_box', action='store_true',
                    help='do not show label and box on the heatmap')
args = parser.parse_args()


def get_res_img(bbox, mask, res_img):
    mask = mask.squeeze(0).mul(255).add_(0.5).clamp_(0, 255).permute(1, 2, 0).detach().cpu().numpy().astype(np.uint8)
    heatmap = cv2.applyColorMap(mask, cv2.COLORMAP_JET)
    # n_heatmat = (Box.fill_outer_box(heatmap, bbox) / 255).astype(np.float32)
    n_heatmat = (heatmap / 255).astype(np.float32)
    res_img = res_img / 255
    res_img = cv2.add(res_img, n_heatmat)
    res_img = (res_img / res_img.max())
    return res_img, n_heatmat


def plot_one_box(x, img, color=None, label=None, line_thickness=3):
    # this is a bug in cv2. It does not put box on a converted image from torch unless it's buffered and read again!
    cv2.imwrite('temp.jpg', (img * 255).astype(np.uint8))
    img = cv2.imread('temp.jpg')

    # Plots one bounding box on image img
    tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1  # line/font thickness
    color = color or [random.randint(0, 255) for _ in range(3)]
    c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
    # cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)  # draw the box (disabled)
    '''
    if True:  # draw the label (disabled)
        tf = max(tl - 1, 1)  # font thickness
        t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
        outside = c1[1] - t_size[1] - 3 >= 0  # label fits above the box
        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 if outside else c1[1] + t_size[1] + 3
        outsize_right = c2[0] - img.shape[:2][1] > 0  # label sticks out of the image on the right
        c1 = c1[0] - (c2[0] - img.shape[:2][1]) if outsize_right else c1[0], c1[1]
        c2 = c2[0] - (c2[0] - img.shape[:2][1]) if outsize_right else c2[0], c2[1]
        cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA)  # filled
        cv2.putText(img, label, (c1[0], c1[1] - 2 if outside else c2[1] - 2), 0, tl / 3, [225, 255, 255],
                    thickness=tf, lineType=cv2.LINE_AA)
    '''
    return img


# run detection + Grad-CAM on a single image pair
def main(img_path, img_path2):
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]
    device = args.device
    input_size = (args.img_size, args.img_size)
    # read both modalities (OpenCV loads BGR)
    img = cv2.imread(img_path)
    img2 = cv2.imread(img_path2)
    print('[INFO] Loading the model')
    # instantiate the YOLOv5 wrapper
    model = YOLOV5TorchObjectDetector(args.model_path, device, img_size=input_size, names=names)
    # img[..., ::-1]: BGR --> RGB
    # (480, 640, 3) --> (1, 3, 480, 640)
    torch_img = model.preprocessing(img[..., ::-1])
    torch_img2 = model.preprocessing(img2[..., ::-1])
    tic = time.time()
    # iterate over the target layers
    for target_layer in target_layers:
        # choose the Grad-CAM variant
        if args.method == 'gradcam':
            saliency_method = YOLOV5GradCAM(model=model, layer_name=target_layer, img_size=input_size)
        elif args.method == 'gradcampp':
            saliency_method = YOLOV5GradCAMPP(model=model, layer_name=target_layer, img_size=input_size)
        masks, logits, [boxes, _, class_names, conf] = saliency_method(torch_img, torch_img2)  # run CAM + detection
        result = torch_img.squeeze(0).mul(255).add_(0.5).clamp_(0, 255).permute(1, 2, 0).detach().cpu().numpy()
        result = result[..., ::-1]  # convert to BGR
        # output settings
        image_name = os.path.basename(img_path)
        save_path = f'{args.output_dir}{image_name[:-4]}/{args.method}'
        if not os.path.exists(save_path):
            os.makedirs(save_path)
        print(f'[INFO] Saving the final image at {save_path}')
        # one saliency map per detected object
        for i, mask in enumerate(masks):
            res_img = result.copy()
            # box and class of this object
            bbox, cls_name = boxes[0][i], class_names[0][i]
            label = f'{cls_name} {conf[0][i]}'  # class name + confidence
            # overlay the heatmap
            res_img, heat_map = get_res_img(bbox, mask, res_img)
            res_img = plot_one_box(bbox, res_img, label=label, color=colors[int(names.index(cls_name))],
                                   line_thickness=3)
            # resize back to the original image size
            res_img = cv2.resize(res_img, dsize=img.shape[:-1][::-1])
            output_path = f'{save_path}/{target_layer[6:8]}_{i}.jpg'
            cv2.imwrite(output_path, res_img)
            print(f'{target_layer[6:8]}_{i}.jpg done!!')
    print(f'Total time : {round(time.time() - tic, 4)} s')


if __name__ == '__main__':
    if os.path.isdir(args.img_path):  # a folder of image pairs
        img_list = os.listdir(args.img_path)
        print(img_list)
        for item in img_list:
            # pair each image with its same-named counterpart in img-path2
            main(os.path.join(args.img_path, item), os.path.join(args.img_path2, item))
    else:  # a single image pair
        main(args.img_path, args.img_path2)
```
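In directory mode the script pairs the two modalities purely by file name (the same `item` is joined onto both `--img-path` and `--img-path2`), so the two folders must contain identically named images:

```text
data/uav2850/
├── test_images1/      # modality 1 (e.g. visible)
│   ├── 0001.jpg
│   └── 0002.jpg
└── test_images2/      # modality 2 (e.g. infrared), same file names
    ├── 0001.jpg
    └── 0002.jpg
```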
  • gradcam.py 
```python
import time

import torch
import torch.nn.functional as F


def find_yolo_layer(model, layer_name):
    """Find yolov5 layer to calculate GradCAM and GradCAM++

    Args:
        model: yolov5 model.
        layer_name (str): the name of layer with its hierarchical information.

    Return:
        target_layer: found layer
    """
    hierarchy = layer_name.split('_')  # e.g. ['model', '17', 'cv3', 'act']
    target_layer = model.model._modules[hierarchy[0]]
    for h in hierarchy[1:]:
        target_layer = target_layer._modules[h]
    print(target_layer)
    return target_layer


class YOLOV5GradCAM:
    # locate the target layer and register hooks on it
    def __init__(self, model, layer_name, img_size=(640, 640)):
        self.model = model
        self.gradients = dict()
        self.activations = dict()

        def backward_hook(module, grad_input, grad_output):
            self.gradients['value'] = grad_output[0]
            return None

        def forward_hook(module, input, output):
            self.activations['value'] = output
            return None

        target_layer = find_yolo_layer(self.model, layer_name)
        # the hooks record the target layer's activations (forward) and gradients (backward)
        target_layer.register_forward_hook(forward_hook)
        target_layer.register_full_backward_hook(backward_hook)
        device = 'cuda' if next(self.model.model.parameters()).is_cuda else 'cpu'
        # warm-up pass with two inputs
        self.model(torch.zeros(1, 3, *img_size, device=device), torch.zeros(1, 3, *img_size, device=device))

    def forward(self, input_img, input_img2, class_idx=True):
        """
        Args:
            input_img: input image with shape of (1, 3, H, W)
        Return:
            mask: saliency map of the same spatial dimension as the input
            logit: model output
            preds: the object predictions
        """
        saliency_maps = []
        b, c, h, w = input_img.size()
        preds, logits = self.model(input_img, input_img2)
        for logit, cls, cls_name in zip(logits[0], preds[1][0], preds[2][0]):
            if class_idx:
                score = logit[cls]
            else:
                score = logit.max()
            self.model.zero_grad()
            tic = time.time()
            # backpropagate the class score to get gradients at the target layer
            score.backward(retain_graph=True)
            print(f"[INFO] {cls_name}, model-backward took: ", round(time.time() - tic, 4), 'seconds')
            gradients = self.gradients['value']
            activations = self.activations['value']
            b, k, u, v = gradients.size()
            alpha = gradients.view(b, k, -1).mean(2)
            weights = alpha.view(b, k, 1, 1)
            saliency_map = (weights * activations).sum(1, keepdim=True)
            saliency_map = F.relu(saliency_map)
            saliency_map = F.interpolate(saliency_map, size=(h, w), mode='bilinear', align_corners=False)
            saliency_map_min, saliency_map_max = saliency_map.min(), saliency_map.max()
            saliency_map = (saliency_map - saliency_map_min).div(saliency_map_max - saliency_map_min).data
            saliency_maps.append(saliency_map)
        return saliency_maps, logits, preds

    def __call__(self, input_img, input_img2):
        return self.forward(input_img, input_img2)


class YOLOV5GradCAMPP(YOLOV5GradCAM):
    def __init__(self, model, layer_name, img_size=(640, 640)):
        super(YOLOV5GradCAMPP, self).__init__(model, layer_name, img_size)

    def forward(self, input_img, input_img2, class_idx=True):
        saliency_maps = []
        b, c, h, w = input_img.size()
        tic = time.time()
        preds, logits = self.model(input_img, input_img2)
        print("[INFO] model-forward took: ", round(time.time() - tic, 4), 'seconds')
        for logit, cls, cls_name in zip(logits[0], preds[1][0], preds[2][0]):
            if class_idx:
                score = logit[cls]
            else:
                score = logit.max()
            self.model.zero_grad()
            tic = time.time()
            # backpropagate the class score to get gradients at the target layer
            score.backward(retain_graph=True)
            print(f"[INFO] {cls_name}, model-backward took: ", round(time.time() - tic, 4), 'seconds')
            gradients = self.gradients['value']  # dS/dA
            activations = self.activations['value']  # A
            b, k, u, v = gradients.size()
            alpha_num = gradients.pow(2)
            alpha_denom = gradients.pow(2).mul(2) + \
                activations.mul(gradients.pow(3)).view(b, k, u * v).sum(-1, keepdim=True).view(b, k, 1, 1)
            # torch.where(condition, x, y): pick x where the condition holds, else y
            alpha_denom = torch.where(alpha_denom != 0.0, alpha_denom, torch.ones_like(alpha_denom))
            alpha = alpha_num.div(alpha_denom + 1e-7)
            positive_gradients = F.relu(score.exp() * gradients)  # ReLU(dY/dA) == ReLU(exp(S)*dS/dA)
            weights = (alpha * positive_gradients).view(b, k, u * v).sum(-1).view(b, k, 1, 1)
            saliency_map = (weights * activations).sum(1, keepdim=True)
            saliency_map = F.relu(saliency_map)
            saliency_map = F.interpolate(saliency_map, size=(h, w), mode='bilinear', align_corners=False)
            saliency_map_min, saliency_map_max = saliency_map.min(), saliency_map.max()
            saliency_map = (saliency_map - saliency_map_min).div(saliency_map_max - saliency_map_min).data
            saliency_maps.append(saliency_map)
        return saliency_maps, logits, preds
```
  • yolov5_object_detector.py
```python
import time

import cv2
import numpy as np
import torch
import torch.nn as nn
import torchvision

from models.experimental import attempt_load
from utils.datasets import letterbox
from utils.general import xywh2xyxy
from utils.metrics import box_iou


class YOLOV5TorchObjectDetector(nn.Module):
    def __init__(self,
                 model_weight,
                 device,
                 img_size,
                 names=None,
                 mode='eval',
                 confidence=0.45,
                 iou_thresh=0.45,
                 agnostic_nms=False):
        super(YOLOV5TorchObjectDetector, self).__init__()
        self.device = device
        self.model = None
        self.img_size = img_size
        self.mode = mode
        self.confidence = confidence
        self.iou_thresh = iou_thresh
        self.agnostic = agnostic_nms
        self.model = attempt_load(model_weight, map_location=device, inplace=False, fuse=False)
        self.model.requires_grad_(True)
        self.model.to(device)
        if self.mode == 'train':
            self.model.train()
        else:
            self.model.eval()
        # fetch the names
        if names is None:
            self.names = ['your dataset classname']
        else:
            self.names = names
        # preventing cold start (two inputs for the dual-stream model)
        img = torch.zeros((1, 3, *self.img_size), device=device)
        img2 = torch.zeros((1, 3, *self.img_size), device=device)
        self.model(img, img2)

    @staticmethod
    def non_max_suppression(prediction, logits, conf_thres=0.3, iou_thres=0.45, classes=None, agnostic=False,
                            multi_label=False, labels=(), max_det=300):
        """Runs Non-Maximum Suppression (NMS) on inference and logits results

        Returns:
            list of detections, one (n, 6) tensor per image [xyxy, conf, cls],
            and the pruned input logits (n, number-of-classes)
        """
        nc = prediction.shape[2] - 5  # number of classes
        xc = prediction[..., 4] > conf_thres  # candidates

        # Checks
        assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
        assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'

        # Settings
        min_wh, max_wh = 2, 4096  # (pixels) minimum and maximum box width and height
        max_nms = 30000  # maximum number of boxes into torchvision.ops.nms()
        time_limit = 10.0  # seconds to quit after
        redundant = True  # require redundant detections
        multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)
        merge = False  # use merge-NMS

        t = time.time()
        output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0]
        logits_output = [torch.zeros((0, nc), device=logits.device)] * logits.shape[0]
        for xi, (x, log_) in enumerate(zip(prediction, logits)):  # image index, image inference
            # Apply constraints
            # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0  # width-height
            x = x[xc[xi]]  # confidence
            log_ = log_[xc[xi]]
            # Cat apriori labels if autolabelling
            if labels and len(labels[xi]):
                l = labels[xi]
                v = torch.zeros((len(l), nc + 5), device=x.device)
                v[:, :4] = l[:, 1:5]  # box
                v[:, 4] = 1.0  # conf
                v[range(len(l)), l[:, 0].long() + 5] = 1.0  # cls
                x = torch.cat((x, v), 0)
            # If none remain process next image
            if not x.shape[0]:
                continue
            # Compute conf
            x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf
            # Box (center x, center y, width, height) to (x1, y1, x2, y2)
            box = xywh2xyxy(x[:, :4])
            # Detections matrix nx6 (xyxy, conf, cls)
            if multi_label:
                i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
                x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
            else:  # best class only
                conf, j = x[:, 5:].max(1, keepdim=True)
                x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]
                log_ = log_[conf.view(-1) > conf_thres]
            # Filter by class
            if classes is not None:
                x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]
            # Check shape
            n = x.shape[0]  # number of boxes
            if not n:  # no boxes
                continue
            elif n > max_nms:  # excess boxes
                x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # sort by confidence
            # Batched NMS
            c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
            boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
            i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
            if i.shape[0] > max_det:  # limit detections
                i = i[:max_det]
            if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
                # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
                iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
                weights = iou * scores[None]  # box weights
                x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes
                if redundant:
                    i = i[iou.sum(1) > 1]  # require redundancy
            output[xi] = x[i]
            logits_output[xi] = log_[i]
            assert log_[i].shape[0] == x[i].shape[0]
            if (time.time() - t) > time_limit:
                print(f'WARNING: NMS time limit {time_limit}s exceeded')
                break  # time limit exceeded
        return output, logits_output

    @staticmethod
    def yolo_resize(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True):
        return letterbox(img, new_shape=new_shape, color=color, auto=auto, scaleFill=scaleFill, scaleup=scaleup)

    def forward(self, img, img2):
        prediction, logits, _ = self.model(img, img2, augment=False)
        prediction, logits = self.non_max_suppression(prediction, logits, self.confidence, self.iou_thresh,
                                                      classes=None, agnostic=self.agnostic)
        self.boxes, self.class_names, self.classes, self.confidences = [[[] for _ in range(img.shape[0])]
                                                                        for _ in range(4)]
        for i, det in enumerate(prediction):  # detections per image
            if len(det):
                for *xyxy, conf, cls in det:
                    bbox = [int(b) for b in xyxy]  # box coordinates as ints
                    self.boxes[i].append(bbox)
                    self.confidences[i].append(round(conf.item(), 2))
                    cls = int(cls.item())
                    self.classes[i].append(cls)
                    if self.names is not None:
                        self.class_names[i].append(self.names[cls])
                    else:
                        self.class_names[i].append(cls)
        return [self.boxes, self.classes, self.class_names, self.confidences], logits

    def preprocessing(self, img):
        if len(img.shape) != 4:
            img = np.expand_dims(img, axis=0)
        im0 = img.astype(np.uint8)
        img = np.array([self.yolo_resize(im, new_shape=self.img_size)[0] for im in im0])
        img = img.transpose((0, 3, 1, 2))
        img = np.ascontiguousarray(img)
        img = torch.from_numpy(img).to(self.device)
        img = img / 255.0
        return img
```
Run it with both image folders:

```bash
python main_gradcam.py --img-path data/uav2850/test_images1 --img-path2 data/uav2850/test_images2 --device cpu
```
