
maskrcnn_benchmark understanding notes — the inference process, traced from demo\predictor.py (top_predictions)

Let's walk through run_on_opencv_image(self, image) first. It boils down to the following steps, recorded one by one (a minimal usage sketch follows this list):

predictions = self.compute_prediction(image)   # np.ndarray from OpenCV -> tensor -> BoxList
top_predictions = self.select_top_predictions(predictions)
result = image.copy()
result = self.overlay_boxes(result, top_predictions)
if self.cfg.MODEL.KEYPOINT_ON:
    result = self.overlay_keypoints(result, top_predictions)
result = self.overlay_class_names(result, top_predictions)
return result
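
For context, here is a minimal driver sketch showing how COCODemo is typically invoked, in the spirit of demo/webcam.py (the config and image paths are placeholders; it assumes you run from the demo/ directory):

import cv2
from maskrcnn_benchmark.config import cfg
from predictor import COCODemo   # demo/predictor.py

config_file = "../configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml"  # placeholder
cfg.merge_from_file(config_file)
cfg.merge_from_list(["MODEL.DEVICE", "cuda"])

coco_demo = COCODemo(cfg, confidence_threshold=0.7, min_image_size=800)

img = cv2.imread("demo_image.jpg")             # BGR, HxWx3, uint8
composite = coco_demo.run_on_opencv_image(img)
cv2.imwrite("demo_result.jpg", composite)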
  • compute_prediction(self, original_image)

1. Preprocess first:     image = self.transforms(original_image)

-> input original_image = [480, 640, 3], integer (uint8) data;
-> after the transform, image = [3, 800, 1066], torch.float32 (a resize sketch follows);
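
A quick sketch of why 480×640 becomes 800×1066: the demo resizes the shorter side to min_image_size (800 here), and torchvision's T.Resize keeps the aspect ratio:

from PIL import Image
from torchvision import transforms as T

img = Image.new("RGB", (640, 480))    # PIL size is (width, height)
resized = T.Resize(800)(img)          # shorter side 480 -> 800
print(resized.size)                   # (1066, 800): 640 * 800 / 480 = 1066.67 -> 1066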

2. Convert to an ImageList, padded so the spatial dimensions are divisible by cfg.DATALOADER.SIZE_DIVISIBILITY, and move it to CUDA (the padding arithmetic is sketched below).
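
A sketch of the padding arithmetic that to_image_list performs (assuming SIZE_DIVISIBILITY = 32, the usual FPN setting; each spatial dim is rounded up to the nearest multiple):

import math

def padded_size(h, w, divisibility=32):
    # round each spatial dimension up to the nearest multiple of divisibility
    return (int(math.ceil(h / divisibility)) * divisibility,
            int(math.ceil(w / divisibility)) * divisibility)

print(padded_size(800, 1066))   # (800, 1088): 1066 is padded up to 1088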

3. Compute the predictions:

predictions = self.model(image_list)  # self.model = build_detection_model(cfg)

This jumps into generalized_rcnn.py, where the actual model forward runs. This part is the heart of the pipeline: building the ResNet+FPN backbone, generating proposals with the RPN, and predicting the final results. It consists of three parts (a forward-pass sketch follows this list):

self.backbone = build_backbone(cfg)
self.rpn = build_rpn(cfg, self.backbone.out_channels)   # 256 for the FPN backbone used here (a C4 backbone would give 256*4 = 1024)
self.roi_heads = build_roi_heads(cfg, self.backbone.out_channels)
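
A simplified sketch of how these three parts chain together in GeneralizedRCNN.forward (paraphrased from generalized_rcnn.py; inference path only, where targets is None and the loss dicts are empty):

def forward(self, images, targets=None):
    images = to_image_list(images)
    features = self.backbone(images.tensors)             # tuple of FPN maps P2..P6
    proposals, proposal_losses = self.rpn(images, features, targets)
    x, result, detector_losses = self.roi_heads(features, proposals, targets)
    return result                                        # list[BoxList] at test time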

1) modeling\backbone\backbone.py — builds the backbone that extracts the feature map of each stage from the preprocessed input; these feature maps then feed the RPN and the ROI pooling.

features = self.backbone(images.tensors)
This yields the P2~P6 levels with the following shapes (a stride calculation follows the dump):
ipdb> p features[0].size()
torch.Size([1, 256, 200, 272])
ipdb> p features[1].size()
torch.Size([1, 256, 100, 136])
ipdb> p features[2].size()
torch.Size([1, 256, 50, 68])
ipdb> p features[3].size()
torch.Size([1, 256, 25, 34])
ipdb> p features[4].size()
torch.Size([1, 256, 13, 17])
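These shapes follow directly from the padded 800×1088 input and the FPN strides 4/8/16/32/64, rounding up at each level — a quick sketch (assuming ceil division, which matches the dump above):

import math

h, w = 800, 1088                      # padded input (1066 rounded up to 1088, see step 2)
for stride in (4, 8, 16, 32, 64):     # P2..P6
    print(stride, (math.ceil(h / stride), math.ceil(w / stride)))
# 4  -> (200, 272)
# 8  -> (100, 136)
# 16 -> (50, 68)
# 32 -> (25, 34)
# 64 -> (13, 17)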
2) modeling\rpn\rpn.py -> the RPN produces the candidate boxes; after per-level NMS the proposals of all levels are merged, and in this run 1000 proposals are kept for the image.

proposals, proposal_losses = self.rpn(images, features, targets)
The proposals have the form: [BoxList(num_boxes=1000, image_width=1066, image_height=800, mode=xyxy)]
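
A sketch of inspecting these proposals in ipdb (assuming the BoxList carries the RPN's objectness field, as maskrcnn_benchmark's RPN inference attaches):

box_list = proposals[0]                      # one image per batch here
print(box_list.bbox.shape)                   # torch.Size([1000, 4]), xyxy coordinates
print(box_list.fields())                     # ['objectness']
print(box_list.get_field("objectness")[:5])  # RPN scores of the first 5 boxes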

3) modeling\roi_heads\roi_heads.py — the Fast R-CNN stage:

x, result, detector_losses = self.roi_heads(features, proposals, targets)
It is composed of two branches, a detection branch and a segmentation branch (a keypoint branch can also be appended, as below):

roi_heads.append(("keypoint", build_roi_keypoint_head(cfg, in_channels)))
roi_heads = CombinedROIHeads(cfg, roi_heads)
 

In forward() of roi_heads.py, x, detections, loss_box = self.box(features, proposals, targets) produces the detection results; a sketch of the combined forward follows.
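
A simplified sketch of how CombinedROIHeads chains the branches at inference time (paraphrased from roi_heads.py; at test time the mask/keypoint heads run on the detections the box head produced, and the upstream feature-sharing logic is omitted here):

def forward(self, features, proposals, targets=None):
    losses = {}
    x, detections, loss_box = self.box(features, proposals, targets)
    losses.update(loss_box)
    if self.cfg.MODEL.MASK_ON:
        x, detections, loss_mask = self.mask(features, detections, targets)
        losses.update(loss_mask)
    if self.cfg.MODEL.KEYPOINT_ON:
        x, detections, loss_keypoint = self.keypoint(features, detections, targets)
        losses.update(loss_keypoint)
    return x, detections, losses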
 

For reference, the complete demo/predictor.py as traced above:

# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
import cv2
import torch
from torchvision import transforms as T

from maskrcnn_benchmark.modeling.detector import build_detection_model
from maskrcnn_benchmark.utils.checkpoint import DetectronCheckpointer
from maskrcnn_benchmark.structures.image_list import to_image_list
from maskrcnn_benchmark.modeling.roi_heads.mask_head.inference import Masker
from maskrcnn_benchmark import layers as L
from maskrcnn_benchmark.utils import cv2_util
from tensorboardX import SummaryWriter


class COCODemo(object):
    # COCO categories for pretty print
    CATEGORIES = [
        "__background",
        "person",
        # ... (80 + 1 classes in total; the middle entries are elided) ...
        "toothbrush",
    ]

    def __init__(
        self,
        cfg,
        confidence_threshold=0.7,
        show_mask_heatmaps=False,
        masks_per_dim=2,
        min_image_size=224,
    ):
        self.cfg = cfg.clone()
        self.model = build_detection_model(cfg)  # jumps into generalized_rcnn.py
        print(self.model)
        self.model.eval()
        self.device = torch.device(cfg.MODEL.DEVICE)
        self.model.to(self.device)
        self.min_image_size = min_image_size

        save_dir = cfg.OUTPUT_DIR
        checkpointer = DetectronCheckpointer(cfg, self.model, save_dir=save_dir)
        _ = checkpointer.load(cfg.MODEL.WEIGHT)

        self.transforms = self.build_transform()  # image conversion and pre-processing

        mask_threshold = -1 if show_mask_heatmaps else 0.5
        self.masker = Masker(threshold=mask_threshold, padding=1)

        # used to make colors for each class
        self.palette = torch.tensor([2 ** 25 - 1, 2 ** 15 - 1, 2 ** 21 - 1])

        self.cpu_device = torch.device("cpu")
        self.confidence_threshold = confidence_threshold
        self.show_mask_heatmaps = show_mask_heatmaps
        self.masks_per_dim = masks_per_dim

    def build_transform(self):
        """
        Creates a basic transformation that was used to train the models
        """
        cfg = self.cfg
        # we are loading images with OpenCV, so we don't need to convert them
        # to BGR, they are already! So all we need to do is to normalize
        # by 255 if we want to convert to BGR255 format, or flip the channels
        # if we want it to be in RGB in [0-1] range.
        if cfg.INPUT.TO_BGR255:
            to_bgr_transform = T.Lambda(lambda x: x * 255)
        else:
            to_bgr_transform = T.Lambda(lambda x: x[[2, 1, 0]])

        normalize_transform = T.Normalize(
            mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD
        )

        transform = T.Compose(
            [
                T.ToPILImage(),
                T.Resize(self.min_image_size),
                T.ToTensor(),
                to_bgr_transform,
                normalize_transform,
            ]
        )
        return transform

    def run_on_opencv_image(self, image):
        """
        Arguments:
            image (np.ndarray): an image as returned by OpenCV

        Returns:
            prediction (BoxList): the detected objects. Additional information
                of the detection properties can be found in the fields of
                the BoxList via `prediction.fields()`
        """
        predictions = self.compute_prediction(image)  # OpenCV np.ndarray -> tensor -> BoxList
        top_predictions = self.select_top_predictions(predictions)

        result = image.copy()
        if self.show_mask_heatmaps:
            return self.create_mask_montage(result, top_predictions)
        result = self.overlay_boxes(result, top_predictions)
        if self.cfg.MODEL.MASK_ON:
            result = self.overlay_mask(result, top_predictions)
        if self.cfg.MODEL.KEYPOINT_ON:
            result = self.overlay_keypoints(result, top_predictions)
        result = self.overlay_class_names(result, top_predictions)

        return result

    def compute_prediction(self, original_image):
        """
        Arguments:
            original_image (np.ndarray): an image as returned by OpenCV

        Returns:
            prediction (BoxList): the detected objects. Additional information
                of the detection properties can be found in the fields of
                the BoxList via `prediction.fields()`
        """
        # apply pre-processing to image:
        # -> input original_image = [480, 640, 3], uint8 data
        # -> after the transform, image = [3, 800, 1066], torch.float32
        image = self.transforms(original_image)
        # convert to an ImageList, padded so that it is divisible by
        # cfg.DATALOADER.SIZE_DIVISIBILITY
        image_list = to_image_list(image, self.cfg.DATALOADER.SIZE_DIVISIBILITY)
        image_list = image_list.to(self.device)
        # compute predictions
        with torch.no_grad():  # operations inside are not tracked by autograd
            predictions = self.model(image_list)
        predictions = [o.to(self.cpu_device) for o in predictions]

        # always single image is passed at a time
        prediction = predictions[0]

        # reshape prediction (a BoxList) into the original image size
        height, width, channel = original_image.shape
        print(height, width, channel)
        prediction = prediction.resize((width, height))

        if prediction.has_field("mask"):
            # if we have masks, paste the masks in the right position
            # in the image, as defined by the bounding boxes
            masks = prediction.get_field("mask")  # a field of the prediction BoxList
            # always single image is passed at a time
            masks = self.masker([masks], [prediction])[0]
            prediction.add_field("mask", masks)
        return prediction

    def select_top_predictions(self, predictions):
        """
        Select only predictions which have a `score` > self.confidence_threshold,
        and returns the predictions in descending order of score

        Arguments:
            predictions (BoxList): the result of the computation by the model.
                It should contain the field `scores`.

        Returns:
            prediction (BoxList): the detected objects. Additional information
                of the detection properties can be found in the fields of
                the BoxList via `prediction.fields()`
        """
        scores = predictions.get_field("scores")
        keep = torch.nonzero(scores > self.confidence_threshold).squeeze(1)
        predictions = predictions[keep]
        scores = predictions.get_field("scores")
        _, idx = scores.sort(0, descending=True)
        return predictions[idx]

    def compute_colors_for_labels(self, labels):
        """
        Simple function that adds fixed colors depending on the class
        """
        colors = labels[:, None] * self.palette
        colors = (colors % 255).numpy().astype("uint8")
        return colors

    def overlay_boxes(self, image, predictions):
        """
        Adds the predicted boxes on top of the image

        Arguments:
            image (np.ndarray): an image as returned by OpenCV
            predictions (BoxList): the result of the computation by the model.
                It should contain the field `labels`.
        """
        labels = predictions.get_field("labels")
        boxes = predictions.bbox

        colors = self.compute_colors_for_labels(labels).tolist()

        for box, color in zip(boxes, colors):
            box = box.to(torch.int64)
            top_left, bottom_right = box[:2].tolist(), box[2:].tolist()
            image = cv2.rectangle(
                image, tuple(top_left), tuple(bottom_right), tuple(color), 1
            )

        return image

    def overlay_mask(self, image, predictions):
        """
        Adds the instances contours for each predicted object.
        Each label has a different color.

        Arguments:
            image (np.ndarray): an image as returned by OpenCV
            predictions (BoxList): the result of the computation by the model.
                It should contain the field `mask` and `labels`.
        """
        masks = predictions.get_field("mask").numpy()
        labels = predictions.get_field("labels")

        colors = self.compute_colors_for_labels(labels).tolist()

        for mask, color in zip(masks, colors):
            thresh = mask[0, :, :, None]
            contours, hierarchy = cv2_util.findContours(
                thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
            )
            image = cv2.drawContours(image, contours, -1, color, 3)

        composite = image

        return composite

    def overlay_keypoints(self, image, predictions):
        keypoints = predictions.get_field("keypoints")
        kps = keypoints.keypoints
        scores = keypoints.get_field("logits")
        kps = torch.cat((kps[:, :, 0:2], scores[:, :, None]), dim=2).numpy()
        for region in kps:
            image = vis_keypoints(image, region.transpose((1, 0)))
        return image

    def create_mask_montage(self, image, predictions):
        """
        Create a montage showing the probability heatmaps for each one one of the
        detected objects

        Arguments:
            image (np.ndarray): an image as returned by OpenCV
            predictions (BoxList): the result of the computation by the model.
                It should contain the field `mask`.
        """
        masks = predictions.get_field("mask")
        masks_per_dim = self.masks_per_dim
        masks = L.interpolate(
            masks.float(), scale_factor=1 / masks_per_dim
        ).byte()
        height, width = masks.shape[-2:]
        max_masks = masks_per_dim ** 2
        masks = masks[:max_masks]
        # handle case where we have less detections than max_masks
        if len(masks) < max_masks:
            masks_padded = torch.zeros(max_masks, 1, height, width, dtype=torch.uint8)
            masks_padded[: len(masks)] = masks
            masks = masks_padded
        masks = masks.reshape(masks_per_dim, masks_per_dim, height, width)
        result = torch.zeros(
            (masks_per_dim * height, masks_per_dim * width), dtype=torch.uint8
        )
        for y in range(masks_per_dim):
            start_y = y * height
            end_y = (y + 1) * height
            for x in range(masks_per_dim):
                start_x = x * width
                end_x = (x + 1) * width
                result[start_y:end_y, start_x:end_x] = masks[y, x]
        return cv2.applyColorMap(result.numpy(), cv2.COLORMAP_JET)

    def overlay_class_names(self, image, predictions):
        """
        Adds detected class names and scores in the positions defined by the
        top-left corner of the predicted bounding box

        Arguments:
            image (np.ndarray): an image as returned by OpenCV
            predictions (BoxList): the result of the computation by the model.
                It should contain the field `scores` and `labels`.
        """
        scores = predictions.get_field("scores").tolist()
        labels = predictions.get_field("labels").tolist()
        labels = [self.CATEGORIES[i] for i in labels]
        boxes = predictions.bbox

        template = "{}: {:.2f}"
        for box, score, label in zip(boxes, scores, labels):
            x, y = box[:2]
            s = template.format(label, score)
            cv2.putText(
                image, s, (x, y), cv2.FONT_HERSHEY_SIMPLEX, .5, (255, 255, 255), 1
            )

        return image


import numpy as np
import matplotlib.pyplot as plt

from maskrcnn_benchmark.structures.keypoint import PersonKeypoints


def vis_keypoints(img, kps, kp_thresh=2, alpha=0.7):
    """Visualizes keypoints (adapted from vis_one_image).
    kps has shape (4, #keypoints) where 4 rows are (x, y, logit, prob).
    """
    dataset_keypoints = PersonKeypoints.NAMES
    kp_lines = PersonKeypoints.CONNECTIONS

    # Convert from plt 0-1 RGBA colors to 0-255 BGR colors for opencv.
    cmap = plt.get_cmap('rainbow')
    colors = [cmap(i) for i in np.linspace(0, 1, len(kp_lines) + 2)]
    colors = [(c[2] * 255, c[1] * 255, c[0] * 255) for c in colors]

    # Perform the drawing on a copy of the image, to allow for blending.
    kp_mask = np.copy(img)

    # Draw mid shoulder / mid hip first for better visualization.
    mid_shoulder = (
        kps[:2, dataset_keypoints.index('right_shoulder')] +
        kps[:2, dataset_keypoints.index('left_shoulder')]) / 2.0
    sc_mid_shoulder = np.minimum(
        kps[2, dataset_keypoints.index('right_shoulder')],
        kps[2, dataset_keypoints.index('left_shoulder')])
    mid_hip = (
        kps[:2, dataset_keypoints.index('right_hip')] +
        kps[:2, dataset_keypoints.index('left_hip')]) / 2.0
    sc_mid_hip = np.minimum(
        kps[2, dataset_keypoints.index('right_hip')],
        kps[2, dataset_keypoints.index('left_hip')])
    nose_idx = dataset_keypoints.index('nose')
    if sc_mid_shoulder > kp_thresh and kps[2, nose_idx] > kp_thresh:
        cv2.line(
            kp_mask, tuple(mid_shoulder), tuple(kps[:2, nose_idx]),
            color=colors[len(kp_lines)], thickness=2, lineType=cv2.LINE_AA)
    if sc_mid_shoulder > kp_thresh and sc_mid_hip > kp_thresh:
        cv2.line(
            kp_mask, tuple(mid_shoulder), tuple(mid_hip),
            color=colors[len(kp_lines) + 1], thickness=2, lineType=cv2.LINE_AA)

    # Draw the keypoints.
    for l in range(len(kp_lines)):
        i1 = kp_lines[l][0]
        i2 = kp_lines[l][1]
        p1 = kps[0, i1], kps[1, i1]
        p2 = kps[0, i2], kps[1, i2]
        if kps[2, i1] > kp_thresh and kps[2, i2] > kp_thresh:
            cv2.line(
                kp_mask, p1, p2,
                color=colors[l], thickness=2, lineType=cv2.LINE_AA)
        if kps[2, i1] > kp_thresh:
            cv2.circle(
                kp_mask, p1,
                radius=3, color=colors[l], thickness=-1, lineType=cv2.LINE_AA)
        if kps[2, i2] > kp_thresh:
            cv2.circle(
                kp_mask, p2,
                radius=3, color=colors[l], thickness=-1, lineType=cv2.LINE_AA)

    # Blend the keypoints.
    return cv2.addWeighted(img, 1.0 - alpha, kp_mask, alpha, 0)
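
As an aside on compute_colors_for_labels above: multiplying each label by the three large palette constants and taking mod 255 scatters the classes over color space deterministically. A tiny sketch of the arithmetic (values checked by hand: mod 255, 2**25-1 ≡ 1, 2**15-1 ≡ 127, 2**21-1 ≡ 31):

import torch

palette = torch.tensor([2 ** 25 - 1, 2 ** 15 - 1, 2 ** 21 - 1])
labels = torch.tensor([1, 2, 3])             # e.g. person, bicycle, car in CATEGORIES
colors = (labels[:, None] * palette) % 255   # one deterministic color triple per class
print(colors)
# tensor([[  1, 127,  31],
#         [  2, 254,  62],
#         [  3, 126,  93]])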

 
