- The key method is run_on_opencv_image(self, image). It goes roughly through the following steps, which we will walk through one by one (an end-to-end usage sketch follows the snippet):

```python
predictions = self.compute_prediction(image)   # preprocess the OpenCV np.ndarray to a tensor and run the model
top_predictions = self.select_top_predictions(predictions)
result = image.copy()
result = self.overlay_boxes(result, top_predictions)
if self.cfg.MODEL.KEYPOINT_ON:
    result = self.overlay_keypoints(result, top_predictions)
result = self.overlay_class_names(result, top_predictions)
return result
```
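Before stepping into the internals, here is a minimal driver sketch showing how COCODemo is typically used end to end; the config path and image filename are hypothetical placeholders (the repo's demo/webcam.py does essentially this):

```python
import cv2
from maskrcnn_benchmark.config import cfg
from predictor import COCODemo  # demo/predictor.py, the file analyzed below

# hypothetical keypoint config path; any e2e_keypoint_rcnn_*.yaml should work
cfg.merge_from_file("configs/caffe2/e2e_keypoint_rcnn_R_50_FPN_1x_caffe2.yaml")
cfg.merge_from_list(["MODEL.DEVICE", "cuda"])

coco_demo = COCODemo(cfg, confidence_threshold=0.7, min_image_size=800)
image = cv2.imread("demo.jpg")                 # BGR np.ndarray, e.g. (480, 640, 3)
result = coco_demo.run_on_opencv_image(image)  # annotated copy of the input image
cv2.imwrite("result.jpg", result)
```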
1. First the preprocessing: image = self.transforms(original_image)
- -> input: original_image = [480, 640, 3], integer (uint8) data;
- -> after the transform: image = [3, 800, 1066], dtype torch.float32;
2. Convert to an ImageList, padding the tensor so its spatial size is divisible by cfg.DATALOADER.SIZE_DIVISIBILITY, and move it to CUDA; the padding rule is sketched below.
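A minimal sketch of that padding rule, assuming SIZE_DIVISIBILITY = 32 (the value used with FPN models); to_image_list rounds each spatial dimension up to the nearest multiple before batching:

```python
import math

def padded_size(h, w, size_divisibility=32):
    """Round each spatial dim up to a multiple of size_divisibility,
    mirroring what to_image_list does when it pads the batch tensor."""
    return (math.ceil(h / size_divisibility) * size_divisibility,
            math.ceil(w / size_divisibility) * size_divisibility)

print(padded_size(800, 1066))  # (800, 1088): this padded size explains the feature-map shapes below
```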
3. Compute the predictions:
predictions = self.model(image_list)  # self.model = build_detection_model(cfg)
This jumps into generalized_rcnn.py, where the actual model work happens (see that file for the details). This is the core of the pipeline: building the ResNet+FPN backbone, generating proposals with the RPN, and predicting the final results. It splits into three parts:
- self.backbone = build_backbone(cfg)
- self.rpn = build_rpn(cfg, self.backbone.out_channels)  # 256 * 4 = 1024
- self.roi_heads = build_roi_heads(cfg, self.backbone.out_channels)
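Putting the three parts together, the forward pass in generalized_rcnn.py boils down to the following (a simplified sketch of GeneralizedRCNN.forward, inference path only; the real class also handles training losses):

```python
import torch.nn as nn
from maskrcnn_benchmark.structures.image_list import to_image_list

class GeneralizedRCNNSketch(nn.Module):  # simplified stand-in for GeneralizedRCNN
    def forward(self, images, targets=None):
        images = to_image_list(images)
        features = self.backbone(images.tensors)                          # 1) ResNet+FPN feature maps P2..P6
        proposals, proposal_losses = self.rpn(images, features, targets)  # 2) candidate boxes
        x, result, detector_losses = self.roi_heads(features, proposals, targets)  # 3) box + keypoint heads
        return result  # at inference time: a list of BoxList, one per image
```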
1) modeling\backbone\backbone.py extracts the feature maps from each stage; these feature maps are then used for the RPN and for ROI pooling. After the input image has been preprocessed:
features = self.backbone(images.tensors)
the P2~P6 feature maps come out with the following sizes:
ipdb> p features[0].size()
torch.Size([1, 256, 200, 272])
ipdb> p features[1].size()
torch.Size([1, 256, 100, 136])
ipdb> p features[2].size()
torch.Size([1, 256, 50, 68])
ipdb> p features[3].size()
torch.Size([1, 256, 25, 34])
ipdb> p features[4].size()
torch.Size([1, 256, 13, 17])
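These shapes follow directly from the padded input (800 x 1088) and the FPN strides, assuming the standard strides 4, 8, 16, 32, 64 for P2..P6:

```python
import math

h, w = 800, 1088  # padded input size from step 2
for level, stride in zip(("P2", "P3", "P4", "P5", "P6"), (4, 8, 16, 32, 64)):
    print(level, math.ceil(h / stride), math.ceil(w / stride))
# P2 200 272, P3 100 136, P4 50 68, P5 25 34, P6 13 17 -- matching the ipdb output above
```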
2) modeling\rpn\rpn.py -> the RPN network produces the candidate boxes; 1000 proposals are kept for the image.
proposals, proposal_losses = self.rpn(images, features, targets)
The proposals come back as: [BoxList(num_boxes=1000, image_width=1066, image_height=800, mode=xyxy)]
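BoxList (maskrcnn_benchmark/structures/bounding_box.py) is the repo's container for boxes plus named fields; a quick way to poke at the proposal list from the debugger (a sketch; 'objectness' is the score field the RPN attaches during proposal selection):

```python
p = proposals[0]               # BoxList for the single image in the batch
print(len(p), p.size, p.mode)  # 1000 (1066, 800) xyxy
print(p.bbox.shape)            # torch.Size([1000, 4]): one (x1, y1, x2, y2) row per proposal
print(p.fields())              # ['objectness'] -- the RPN score kept alongside each box
```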
3) modeling\roi_heads\roi_heads.py. Here the proposals pass through the Fast R-CNN heads,
x, result, detector_losses = self.roi_heads(features, proposals, targets)
which are composed of two branches: a detection (box) branch and a keypoint branch;
roi_heads.append(("keypoint", build_roi_keypoint_head(cfg, in_channels)))
roi_heads = CombinedROIHeads(cfg, roi_heads)
In forward() of roi_heads.py, x, detections, loss_box = self.box(features, proposals, targets) produces the detection results, which the keypoint branch then consumes.
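A simplified sketch of that forward flow in CombinedROIHeads (inference path; note the keypoint branch consumes the detections produced by the box branch, not the raw RPN proposals):

```python
import torch

class CombinedROIHeadsSketch(torch.nn.ModuleDict):  # stand-in for CombinedROIHeads
    def forward(self, features, proposals, targets=None):
        losses = {}
        # box branch: classify and regress the 1000 proposals into final detections
        x, detections, loss_box = self.box(features, proposals, targets)
        losses.update(loss_box)
        if self.cfg.MODEL.KEYPOINT_ON:
            # keypoint branch: predict the 17 COCO keypoints for each detected person
            x, detections, loss_kp = self.keypoint(features, detections, targets)
            losses.update(loss_kp)
        return x, detections, losses
```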
For reference, here is the full annotated demo/predictor.py:

```python
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
import cv2
import torch
from torchvision import transforms as T

from maskrcnn_benchmark.modeling.detector import build_detection_model
from maskrcnn_benchmark.utils.checkpoint import DetectronCheckpointer
from maskrcnn_benchmark.structures.image_list import to_image_list
from maskrcnn_benchmark.modeling.roi_heads.mask_head.inference import Masker
from maskrcnn_benchmark import layers as L
from maskrcnn_benchmark.utils import cv2_util
from tensorboardX import SummaryWriter


class COCODemo(object):
    # COCO categories for pretty print
    CATEGORIES = [
        "__background",
        "person",
        # ... (80 + 1 classes in total; the full list is elided here)
        "toothbrush",
    ]

    def __init__(
        self,
        cfg,
        confidence_threshold=0.7,
        show_mask_heatmaps=False,
        masks_per_dim=2,
        min_image_size=224,
    ):
        self.cfg = cfg.clone()
        self.model = build_detection_model(cfg)  # jumps into generalized_rcnn.py
        print(self.model)
        self.model.eval()
        self.device = torch.device(cfg.MODEL.DEVICE)
        self.model.to(self.device)
        self.min_image_size = min_image_size

        save_dir = cfg.OUTPUT_DIR
        checkpointer = DetectronCheckpointer(cfg, self.model, save_dir=save_dir)
        _ = checkpointer.load(cfg.MODEL.WEIGHT)

        self.transforms = self.build_transform()  # image format conversion and preprocessing

        mask_threshold = -1 if show_mask_heatmaps else 0.5
        self.masker = Masker(threshold=mask_threshold, padding=1)

        # used to make colors for each class
        self.palette = torch.tensor([2 ** 25 - 1, 2 ** 15 - 1, 2 ** 21 - 1])

        self.cpu_device = torch.device("cpu")
        self.confidence_threshold = confidence_threshold
        self.show_mask_heatmaps = show_mask_heatmaps
        self.masks_per_dim = masks_per_dim

    def build_transform(self):
        """
        Creates a basic transformation that was used to train the models
        """
        cfg = self.cfg

        # we are loading images with OpenCV, so we don't need to convert them
        # to BGR, they are already! So all we need to do is to normalize
        # by 255 if we want to convert to BGR255 format, or flip the channels
        # if we want it to be in RGB in [0-1] range.
        if cfg.INPUT.TO_BGR255:
            to_bgr_transform = T.Lambda(lambda x: x * 255)
        else:
            to_bgr_transform = T.Lambda(lambda x: x[[2, 1, 0]])

        normalize_transform = T.Normalize(
            mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD
        )

        transform = T.Compose(
            [
                T.ToPILImage(),
                T.Resize(self.min_image_size),
                T.ToTensor(),
                to_bgr_transform,
                normalize_transform,
            ]
        )
        return transform

    def run_on_opencv_image(self, image):
        """
        Arguments:
            image (np.ndarray): an image as returned by OpenCV
        Returns:
            prediction (BoxList): the detected objects. Additional information
                of the detection properties can be found in the fields of
                the BoxList via `prediction.fields()`
        """
        predictions = self.compute_prediction(image)  # preprocess the OpenCV ndarray to a tensor and run the model
        top_predictions = self.select_top_predictions(predictions)

        result = image.copy()
        if self.show_mask_heatmaps:
            return self.create_mask_montage(result, top_predictions)
        result = self.overlay_boxes(result, top_predictions)
        if self.cfg.MODEL.MASK_ON:
            result = self.overlay_mask(result, top_predictions)
        if self.cfg.MODEL.KEYPOINT_ON:
            result = self.overlay_keypoints(result, top_predictions)
        result = self.overlay_class_names(result, top_predictions)

        return result

    def compute_prediction(self, original_image):
        """
        Arguments:
            original_image (np.ndarray): an image as returned by OpenCV
        Returns:
            prediction (BoxList): the detected objects. Additional information
                of the detection properties can be found in the fields of
                the BoxList via `prediction.fields()`
        """
        # apply pre-processing to image
        # -> input original_image = [480, 640, 3], integer data
        # -> after the transform image = [3, 800, 1066], torch.float32
        image = self.transforms(original_image)

        # convert to an ImageList, padded so that it is divisible by
        # cfg.DATALOADER.SIZE_DIVISIBILITY
        image_list = to_image_list(image, self.cfg.DATALOADER.SIZE_DIVISIBILITY)
        image_list = image_list.to(self.device)
        # compute predictions; inside no_grad, operations are not tracked by autograd
        with torch.no_grad():
            predictions = self.model(image_list)
        predictions = [o.to(self.cpu_device) for o in predictions]

        # always single image is passed at a time
        prediction = predictions[0]

        # reshape prediction (a BoxList) into the original image size
        height, width, channel = original_image.shape[:]
        print(height, width, channel)
        prediction = prediction.resize((width, height))

        if prediction.has_field("mask"):
            # if we have masks, paste the masks in the right position
            # in the image, as defined by the bounding boxes
            masks = prediction.get_field("mask")
            # always single image is passed at a time
            masks = self.masker([masks], [prediction])[0]
            prediction.add_field("mask", masks)
        return prediction

    def select_top_predictions(self, predictions):
        """
        Select only predictions which have a `score` > self.confidence_threshold,
        and returns the predictions in descending order of score
        Arguments:
            predictions (BoxList): the result of the computation by the model.
                It should contain the field `scores`.
        Returns:
            prediction (BoxList): the detected objects. Additional information
                of the detection properties can be found in the fields of
                the BoxList via `prediction.fields()`
        """
        scores = predictions.get_field("scores")
        keep = torch.nonzero(scores > self.confidence_threshold).squeeze(1)
        predictions = predictions[keep]
        scores = predictions.get_field("scores")
        _, idx = scores.sort(0, descending=True)
        return predictions[idx]

    def compute_colors_for_labels(self, labels):
        """
        Simple function that adds fixed colors depending on the class
        """
        colors = labels[:, None] * self.palette
        colors = (colors % 255).numpy().astype("uint8")
        return colors

    def overlay_boxes(self, image, predictions):
        """
        Adds the predicted boxes on top of the image
        Arguments:
            image (np.ndarray): an image as returned by OpenCV
            predictions (BoxList): the result of the computation by the model.
                It should contain the field `labels`.
        """
        labels = predictions.get_field("labels")
        boxes = predictions.bbox

        colors = self.compute_colors_for_labels(labels).tolist()

        for box, color in zip(boxes, colors):
            box = box.to(torch.int64)
            top_left, bottom_right = box[:2].tolist(), box[2:].tolist()
            image = cv2.rectangle(
                image, tuple(top_left), tuple(bottom_right), tuple(color), 1
            )

        return image

    def overlay_mask(self, image, predictions):
        """
        Adds the instances contours for each predicted object.
        Each label has a different color.
        Arguments:
            image (np.ndarray): an image as returned by OpenCV
            predictions (BoxList): the result of the computation by the model.
                It should contain the field `mask` and `labels`.
        """
        masks = predictions.get_field("mask").numpy()
        labels = predictions.get_field("labels")

        colors = self.compute_colors_for_labels(labels).tolist()

        for mask, color in zip(masks, colors):
            thresh = mask[0, :, :, None]
            contours, hierarchy = cv2_util.findContours(
                thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
            )
            image = cv2.drawContours(image, contours, -1, color, 3)

        composite = image

        return composite

    def overlay_keypoints(self, image, predictions):
        keypoints = predictions.get_field("keypoints")
        kps = keypoints.keypoints
        scores = keypoints.get_field("logits")
        # stack (x, y) with the per-keypoint logit: kps becomes [N, #keypoints, 3]
        kps = torch.cat((kps[:, :, 0:2], scores[:, :, None]), dim=2).numpy()
        for region in kps:
            image = vis_keypoints(image, region.transpose((1, 0)))
        return image

    def create_mask_montage(self, image, predictions):
        """
        Create a montage showing the probability heatmaps for each one one of the
        detected objects
        Arguments:
            image (np.ndarray): an image as returned by OpenCV
            predictions (BoxList): the result of the computation by the model.
                It should contain the field `mask`.
        """
        masks = predictions.get_field("mask")
        masks_per_dim = self.masks_per_dim
        masks = L.interpolate(
            masks.float(), scale_factor=1 / masks_per_dim
        ).byte()
        height, width = masks.shape[-2:]
        max_masks = masks_per_dim ** 2
        masks = masks[:max_masks]
        # handle case where we have less detections than max_masks
        if len(masks) < max_masks:
            masks_padded = torch.zeros(max_masks, 1, height, width, dtype=torch.uint8)
            masks_padded[: len(masks)] = masks
            masks = masks_padded
        masks = masks.reshape(masks_per_dim, masks_per_dim, height, width)
        result = torch.zeros(
            (masks_per_dim * height, masks_per_dim * width), dtype=torch.uint8
        )
        for y in range(masks_per_dim):
            start_y = y * height
            end_y = (y + 1) * height
            for x in range(masks_per_dim):
                start_x = x * width
                end_x = (x + 1) * width
                result[start_y:end_y, start_x:end_x] = masks[y, x]
        return cv2.applyColorMap(result.numpy(), cv2.COLORMAP_JET)

    def overlay_class_names(self, image, predictions):
        """
        Adds detected class names and scores in the positions defined by the
        top-left corner of the predicted bounding box
        Arguments:
            image (np.ndarray): an image as returned by OpenCV
            predictions (BoxList): the result of the computation by the model.
                It should contain the field `scores` and `labels`.
        """
        scores = predictions.get_field("scores").tolist()
        labels = predictions.get_field("labels").tolist()
        labels = [self.CATEGORIES[i] for i in labels]
        boxes = predictions.bbox

        template = "{}: {:.2f}"
        for box, score, label in zip(boxes, scores, labels):
            x, y = map(int, box[:2])  # cv2.putText expects integer pixel coordinates
            s = template.format(label, score)
            cv2.putText(
                image, s, (x, y), cv2.FONT_HERSHEY_SIMPLEX, .5, (255, 255, 255), 1
            )

        return image


import numpy as np
import matplotlib.pyplot as plt
from maskrcnn_benchmark.structures.keypoint import PersonKeypoints


def vis_keypoints(img, kps, kp_thresh=2, alpha=0.7):
    """Visualizes keypoints (adapted from vis_one_image).
    kps has shape (4, #keypoints) where the 4 rows are (x, y, logit, prob);
    overlay_keypoints above passes only 3 rows (x, y, logit), which is all
    this function actually uses.
    """
    dataset_keypoints = PersonKeypoints.NAMES
    kp_lines = PersonKeypoints.CONNECTIONS

    # Convert from plt 0-1 RGBA colors to 0-255 BGR colors for opencv.
    cmap = plt.get_cmap('rainbow')
    colors = [cmap(i) for i in np.linspace(0, 1, len(kp_lines) + 2)]
    colors = [(c[2] * 255, c[1] * 255, c[0] * 255) for c in colors]

    # Perform the drawing on a copy of the image, to allow for blending.
    kp_mask = np.copy(img)

    # Draw mid shoulder / mid hip first for better visualization.
    mid_shoulder = (
        kps[:2, dataset_keypoints.index('right_shoulder')] +
        kps[:2, dataset_keypoints.index('left_shoulder')]) / 2.0
    sc_mid_shoulder = np.minimum(
        kps[2, dataset_keypoints.index('right_shoulder')],
        kps[2, dataset_keypoints.index('left_shoulder')])
    mid_hip = (
        kps[:2, dataset_keypoints.index('right_hip')] +
        kps[:2, dataset_keypoints.index('left_hip')]) / 2.0
    sc_mid_hip = np.minimum(
        kps[2, dataset_keypoints.index('right_hip')],
        kps[2, dataset_keypoints.index('left_hip')])
    nose_idx = dataset_keypoints.index('nose')
    # cast coordinates to int: cv2 drawing functions expect integer pixels
    if sc_mid_shoulder > kp_thresh and kps[2, nose_idx] > kp_thresh:
        cv2.line(
            kp_mask, tuple(mid_shoulder.astype(int)), tuple(kps[:2, nose_idx].astype(int)),
            color=colors[len(kp_lines)], thickness=2, lineType=cv2.LINE_AA)
    if sc_mid_shoulder > kp_thresh and sc_mid_hip > kp_thresh:
        cv2.line(
            kp_mask, tuple(mid_shoulder.astype(int)), tuple(mid_hip.astype(int)),
            color=colors[len(kp_lines) + 1], thickness=2, lineType=cv2.LINE_AA)

    # Draw the keypoints.
    for l in range(len(kp_lines)):
        i1 = kp_lines[l][0]
        i2 = kp_lines[l][1]
        p1 = int(kps[0, i1]), int(kps[1, i1])
        p2 = int(kps[0, i2]), int(kps[1, i2])
        if kps[2, i1] > kp_thresh and kps[2, i2] > kp_thresh:
            cv2.line(
                kp_mask, p1, p2,
                color=colors[l], thickness=2, lineType=cv2.LINE_AA)
        if kps[2, i1] > kp_thresh:
            cv2.circle(
                kp_mask, p1,
                radius=3, color=colors[l], thickness=-1, lineType=cv2.LINE_AA)
        if kps[2, i2] > kp_thresh:
            cv2.circle(
                kp_mask, p2,
                radius=3, color=colors[l], thickness=-1, lineType=cv2.LINE_AA)

    # Blend the keypoints.
    return cv2.addWeighted(img, 1.0 - alpha, kp_mask, alpha, 0)
```
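A side note on compute_colors_for_labels above: the palette [2**25 - 1, 2**15 - 1, 2**21 - 1] is just a cheap hash that spreads class ids into distinct (B, G, R) colors via broadcasting and a modulo. A tiny self-contained check:

```python
import torch

palette = torch.tensor([2 ** 25 - 1, 2 ** 15 - 1, 2 ** 21 - 1])
labels = torch.tensor([1, 2, 3])             # e.g. person, bicycle, car
colors = (labels[:, None] * palette) % 255   # broadcast: one (B, G, R) row per label
print(colors.numpy().astype("uint8"))
```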