Previous chapter: Deep Dive Series — Instance Segmentation (II): A Close Look at the Mask R-CNN Instance Segmentation Code and Training on Your Own Data
The code below is commented; with a little thought it is not hard to follow. For anything that remains unclear, step through it in a debugger and watch the values; that makes it far more concrete.

1. Utility files
```python
#!/usr/bin/env python
# _*_ coding:utf-8 _*_
# ============================================
# @Time : 2020/05/13 22:57
# @Author : WanDaoYi
# @FileName : image_utils.py
# ============================================

import numpy as np
import skimage.color
import skimage.io
import skimage.transform
from distutils.version import LooseVersion
from config import cfg


class ImageUtils(object):

    def __init__(self):
        self.mean_pixel = np.array(cfg.COMMON.MEAN_PIXEL)
        pass

    def parse_image_meta_graph(self, meta):
        """
        Parses a tensor that contains image attributes to its components.
        See compose_image_meta() for more details.
        :param meta: [batch, meta length] where meta length depends on NUM_CLASSES
        :return: a dict of the parsed tensors.
        """

        image_id = meta[:, 0]
        original_image_shape = meta[:, 1:4]
        image_shape = meta[:, 4:7]
        window = meta[:, 7:11]  # (y1, x1, y2, x2) window of image in pixels
        scale = meta[:, 11]
        active_class_ids = meta[:, 12:]
        return {
            "image_id": image_id,
            "original_image_shape": original_image_shape,
            "image_shape": image_shape,
            "window": window,
            "scale": scale,
            "active_class_ids": active_class_ids,
        }
        pass

    def compose_image_meta(self, image_id, original_image_shape, image_shape,
                           window, scale, active_class_ids):
        """
        Takes attributes of an image and puts them in one 1D array.
        :param image_id: An int ID of the image. Useful for debugging.
        :param original_image_shape: [H, W, C] before resizing or padding.
        :param image_shape: [H, W, C] after resizing and padding
        :param window: (y1, x1, y2, x2) in pixels. The area of the image where the real
                       image is (excluding the padding)
        :param scale: The scaling factor applied to the original image (float32)
        :param active_class_ids: List of class_ids available in the dataset from which
                                 the image came. Useful if training on images from multiple
                                 datasets where not all classes are present in all datasets.
        :return: a 1D numpy array of the packed image meta data
        """

        meta = np.array([image_id] +                  # size=1
                        list(original_image_shape) +  # size=3
                        list(image_shape) +           # size=3
                        list(window) +                # size=4 (y1, x1, y2, x2) in image coordinates
                        [scale] +                     # size=1
                        list(active_class_ids)        # size=class_num
                        )
        return meta
        pass

    def load_image(self, image_path):
        """
        Load the specified image and return a [H, W, 3] Numpy array.
        :param image_path: image path
        :return:
        """
        # Load image
        image = skimage.io.imread(image_path)
        # If grayscale, convert to RGB for consistency.
        if image.ndim != 3:
            image = skimage.color.gray2rgb(image)
        # If it has an alpha channel, remove it for consistency.
        if image.shape[-1] == 4:
            image = image[..., :3]
        return image
        pass

    def mold_image(self, images, mean_pixel):
        """
        Expects an RGB image (or array of images), subtracts
        the mean pixel, and converts the result to float. Expects image
        colors in RGB order.
        :param images:
        :param mean_pixel:
        :return:
        """
        return images.astype(np.float32) - np.array(mean_pixel)
        pass

    def mode_input(self, images_info_list):
        """
        Takes a list of images and modifies them to the format expected
        as an input to the neural network.
        :param images_info_list: List of image matrices [height, width, depth]. Images can
                                 have different sizes.
        :return: returns 3 Numpy matrices:
                 molded_images_list: [N, h, w, 3]. Images resized and normalized.
                 image_metas_list: [N, length of meta data]. Details about each image.
                 windows_list: [N, (y1, x1, y2, x2)]. The portion of the image that has the
                               original image (padding excluded).
        """

        molded_images_list = []
        image_metas_list = []
        windows_list = []

        image_min_dim = cfg.COMMON.IMAGE_MIN_DIM
        image_max_dim = cfg.COMMON.IMAGE_MAX_DIM
        image_min_scale = cfg.COMMON.IMAGE_MIN_SCALE
        image_resize_mode = cfg.COMMON.IMAGE_RESIZE_MODE

        for image_info in images_info_list:
            # resize image
            molded_image, window, scale, padding, crop = self.resize_image(image_info,
                                                                           min_dim=image_min_dim,
                                                                           min_scale=image_min_scale,
                                                                           max_dim=image_max_dim,
                                                                           resize_mode=image_resize_mode)

            molded_image = self.mold_image(molded_image, self.mean_pixel)

            # Build image_meta
            image_meta = self.compose_image_meta(0, image_info.shape, molded_image.shape, window, scale,
                                                 np.zeros([cfg.COMMON.CLASS_NUM], dtype=np.int32))
            # Append
            molded_images_list.append(molded_image)
            image_metas_list.append(image_meta)
            windows_list.append(window)
            pass

        # Pack into arrays
        molded_images_list = np.stack(molded_images_list)
        image_metas_list = np.stack(image_metas_list)
        windows_list = np.stack(windows_list)
        return molded_images_list, image_metas_list, windows_list
        pass

    def resize(self, image, output_shape, order=1, resize_mode="constant", cval=0, clip=True,
               preserve_range=False, anti_aliasing=False, anti_aliasing_sigma=None):
        """
        A wrapper for Scikit-Image resize().
        Scikit-Image generates warnings on every call to resize() if it doesn't
        receive the right parameters. The right parameters depend on the version
        of skimage. This solves the problem by using different parameters per
        version. And it provides a central place to control resizing defaults.
        :param image:
        :param output_shape:
        :param order:
        :param resize_mode:
        :param cval:
        :param clip:
        :param preserve_range:
        :param anti_aliasing:
        :param anti_aliasing_sigma:
        :return:
        """
        if LooseVersion(skimage.__version__) >= LooseVersion("0.14"):
            # New in 0.14: anti_aliasing. Default it to False for backward
            # compatibility with skimage 0.13.
            return skimage.transform.resize(image, output_shape,
                                            order=order, mode=resize_mode, cval=cval, clip=clip,
                                            preserve_range=preserve_range, anti_aliasing=anti_aliasing,
                                            anti_aliasing_sigma=anti_aliasing_sigma)
        else:
            return skimage.transform.resize(image, output_shape,
                                            order=order, mode=resize_mode, cval=cval, clip=clip,
                                            preserve_range=preserve_range)
        pass

    def resize_image(self, image, min_dim=None, max_dim=None, min_scale=None, resize_mode="square"):
        """
        Resize an image keeping the aspect ratio unchanged.
        :param image:
        :param min_dim: if provided, resize the image such that its smaller dimension == min_dim
        :param max_dim: if provided, ensures that the image's longest side doesn't
                        exceed this value.
        :param min_scale: if provided, ensure that the image is scaled up by at least
                          this percent even if min_dim doesn't require it.
        :param resize_mode: resizing mode.
               none: No resizing. Return the image unchanged.
               square: Resize and pad with zeros to get a square image
                       of size [max_dim, max_dim].
               pad64: Pads width and height with zeros to make them multiples of 64.
                      If min_dim or min_scale are provided, it scales the image up
                      before padding. max_dim is ignored in this mode.
                      The multiple of 64 is needed to ensure smooth scaling of feature
                      maps up and down the 6 levels of the FPN pyramid (2**6=64).
               crop: Picks random crops from the image. First, scales the image based
                     on min_dim and min_scale, then picks a random crop of
                     size min_dim x min_dim. Can be used in training only.
                     max_dim is not used in this mode.
        :return:
               image: the resized image
               window: (y1, x1, y2, x2). If max_dim is provided, padding might
                       be inserted in the returned image. If so, this window is the
                       coordinates of the image part of the full image (excluding
                       the padding). The x2, y2 pixels are not included.
               scale: The scale factor used to resize the image
               padding: Padding added to the image [(top, bottom), (left, right), (0, 0)]
        """
        # Keep track of image dtype and return results in the same dtype
        image_dtype = image.dtype
        # Default window (y1, x1, y2, x2) and default scale == 1.
        h, w = image.shape[:2]
        window = (0, 0, h, w)
        scale = 1
        padding = [(0, 0), (0, 0), (0, 0)]
        crop = None

        if resize_mode == "none":
            return image, window, scale, padding, crop
            pass

        # Scale?
        if min_dim:
            # Scale up but not down
            scale = max(1, min_dim / min(h, w))
            pass
        if min_scale and scale < min_scale:
            scale = min_scale
            pass

        # Does it exceed max dim?
        if max_dim and resize_mode == "square":
            image_max = max(h, w)
            if round(image_max * scale) > max_dim:
                scale = max_dim / image_max
                pass
            pass

        # Resize image using bilinear interpolation
        if scale != 1:
            image = self.resize(image, (round(h * scale), round(w * scale)), preserve_range=True)
            pass

        # Need padding or cropping?
        if resize_mode == "square":
            # Get new height and width
            h, w = image.shape[:2]
            top_pad = (max_dim - h) // 2
            bottom_pad = max_dim - h - top_pad
            left_pad = (max_dim - w) // 2
            right_pad = max_dim - w - left_pad
            padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)]
            image = np.pad(image, padding, mode='constant', constant_values=0)
            window = (top_pad, left_pad, h + top_pad, w + left_pad)
            pass

        elif resize_mode == "pad64":
            h, w = image.shape[:2]
            # Both sides must be divisible by 64
            assert min_dim % 64 == 0, "Minimum dimension must be a multiple of 64"
            # Height
            if h % 64 > 0:
                max_h = h - (h % 64) + 64
                top_pad = (max_h - h) // 2
                bottom_pad = max_h - h - top_pad
            else:
                top_pad = bottom_pad = 0
            # Width
            if w % 64 > 0:
                max_w = w - (w % 64) + 64
                left_pad = (max_w - w) // 2
                right_pad = max_w - w - left_pad
            else:
                left_pad = right_pad = 0
            padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)]
            image = np.pad(image, padding, mode='constant', constant_values=0)
            window = (top_pad, left_pad, h + top_pad, w + left_pad)
            pass

        elif resize_mode == "crop":
            # Pick a random crop. np.random.randint excludes the upper bound,
            # so add 1 to allow a zero offset when a side already equals min_dim.
            h, w = image.shape[:2]
            y = np.random.randint(0, (h - min_dim) + 1)
            x = np.random.randint(0, (w - min_dim) + 1)
            crop = (y, x, min_dim, min_dim)
            image = image[y:y + min_dim, x:x + min_dim]
            window = (0, 0, min_dim, min_dim)
            pass

        else:
            raise Exception("Mode {} not supported".format(resize_mode))
            pass

        return image.astype(image_dtype), window, scale, padding, crop
        pass
```
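To make the `square` resize mode above more concrete, here is a small standalone sketch (numpy only; the 600x800 input and the min_dim/max_dim values are made-up stand-ins for the project's `cfg.COMMON.IMAGE_MIN_DIM` / `IMAGE_MAX_DIM`) that reproduces the scale/window/padding arithmetic of `resize_image`:

```python
import numpy as np

h, w = 600, 800                      # toy original image size
min_dim, max_dim = 800, 1024         # typical IMAGE_MIN_DIM / IMAGE_MAX_DIM values

# Scale up so the smaller side reaches min_dim, but never scale down
scale = max(1, min_dim / min(h, w))  # 800 / 600 = 1.333...

# Clamp so the larger side does not exceed max_dim
if round(max(h, w) * scale) > max_dim:
    scale = max_dim / max(h, w)      # 1024 / 800 = 1.28

new_h, new_w = round(h * scale), round(w * scale)  # 768 x 1024

# Pad symmetrically onto a square [max_dim, max_dim] canvas
top_pad = (max_dim - new_h) // 2
left_pad = (max_dim - new_w) // 2
window = (top_pad, left_pad, new_h + top_pad, new_w + left_pad)

print(scale, (new_h, new_w), window)
# 1.28 (768, 1024) (128, 0, 896, 1024)
```

The `window` is the key output: `compose_image_meta` stores it in the image meta, and at inference time it is what lets detections be mapped back from the padded canvas to original image coordinates.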

```python
#!/usr/bin/env python
# _*_ coding:utf-8 _*_
# ============================================
# @Time : 2020/05/13 12:06
# @Author : WanDaoYi
# @FileName : misc_utils.py
# ============================================

import math
import numpy as np
import tensorflow as tf
from utils.bbox_utils import BboxUtil
from config import cfg


class MiscUtils(object):

    def __init__(self):
        self.bbox_util = BboxUtil()
        pass

    def compute_backbone_shapes(self, image_shape, backbone_strides):
        """
        Computes the width and height of each stage of the backbone network.
        :param image_shape: [h, w, c]
        :param backbone_strides: the strides of each layer of the FPN pyramid.
               These values are based on a ResNet101 backbone.
        :return: [N, (height, width)], where N is the number of stages
        """
        return np.array(
            [[int(math.ceil(image_shape[0] / stride)),
              int(math.ceil(image_shape[1] / stride))] for stride in backbone_strides])
        pass

    def batch_slice(self, inputs, graph_fn, batch_size, names=None):
        """
        Splits inputs into slices and feeds each slice to a copy of the given
        computation graph and then combines the results. It allows you to run a
        graph on a batch of inputs even if the graph is written to support one
        instance only.
        :param inputs: list of tensors. All must have the same first dimension length
        :param graph_fn: A function that returns a TF tensor that's part of a graph.
        :param batch_size: number of slices to divide the data into.
        :param names: If provided, assigns names to the resulting tensors.
        :return:
        """

        if not isinstance(inputs, list):
            inputs = [inputs]

        outputs = []
        for i in range(batch_size):
            inputs_slice = [x[i] for x in inputs]
            output_slice = graph_fn(*inputs_slice)
            if not isinstance(output_slice, (tuple, list)):
                output_slice = [output_slice]
            outputs.append(output_slice)
        # Change outputs from a list of slices where each is
        # a list of outputs to a list of outputs and each has
        # a list of slices
        outputs = list(zip(*outputs))

        if names is None:
            names = [None] * len(outputs)

        result = [tf.stack(o, axis=0, name=n)
                  for o, n in zip(outputs, names)]
        if len(result) == 1:
            result = result[0]

        return result
        pass

    def trim_zeros_graph(self, boxes, name='trim_zeros'):
        """
        Often boxes are represented with matrices of shape [N, 4] and
        are padded with zeros. This removes zero boxes.
        :param boxes: [N, 4] matrix of boxes.
        :param name:
        :return: boxes: [M, 4] the non-zero rows;
                 non_zeros: [N] a 1D boolean mask identifying the rows kept
        """

        non_zeros = tf.cast(tf.reduce_sum(tf.abs(boxes), axis=1), tf.bool)
        boxes = tf.boolean_mask(boxes, non_zeros, name=name)
        return boxes, non_zeros
        pass

    def detection_targets_graph(self, proposals, gt_class_ids, gt_boxes, gt_masks):
        """
        Generates detection targets for one image. Subsamples proposals and
        generates target class IDs, bounding box deltas, and masks for each.
        :param proposals: [POST_NMS_ROIS_TRAINING, (y1, x1, y2, x2)] in normalized coordinates.
               Might be zero padded if there are not enough proposals.
        :param gt_class_ids: [MAX_GT_INSTANCES] int class IDs
        :param gt_boxes: [MAX_GT_INSTANCES, (y1, x1, y2, x2)] in normalized coordinates.
        :param gt_masks: [height, width, MAX_GT_INSTANCES] of boolean type.
        :return: Target ROIs and corresponding class IDs, bounding box shifts, and masks.
                 rois: [TRAIN_ROIS_PER_IMAGE, (y1, x1, y2, x2)] in normalized coordinates
                 class_ids: [TRAIN_ROIS_PER_IMAGE]. Integer class IDs. Zero padded.
                 deltas: [TRAIN_ROIS_PER_IMAGE, (dy, dx, log(dh), log(dw))]
                 masks: [TRAIN_ROIS_PER_IMAGE, height, width]. Masks cropped to bbox
                        boundaries and resized to neural network output size.
        Note: Returned arrays might be zero padded if not enough target ROIs.
        """

        # Assertions
        asserts = [tf.Assert(tf.greater(tf.shape(proposals)[0], 0), [proposals], name="roi_assertion"), ]

        with tf.control_dependencies(asserts):
            proposals = tf.identity(proposals)
            pass

        # Remove zero padding
        proposals, _ = self.trim_zeros_graph(proposals, name="trim_proposals")
        gt_boxes, non_zeros = self.trim_zeros_graph(gt_boxes, name="trim_gt_boxes")
        gt_class_ids = tf.boolean_mask(gt_class_ids, non_zeros, name="trim_gt_class_ids")
        gt_masks = tf.gather(gt_masks, tf.where(non_zeros)[:, 0], axis=2, name="trim_gt_masks")

        # Handle COCO crowds
        # A crowd box in COCO is a bounding box around several instances. Exclude
        # them from training. A crowd box is given a negative class ID.
        crowd_ix = tf.where(gt_class_ids < 0)[:, 0]
        non_crowd_ix = tf.where(gt_class_ids > 0)[:, 0]
        crowd_boxes = tf.gather(gt_boxes, crowd_ix)
        gt_class_ids = tf.gather(gt_class_ids, non_crowd_ix)
        gt_boxes = tf.gather(gt_boxes, non_crowd_ix)
        gt_masks = tf.gather(gt_masks, non_crowd_ix, axis=2)

        # Compute overlaps matrix [proposals, gt_boxes]
        overlaps = self.bbox_util.overlaps_graph(proposals, gt_boxes)

        # Compute overlaps with crowd boxes [proposals, crowd_boxes]
        crowd_overlaps = self.bbox_util.overlaps_graph(proposals, crowd_boxes)
        crowd_iou_max = tf.reduce_max(crowd_overlaps, axis=1)
        no_crowd_bool = (crowd_iou_max < 0.001)

        # Determine positive and negative ROIs
        roi_iou_max = tf.reduce_max(overlaps, axis=1)
        # 1. Positive ROIs are those with >= 0.5 IoU with a GT box
        positive_roi_bool = (roi_iou_max >= 0.5)
        positive_indices = tf.where(positive_roi_bool)[:, 0]
        # 2. Negative ROIs are those with < 0.5 IoU with every GT box. Skip crowds.
        negative_indices = tf.where(tf.logical_and(roi_iou_max < 0.5, no_crowd_bool))[:, 0]

        # Subsample ROIs. Aim for 33% positive
        # Positive ROIs
        positive_count = int(cfg.TRAIN.ROIS_PER_IMAGE * cfg.TRAIN.ROI_POSITIVE_RATIO)
        positive_indices = tf.random_shuffle(positive_indices)[:positive_count]
        positive_count = tf.shape(positive_indices)[0]
        # Negative ROIs. Add enough to maintain the positive:negative ratio.
        r = 1.0 / cfg.TRAIN.ROI_POSITIVE_RATIO
        negative_count = tf.cast(r * tf.cast(positive_count, tf.float32), tf.int32) - positive_count
        negative_indices = tf.random_shuffle(negative_indices)[:negative_count]
        # Gather selected ROIs
        positive_rois = tf.gather(proposals, positive_indices)
        negative_rois = tf.gather(proposals, negative_indices)

        # Assign positive ROIs to GT boxes.
        positive_overlaps = tf.gather(overlaps, positive_indices)
        roi_gt_box_assignment = tf.cond(
            tf.greater(tf.shape(positive_overlaps)[1], 0),
            true_fn=lambda: tf.argmax(positive_overlaps, axis=1),
            false_fn=lambda: tf.cast(tf.constant([]), tf.int64)
        )
        roi_gt_boxes = tf.gather(gt_boxes, roi_gt_box_assignment)
        roi_gt_class_ids = tf.gather(gt_class_ids, roi_gt_box_assignment)

        # Compute bbox refinement targets for positive ROIs
        deltas = self.bbox_util.box_refinement_graph(positive_rois, roi_gt_boxes)
        deltas /= np.array(cfg.COMMON.BBOX_STD_DEV)

        # Assign positive ROIs to GT masks
        # Permute masks to [N, height, width, 1]
        transposed_masks = tf.expand_dims(tf.transpose(gt_masks, [2, 0, 1]), -1)
        # Pick the right mask for each ROI
        roi_masks = tf.gather(transposed_masks, roi_gt_box_assignment)

        # Compute mask targets
        boxes = positive_rois
        if cfg.TRAIN.USE_MINI_MASK:
            # Transform ROI coordinates from normalized image space
            # to normalized mini-mask space.
            y1, x1, y2, x2 = tf.split(positive_rois, 4, axis=1)
            gt_y1, gt_x1, gt_y2, gt_x2 = tf.split(roi_gt_boxes, 4, axis=1)
            gt_h = gt_y2 - gt_y1
            gt_w = gt_x2 - gt_x1
            y1 = (y1 - gt_y1) / gt_h
            x1 = (x1 - gt_x1) / gt_w
            y2 = (y2 - gt_y1) / gt_h
            x2 = (x2 - gt_x1) / gt_w
            boxes = tf.concat([y1, x1, y2, x2], 1)
        box_ids = tf.range(0, tf.shape(roi_masks)[0])
        masks = tf.image.crop_and_resize(tf.cast(roi_masks, tf.float32),
                                         boxes, box_ids,
                                         cfg.TRAIN.MASK_SHAPE)
        # Remove the extra dimension from masks.
        masks = tf.squeeze(masks, axis=3)

        # Threshold mask pixels at 0.5 to have GT masks be 0 or 1 to use with
        # binary cross entropy loss.
        masks = tf.round(masks)

        # Append negative ROIs and pad bbox deltas and masks that
        # are not used for negative ROIs with zeros.
        rois = tf.concat([positive_rois, negative_rois], axis=0)
        N = tf.shape(negative_rois)[0]
        P = tf.maximum(cfg.TRAIN.ROIS_PER_IMAGE - tf.shape(rois)[0], 0)
        rois = tf.pad(rois, [(0, P), (0, 0)])
        # roi_gt_boxes = tf.pad(roi_gt_boxes, [(0, N + P), (0, 0)])
        roi_gt_class_ids = tf.pad(roi_gt_class_ids, [(0, N + P)])
        deltas = tf.pad(deltas, [(0, N + P), (0, 0)])
        masks = tf.pad(masks, [(0, N + P), (0, 0), (0, 0)])

        return rois, roi_gt_class_ids, deltas, masks
        pass
```
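`batch_slice` is the workhorse here: `detection_targets_graph` is written for a single image, and `batch_slice` maps it across the batch dimension. Below is a minimal sketch of that behavior (TF 1.x, matching this codebase; it assumes the project's `utils` package is importable, and the per-slice lambda plus the `shifted_boxes` name are toy stand-ins for illustration):

```python
import numpy as np
import tensorflow as tf
from utils.misc_utils import MiscUtils  # assumes the project layout used in this series

misc = MiscUtils()

# [batch=2, N=3, 4] boxes; the per-slice function only ever sees [N, 4]
boxes = tf.constant(np.arange(24, dtype=np.float32).reshape(2, 3, 4))

# Toy per-image graph: shift every box by +1 (stand-in for detection_targets_graph)
result = misc.batch_slice(boxes, lambda b: b + 1.0, batch_size=2, names=["shifted_boxes"])

with tf.Session() as sess:
    out = sess.run(result)
    print(out.shape)  # (2, 3, 4) -- per-image results re-stacked along the batch axis
```

Each slice sees only `[N, 4]`, and the per-slice outputs are stacked back along axis 0, which is exactly how the `[batch, N, 4]` proposal tensors flow through the single-image target-generation graph.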

```python
#!/usr/bin/env python
# _*_ coding:utf-8 _*_
# ============================================
# @Time : 2020/05/01 00:22
# @Author : WanDaoYi
# @FileName : mask_util.py
# ============================================

import warnings
import numpy as np
import scipy.ndimage
from utils.image_utils import ImageUtils
from pycocotools import mask as coco_mask_utils
from config import cfg


class MaskUtil(object):

    def __init__(self):
        self.coco_model_url = cfg.COMMON.COCO_MODEL_URL
        self.image_utils = ImageUtils()
        pass

    # Compute the IoU overlaps between two sets of masks
    def compute_overlaps_masks(self, masks1, masks2):
        """
        :param masks1: [Height, Width, instances]
        :param masks2: [Height, Width, instances]
        :return: IoU overlap matrix between the two sets of masks
        """
        # If either set of masks is empty, return an empty result
        mask_flag = masks1.shape[-1] == 0 or masks2.shape[-1] == 0
        if mask_flag:
            return np.zeros((masks1.shape[-1], masks2.shape[-1]))
            pass

        # Flatten the masks and compute their areas
        masks1 = np.reshape(masks1 > .5, (-1, masks1.shape[-1])).astype(np.float32)
        masks2 = np.reshape(masks2 > .5, (-1, masks2.shape[-1])).astype(np.float32)
        area1 = np.sum(masks1, axis=0)
        area2 = np.sum(masks2, axis=0)

        # intersections and union
        intersections = np.dot(masks1.T, masks2)
        union = area1[:, None] + area2[None, :] - intersections
        overlaps = intersections / union

        return overlaps
        pass

    def annotation_2_mask(self, annotation, height, width):
        """
        Convert an annotation, which can be polygons, uncompressed RLE, or RLE,
        to a binary mask.
        :param annotation: annotation info
        :param height: image height
        :param width: image width
        :return: binary mask (numpy 2D array)
        """
        segment = annotation['segmentation']
        if isinstance(segment, list):
            # polygon -- a single object might consist of multiple parts;
            # we merge all parts into one mask RLE code
            rles = coco_mask_utils.frPyObjects(segment, height, width)
            rle = coco_mask_utils.merge(rles)
            pass
        elif isinstance(segment['counts'], list):
            # uncompressed RLE
            rle = coco_mask_utils.frPyObjects(segment, height, width)
            pass
        else:
            # compressed RLE: segment itself is already the RLE dict
            rle = segment
            pass
        mask = coco_mask_utils.decode(rle)
        return mask
        pass

    def load_mask(self, data, image_id):
        """
        Load instance masks for the given image.
        Different datasets use different ways to store masks. This
        function converts the different mask formats to one format
        in the form of a bitmap [height, width, instances].
        :param data: The Dataset object to pick data from
        :param image_id: image id of image
        :return:
             masks: A bool array of shape [height, width, instance count] with
                    one mask per instance.
             class_ids: a 1D array of class IDs of the instance masks.
        """

        image_info = data.image_info_list[image_id]

        instance_masks = []
        class_ids = []
        annotations = data.image_info_list[image_id]["annotations"]

        # Build mask of shape [height, width, instance_count] and list
        # of class IDs that correspond to each channel of the mask.
        for annotation in annotations:

            class_id = data.class_from_source_map["coco.{}".format(annotation['category_id'])]

            if class_id:
                m = self.annotation_2_mask(annotation, image_info["height"], image_info["width"])

                # Some objects are so small that they're less than 1 pixel area
                # and end up rounded out. Skip those objects.
                if m.max() < 1:
                    continue
                    pass

                # Is it a crowd? If so, use a negative class ID.
                if annotation['iscrowd']:
                    # Use negative class ID for crowds
                    class_id *= -1
                    # For crowd masks, annToMask() sometimes returns a mask
                    # smaller than the given dimensions. If so, resize it.
                    if m.shape[0] != image_info["height"] or m.shape[1] != image_info["width"]:
                        m = np.ones([image_info["height"], image_info["width"]], dtype=bool)
                instance_masks.append(m)
                class_ids.append(class_id)

            pass

        mask = np.stack(instance_masks, axis=2).astype(bool)
        class_ids = np.array(class_ids, dtype=np.int32)
        return mask, class_ids
        pass

    def resize_mask(self, mask, scale, padding, crop=None):
        """
        Resize a mask using the given scale and padding.
        Typically, you get the scale and padding from resize_image() to
        ensure both the image and the mask are resized consistently.
        :param mask:
        :param scale: mask scaling factor
        :param padding: Padding to add to the mask in the form
                        [(top, bottom), (left, right), (0, 0)]
        :param crop:
        :return:
        """
        # Suppress warning from scipy 0.13.0: the output shape of zoom() is
        # calculated with round() instead of int()
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            mask = scipy.ndimage.zoom(mask, zoom=[scale, scale, 1], order=0)
        if crop is not None:
            y, x, h, w = crop
            mask = mask[y:y + h, x:x + w]
        else:
            mask = np.pad(mask, padding, mode='constant', constant_values=0)
        return mask
        pass

    def minimize_mask(self, bbox, mask, mini_shape):
        """
        Resize masks to a smaller version to reduce memory load.
        Mini-masks can be resized back to image scale using expand_masks()
        :param bbox:
        :param mask:
        :param mini_shape:
        :return:
        """
        # cfg.TRAIN.MINI_MASK_SHAPE arrives as a list, so coerce it to a tuple
        mini_shape = tuple(mini_shape)
        mini_mask = np.zeros(mini_shape + (mask.shape[-1],), dtype=bool)
        for i in range(mask.shape[-1]):
            # Pick slice and cast to bool in case load_mask() returned wrong dtype
            m = mask[:, :, i].astype(bool)
            y1, x1, y2, x2 = bbox[i][:4]
            m = m[y1:y2, x1:x2]
            if m.size == 0:
                raise Exception("Invalid bounding box with area of zero")
            # Resize with bilinear interpolation and re-binarize
            m = self.image_utils.resize(m, mini_shape)
            mini_mask[:, :, i] = np.around(m).astype(bool)
        return mini_mask
        pass
```
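As a quick sanity check of `compute_overlaps_masks`, here is a tiny hand-verifiable example (it assumes the project's `utils` package and `config` are importable; the masks themselves are made up): two single-instance mask stacks whose instances overlap on a 2x2 patch.

```python
import numpy as np
from utils.mask_util import MaskUtil  # assumes the project layout used in this series

mask_util = MaskUtil()

# Two 4x4 canvases with one instance each: the first covers the top half,
# the second covers the left half -> intersection = 4 px, union = 12 px
masks1 = np.zeros((4, 4, 1))
masks1[:2, :, 0] = 1
masks2 = np.zeros((4, 4, 1))
masks2[:, :2, 0] = 1

print(mask_util.compute_overlaps_masks(masks1, masks2))  # [[0.33333334]]
```

The result has shape [instances1, instances2]; during evaluation, this IoU matrix is what matches predicted masks to ground-truth instances.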
