
Deep Learning Series: Instance Segmentation (III) - Mask R-CNN Instance Segmentation Code in Detail, Training on Your Own Data: Related Networks, Data Processing, and Utilities

Back to Main Contents

Back to the Instance Segmentation Contents

Previous chapter: Deep Learning Series: Instance Segmentation (II) - Mask R-CNN Instance Segmentation Code in Detail, Training on Your Own Data

 

Paper: Mask R-CNN

Code by the paper's authors: Mask R-CNN code

My refactored code: mask_rcnn_pro

 

In this section, we go through the networks, data processing, and utility code involved in training Mask R-CNN instance segmentation on your own data.

 

VI. Networks, Data Processing, and Utility Code

The code below is commented; with a little thought, it is not hard to follow. For the parts that are harder to grasp, step through them in a debugger to get a more concrete picture.

1. Utility files

```python
#!/usr/bin/env python
# _*_ coding:utf-8 _*_
# ============================================
# @Time     : 2020/05/13 22:57
# @Author   : WanDaoYi
# @FileName : image_utils.py
# ============================================

import numpy as np
import skimage.color
import skimage.io
import skimage.transform
from distutils.version import LooseVersion
from config import cfg


class ImageUtils(object):

    def __init__(self):
        self.mean_pixel = np.array(cfg.COMMON.MEAN_PIXEL)
        pass

    def parse_image_meta_graph(self, meta):
        """
        Parses a tensor that contains image attributes to its components.
        See compose_image_meta() for more details.
        :param meta: [batch, meta length] where meta length depends on NUM_CLASSES
        :return: Returns a dict of the parsed tensors.
        """
        image_id = meta[:, 0]
        original_image_shape = meta[:, 1:4]
        image_shape = meta[:, 4:7]
        window = meta[:, 7:11]  # (y1, x1, y2, x2) window of image in pixels
        scale = meta[:, 11]
        active_class_ids = meta[:, 12:]
        return {
            "image_id": image_id,
            "original_image_shape": original_image_shape,
            "image_shape": image_shape,
            "window": window,
            "scale": scale,
            "active_class_ids": active_class_ids,
        }
        pass

    def compose_image_meta(self, image_id, original_image_shape, image_shape,
                           window, scale, active_class_ids):
        """
        Takes attributes of an image and puts them in one 1D array.
        :param image_id: An int ID of the image. Useful for debugging.
        :param original_image_shape: [H, W, C] before resizing or padding.
        :param image_shape: [H, W, C] after resizing and padding
        :param window: (y1, x1, y2, x2) in pixels. The area of the image where the real
                       image is (excluding the padding)
        :param scale: The scaling factor applied to the original image (float32)
        :param active_class_ids: List of class_ids available in the dataset from which
                                 the image came. Useful if training on images from multiple
                                 datasets where not all classes are present in all datasets.
        :return:
        """
        meta = np.array([image_id] +                  # size=1
                        list(original_image_shape) +  # size=3
                        list(image_shape) +           # size=3
                        list(window) +                # size=4 (y1, x1, y2, x2) in image coordinates
                        [scale] +                     # size=1
                        list(active_class_ids)        # size=class_num
                        )
        return meta
        pass

    def load_image(self, image_path):
        """
        Load the specified image and return a [H, W, 3] Numpy array.
        :param image_path: image path
        :return:
        """
        # Load image
        image = skimage.io.imread(image_path)
        # If grayscale, convert to RGB for consistency.
        if image.ndim != 3:
            image = skimage.color.gray2rgb(image)
        # If it has an alpha channel, remove it for consistency.
        if image.shape[-1] == 4:
            image = image[..., :3]
        return image
        pass

    def mold_image(self, images, mean_pixel):
        """
        Expects an RGB image (or array of images) and subtracts
        the mean pixel and converts it to float. Expects image
        colors in RGB order.
        :param images:
        :param mean_pixel:
        :return:
        """
        return images.astype(np.float32) - np.array(mean_pixel)
        pass

    def mode_input(self, images_info_list):
        """
        Takes a list of images and modifies them to the format expected
        as an input to the neural network.
        :param images_info_list: List of image matrices [height, width, depth]. Images can have
                                 different sizes.
        :return: returns 3 Numpy matrices:
                 molded_images_list: [N, h, w, 3]. Images resized and normalized.
                 image_metas_list: [N, length of meta data]. Details about each image.
                 windows_list: [N, (y1, x1, y2, x2)]. The portion of the image that has the
                               original image (padding excluded).
        """
        molded_images_list = []
        image_metas_list = []
        windows_list = []

        image_min_dim = cfg.COMMON.IMAGE_MIN_DIM
        image_max_dim = cfg.COMMON.IMAGE_MAX_DIM
        image_min_scale = cfg.COMMON.IMAGE_MIN_SCALE
        image_resize_mode = cfg.COMMON.IMAGE_RESIZE_MODE

        for image_info in images_info_list:
            # resize image
            molded_image, window, scale, padding, crop = self.resize_image(image_info,
                                                                           min_dim=image_min_dim,
                                                                           min_scale=image_min_scale,
                                                                           max_dim=image_max_dim,
                                                                           resize_mode=image_resize_mode)
            molded_image = self.mold_image(molded_image, self.mean_pixel)

            # Build image_meta
            image_meta = self.compose_image_meta(0, image_info.shape, molded_image.shape, window, scale,
                                                 np.zeros([cfg.COMMON.CLASS_NUM], dtype=np.int32))
            # Append
            molded_images_list.append(molded_image)
            image_metas_list.append(image_meta)
            windows_list.append(window)
            pass

        # Pack into arrays
        molded_images_list = np.stack(molded_images_list)
        image_metas_list = np.stack(image_metas_list)
        windows_list = np.stack(windows_list)
        return molded_images_list, image_metas_list, windows_list
        pass

    def resize(self, image, output_shape, order=1, resize_mode="constant", cval=0, clip=True,
               preserve_range=False, anti_aliasing=False, anti_aliasing_sigma=None):
        """
        A wrapper for Scikit-Image resize().
        Scikit-Image generates warnings on every call to resize() if it doesn't
        receive the right parameters. The right parameters depend on the version
        of skimage. This solves the problem by using different parameters per
        version. And it provides a central place to control resizing defaults.
        :param image:
        :param output_shape:
        :param order:
        :param resize_mode:
        :param cval:
        :param clip:
        :param preserve_range:
        :param anti_aliasing:
        :param anti_aliasing_sigma:
        :return:
        """
        if LooseVersion(skimage.__version__) >= LooseVersion("0.14"):
            # New in 0.14: anti_aliasing. Default it to False for backward
            # compatibility with skimage 0.13.
            return skimage.transform.resize(image, output_shape,
                                            order=order, mode=resize_mode, cval=cval, clip=clip,
                                            preserve_range=preserve_range, anti_aliasing=anti_aliasing,
                                            anti_aliasing_sigma=anti_aliasing_sigma)
        else:
            return skimage.transform.resize(image, output_shape,
                                            order=order, mode=resize_mode, cval=cval, clip=clip,
                                            preserve_range=preserve_range)
        pass

    def resize_image(self, image, min_dim=None, max_dim=None, min_scale=None, resize_mode="square"):
        """
        resize an image keeping the aspect ratio unchanged.
        :param image:
        :param min_dim: if provided, resize the image such that its smaller dimension == min_dim
        :param max_dim: if provided, ensures that the image longest side doesn't
                        exceed this value.
        :param min_scale: if provided, ensure that the image is scaled up by at least
                          this percent even if min_dim doesn't require it.
        :param resize_mode: resizing mode.
               none: No resizing. Return the image unchanged.
               square: Resize and pad with zeros to get a square image
                       of size [max_dim, max_dim].
               pad64: Pads width and height with zeros to make them multiples of 64.
                      If min_dim or min_scale are provided, it scales the image up
                      before padding. max_dim is ignored in this mode.
                      The multiple of 64 is needed to ensure smooth scaling of feature
                      maps up and down the 6 levels of the FPN pyramid (2**6=64).
               crop: Picks random crops from the image. First, scales the image based
                     on min_dim and min_scale, then picks a random crop of
                     size min_dim x min_dim. Can be used in training only.
                     max_dim is not used in this mode.
        :return:
               image: the resized image
               window: (y1, x1, y2, x2). If max_dim is provided, padding might
                       be inserted in the returned image. If so, this window is the
                       coordinates of the image part of the full image (excluding
                       the padding). The x2, y2 pixels are not included.
               scale: The scale factor used to resize the image
               padding: Padding added to the image [(top, bottom), (left, right), (0, 0)]
        """
        # Keep track of image dtype and return results in the same dtype
        image_dtype = image.dtype
        # Default window (y1, x1, y2, x2) and default scale == 1.
        h, w = image.shape[:2]
        window = (0, 0, h, w)
        scale = 1
        padding = [(0, 0), (0, 0), (0, 0)]
        crop = None

        if resize_mode == "none":
            return image, window, scale, padding, crop
        pass

        # Scale?
        if min_dim:
            # Scale up but not down
            scale = max(1, min_dim / min(h, w))
            pass
        if min_scale and scale < min_scale:
            scale = min_scale
            pass

        # Does it exceed max dim?
        if max_dim and resize_mode == "square":
            image_max = max(h, w)
            if round(image_max * scale) > max_dim:
                scale = max_dim / image_max
                pass
            pass

        # Resize image using bilinear interpolation
        if scale != 1:
            image = self.resize(image, (round(h * scale), round(w * scale)), preserve_range=True)
            pass

        # Need padding or cropping?
        if resize_mode == "square":
            # Get new height and width
            h, w = image.shape[:2]
            top_pad = (max_dim - h) // 2
            bottom_pad = max_dim - h - top_pad
            left_pad = (max_dim - w) // 2
            right_pad = max_dim - w - left_pad
            padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)]
            image = np.pad(image, padding, mode='constant', constant_values=0)
            window = (top_pad, left_pad, h + top_pad, w + left_pad)
            pass
        elif resize_mode == "pad64":
            h, w = image.shape[:2]
            # Both sides must be divisible by 64
            assert min_dim % 64 == 0, "Minimum dimension must be a multiple of 64"
            # Height
            if h % 64 > 0:
                max_h = h - (h % 64) + 64
                top_pad = (max_h - h) // 2
                bottom_pad = max_h - h - top_pad
            else:
                top_pad = bottom_pad = 0
            # Width
            if w % 64 > 0:
                max_w = w - (w % 64) + 64
                left_pad = (max_w - w) // 2
                right_pad = max_w - w - left_pad
            else:
                left_pad = right_pad = 0
            padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)]
            image = np.pad(image, padding, mode='constant', constant_values=0)
            window = (top_pad, left_pad, h + top_pad, w + left_pad)
            pass
        elif resize_mode == "crop":
            # Pick a random crop
            h, w = image.shape[:2]
            # + 1 so that h == min_dim (an empty range) is still valid for np.random.randint
            y = np.random.randint(0, (h - min_dim) + 1)
            x = np.random.randint(0, (w - min_dim) + 1)
            crop = (y, x, min_dim, min_dim)
            image = image[y:y + min_dim, x:x + min_dim]
            window = (0, 0, min_dim, min_dim)
            pass
        else:
            raise Exception("Mode {} not supported".format(resize_mode))
        pass

        return image.astype(image_dtype), window, scale, padding, crop
        pass
```
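
To make the image-meta layout concrete, here is a minimal, self-contained sketch (the image sizes and the 4-class dataset are made-up example values) that packs a meta vector by hand and slices it back apart at the same fixed offsets that compose_image_meta() and parse_image_meta_graph() agree on:

```python
import numpy as np

# Toy values: a 768x1024 RGB image molded into a 1024x1024 canvas,
# and a dataset with 4 classes (both assumptions for this sketch).
image_id = 7
original_image_shape = (768, 1024, 3)  # [H, W, C] before resizing
image_shape = (1024, 1024, 3)          # [H, W, C] after resizing and padding
window = (128, 0, 896, 1024)           # (y1, x1, y2, x2) of the real image inside the padding
scale = 1.0
active_class_ids = np.ones(4, dtype=np.int32)

# Pack into one 1D array, exactly as compose_image_meta() does
meta = np.array([image_id] + list(original_image_shape) + list(image_shape)
                + list(window) + [scale] + list(active_class_ids))

# Unpack at the fixed offsets used by parse_image_meta_graph()
assert meta[0] == image_id
assert tuple(meta[1:4]) == original_image_shape
assert tuple(meta[4:7]) == image_shape
assert tuple(meta[7:11]) == window
assert meta[11] == scale
assert (meta[12:] == active_class_ids).all()
```

Because the window and the original shape ride along with every image, the detection step can later map boxes and masks back onto the unpadded original without any extra bookkeeping.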

 

```python
#!/usr/bin/env python
# _*_ coding:utf-8 _*_
# ============================================
# @Time     : 2020/05/13 12:06
# @Author   : WanDaoYi
# @FileName : misc_utils.py
# ============================================

import math
import numpy as np
import tensorflow as tf
from utils.bbox_utils import BboxUtil
from config import cfg


class MiscUtils(object):

    def __init__(self):
        self.bbox_util = BboxUtil()
        pass

    def compute_backbone_shapes(self, image_shape, backbone_strides):
        """
        Computes the width and height of each stage of the backbone network
        :param image_shape: [h, w, c]
        :param backbone_strides: The strides of each layer of the FPN Pyramid.
               These values are based on a resNet101 backbone.
        :return: [N, (height, width)]. Where N is the number of stages
        """
        return np.array(
            [[int(math.ceil(image_shape[0] / stride)),
              int(math.ceil(image_shape[1] / stride))] for stride in backbone_strides])
        pass

    def batch_slice(self, inputs, graph_fn, batch_size, names=None):
        """
        Splits inputs into slices and feeds each slice to a copy of the given
        computation graph and then combines the results. It allows you to run a
        graph on a batch of inputs even if the graph is written to support one
        instance only.
        :param inputs: list of tensors. All must have the same first dimension length
        :param graph_fn: A function that returns a TF tensor that's part of a graph.
        :param batch_size: number of slices to divide the data into.
        :param names: If provided, assigns names to the resulting tensors.
        :return:
        """
        if not isinstance(inputs, list):
            inputs = [inputs]

        outputs = []
        for i in range(batch_size):
            inputs_slice = [x[i] for x in inputs]
            output_slice = graph_fn(*inputs_slice)
            if not isinstance(output_slice, (tuple, list)):
                output_slice = [output_slice]
            outputs.append(output_slice)

        # Change outputs from a list of slices where each is
        # a list of outputs to a list of outputs and each has
        # a list of slices
        outputs = list(zip(*outputs))

        if names is None:
            names = [None] * len(outputs)

        result = [tf.stack(o, axis=0, name=n)
                  for o, n in zip(outputs, names)]
        if len(result) == 1:
            result = result[0]
        return result
        pass

    def trim_zeros_graph(self, boxes, name='trim_zeros'):
        """
        Often boxes are represented with matrices of shape [N, 4] and
        are padded with zeros. This removes zero boxes.
        :param boxes: [N, 4] matrix of boxes.
        :param name:
        :return: non_zeros: [N] a 1D boolean mask identifying the rows to keep
        """
        non_zeros = tf.cast(tf.reduce_sum(tf.abs(boxes), axis=1), tf.bool)
        boxes = tf.boolean_mask(boxes, non_zeros, name=name)
        return boxes, non_zeros
        pass

    def detection_targets_graph(self, proposals, gt_class_ids, gt_boxes, gt_masks):
        """
        Generates detection targets for one image. Subsamples proposals and
        generates target class IDs, bounding box deltas, and masks for each.
        :param proposals: [POST_NMS_ROIS_TRAINING, (y1, x1, y2, x2)] in normalized coordinates.
               Might be zero padded if there are not enough proposals.
        :param gt_class_ids: [MAX_GT_INSTANCES] int class IDs
        :param gt_boxes: [MAX_GT_INSTANCES, (y1, x1, y2, x2)] in normalized coordinates.
        :param gt_masks: [height, width, MAX_GT_INSTANCES] of boolean type.
        :return: Target ROIs and corresponding class IDs, bounding box shifts, and masks.
               rois: [TRAIN_ROIS_PER_IMAGE, (y1, x1, y2, x2)] in normalized coordinates
               class_ids: [TRAIN_ROIS_PER_IMAGE]. Integer class IDs. Zero padded.
               deltas: [TRAIN_ROIS_PER_IMAGE, (dy, dx, log(dh), log(dw))]
               masks: [TRAIN_ROIS_PER_IMAGE, height, width]. Masks cropped to bbox
                      boundaries and resized to neural network output size.
        Note: Returned arrays might be zero padded if not enough target ROIs.
        """
        # Assertions
        asserts = [tf.Assert(tf.greater(tf.shape(proposals)[0], 0), [proposals], name="roi_assertion"), ]
        with tf.control_dependencies(asserts):
            proposals = tf.identity(proposals)
            pass

        # Remove zero padding
        proposals, _ = self.trim_zeros_graph(proposals, name="trim_proposals")
        gt_boxes, non_zeros = self.trim_zeros_graph(gt_boxes, name="trim_gt_boxes")
        gt_class_ids = tf.boolean_mask(gt_class_ids, non_zeros, name="trim_gt_class_ids")
        gt_masks = tf.gather(gt_masks, tf.where(non_zeros)[:, 0], axis=2, name="trim_gt_masks")

        # Handle COCO crowds
        # A crowd box in COCO is a bounding box around several instances. Exclude
        # them from training. A crowd box is given a negative class ID.
        crowd_ix = tf.where(gt_class_ids < 0)[:, 0]
        non_crowd_ix = tf.where(gt_class_ids > 0)[:, 0]
        crowd_boxes = tf.gather(gt_boxes, crowd_ix)
        gt_class_ids = tf.gather(gt_class_ids, non_crowd_ix)
        gt_boxes = tf.gather(gt_boxes, non_crowd_ix)
        gt_masks = tf.gather(gt_masks, non_crowd_ix, axis=2)

        # Compute overlaps matrix [proposals, gt_boxes]
        overlaps = self.bbox_util.overlaps_graph(proposals, gt_boxes)

        # Compute overlaps with crowd boxes [proposals, crowd_boxes]
        crowd_overlaps = self.bbox_util.overlaps_graph(proposals, crowd_boxes)
        crowd_iou_max = tf.reduce_max(crowd_overlaps, axis=1)
        no_crowd_bool = (crowd_iou_max < 0.001)

        # Determine positive and negative ROIs
        roi_iou_max = tf.reduce_max(overlaps, axis=1)
        # 1. Positive ROIs are those with >= 0.5 IoU with a GT box
        positive_roi_bool = (roi_iou_max >= 0.5)
        positive_indices = tf.where(positive_roi_bool)[:, 0]
        # 2. Negative ROIs are those with < 0.5 with every GT box. Skip crowds.
        negative_indices = tf.where(tf.logical_and(roi_iou_max < 0.5, no_crowd_bool))[:, 0]

        # Subsample ROIs. Aim for 33% positive
        # Positive ROIs
        positive_count = int(cfg.TRAIN.ROIS_PER_IMAGE * cfg.TRAIN.ROI_POSITIVE_RATIO)
        positive_indices = tf.random_shuffle(positive_indices)[:positive_count]
        positive_count = tf.shape(positive_indices)[0]
        # Negative ROIs. Add enough to maintain positive:negative ratio.
        r = 1.0 / cfg.TRAIN.ROI_POSITIVE_RATIO
        negative_count = tf.cast(r * tf.cast(positive_count, tf.float32), tf.int32) - positive_count
        negative_indices = tf.random_shuffle(negative_indices)[:negative_count]
        # Gather selected ROIs
        positive_rois = tf.gather(proposals, positive_indices)
        negative_rois = tf.gather(proposals, negative_indices)

        # Assign positive ROIs to GT boxes.
        positive_overlaps = tf.gather(overlaps, positive_indices)
        roi_gt_box_assignment = tf.cond(
            tf.greater(tf.shape(positive_overlaps)[1], 0),
            true_fn=lambda: tf.argmax(positive_overlaps, axis=1),
            false_fn=lambda: tf.cast(tf.constant([]), tf.int64)
        )
        roi_gt_boxes = tf.gather(gt_boxes, roi_gt_box_assignment)
        roi_gt_class_ids = tf.gather(gt_class_ids, roi_gt_box_assignment)

        # Compute bbox refinement for positive ROIs
        deltas = self.bbox_util.box_refinement_graph(positive_rois, roi_gt_boxes)
        deltas /= np.array(cfg.COMMON.BBOX_STD_DEV)

        # Assign positive ROIs to GT masks
        # Permute masks to [N, height, width, 1]
        transposed_masks = tf.expand_dims(tf.transpose(gt_masks, [2, 0, 1]), -1)
        # Pick the right mask for each ROI
        roi_masks = tf.gather(transposed_masks, roi_gt_box_assignment)

        # Compute mask targets
        boxes = positive_rois
        if cfg.TRAIN.USE_MINI_MASK:
            # Transform ROI coordinates from normalized image space
            # to normalized mini-mask space.
            y1, x1, y2, x2 = tf.split(positive_rois, 4, axis=1)
            gt_y1, gt_x1, gt_y2, gt_x2 = tf.split(roi_gt_boxes, 4, axis=1)
            gt_h = gt_y2 - gt_y1
            gt_w = gt_x2 - gt_x1
            y1 = (y1 - gt_y1) / gt_h
            x1 = (x1 - gt_x1) / gt_w
            y2 = (y2 - gt_y1) / gt_h
            x2 = (x2 - gt_x1) / gt_w
            boxes = tf.concat([y1, x1, y2, x2], 1)
            pass

        box_ids = tf.range(0, tf.shape(roi_masks)[0])
        masks = tf.image.crop_and_resize(tf.cast(roi_masks, tf.float32),
                                         boxes, box_ids,
                                         cfg.TRAIN.MASK_SHAPE)
        # Remove the extra dimension from masks.
        masks = tf.squeeze(masks, axis=3)

        # Threshold mask pixels at 0.5 to have GT masks be 0 or 1 to use with
        # binary cross entropy loss.
        masks = tf.round(masks)

        # Append negative ROIs and pad bbox deltas and masks that
        # are not used for negative ROIs with zeros.
        rois = tf.concat([positive_rois, negative_rois], axis=0)
        N = tf.shape(negative_rois)[0]
        P = tf.maximum(cfg.TRAIN.ROIS_PER_IMAGE - tf.shape(rois)[0], 0)
        rois = tf.pad(rois, [(0, P), (0, 0)])
        # roi_gt_boxes = tf.pad(roi_gt_boxes, [(0, N + P), (0, 0)])
        roi_gt_class_ids = tf.pad(roi_gt_class_ids, [(0, N + P)])
        deltas = tf.pad(deltas, [(0, N + P), (0, 0)])
        masks = tf.pad(masks, [(0, N + P), (0, 0), (0, 0)])
        return rois, roi_gt_class_ids, deltas, masks
        pass
```
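
To see what batch_slice() buys you, here is a toy sketch. It assumes the repository layout implied by the file headers above (utils/misc_utils.py) and a TF 1.x runtime, which matches the tf.random_shuffle calls in the file; the keep_two() helper is made up for illustration:

```python
import tensorflow as tf
from utils.misc_utils import MiscUtils  # import path assumed from the file header above

misc = MiscUtils()

def keep_two(boxes_one_image):
    # Per-image graph function: keep only the first two boxes.
    return boxes_one_image[:2]

# [batch=3, N=4, (y1, x1, y2, x2)] toy box tensor
boxes = tf.reshape(tf.range(3 * 4 * 4, dtype=tf.float32), [3, 4, 4])

# batch_slice() unrolls the batch in Python, applies keep_two() to each
# image's boxes, and stacks the per-image results back into one tensor.
trimmed = misc.batch_slice(boxes, keep_two, batch_size=3, names=["trimmed_boxes"])

with tf.Session() as sess:
    print(sess.run(trimmed).shape)  # (3, 2, 4)
```

This is exactly how detection_targets_graph() gets applied per image during training: the target-generation graph is written for a single image, and batch_slice() lifts it to a batch.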

 

```python
#!/usr/bin/env python
# _*_ coding:utf-8 _*_
# ============================================
# @Time     : 2020/05/01 00:22
# @Author   : WanDaoYi
# @FileName : mask_util.py
# ============================================

import warnings
import numpy as np
import scipy.ndimage
from utils.image_utils import ImageUtils
from pycocotools import mask as coco_mask_utils
from config import cfg


class MaskUtil(object):

    def __init__(self):
        self.coco_model_url = cfg.COMMON.COCO_MODEL_URL
        self.image_utils = ImageUtils()
        pass

    # Compute the IoU overlaps between two sets of masks
    def compute_overlaps_masks(self, masks1, masks2):
        """
        :param masks1: [Height, Width, instances]
        :param masks2: [Height, Width, instances]
        :return: IoU overlaps between the two sets of masks
        """
        # If either set of masks is empty, return an empty result
        mask_flag = masks1.shape[-1] == 0 or masks2.shape[-1] == 0
        if mask_flag:
            return np.zeros((masks1.shape[-1], masks2.shape[-1]))
            pass

        # Flatten the masks and compute their areas
        masks1 = np.reshape(masks1 > .5, (-1, masks1.shape[-1])).astype(np.float32)
        masks2 = np.reshape(masks2 > .5, (-1, masks2.shape[-1])).astype(np.float32)
        area1 = np.sum(masks1, axis=0)
        area2 = np.sum(masks2, axis=0)

        # intersections and union
        intersections = np.dot(masks1.T, masks2)
        union = area1[:, None] + area2[None, :] - intersections
        overlaps = intersections / union
        return overlaps
        pass

    def annotation_2_mask(self, annotation, height, width):
        """
        Convert annotation which can be polygons, uncompressed RLE, or RLE to binary mask.
        :param annotation: annotation info
        :param height: image info of height
        :param width: image info of width
        :return: binary mask (numpy 2D array)
        """
        segment = annotation['segmentation']
        if isinstance(segment, list):
            # polygon -- a single object might consist of multiple parts
            # we merge all parts into one mask rle code
            rles = coco_mask_utils.frPyObjects(segment, height, width)
            rle = coco_mask_utils.merge(rles)
            pass
        elif isinstance(segment['counts'], list):
            # uncompressed RLE
            rle = coco_mask_utils.frPyObjects(segment, height, width)
            pass
        else:
            # rle: segment is already the compressed RLE dict here
            # (the original rle = segment['segmentation'] would raise a KeyError)
            rle = segment
            pass
        mask = coco_mask_utils.decode(rle)
        return mask
        pass

    def load_mask(self, data, image_id):
        """
        Load instance masks for the given image.
        Different datasets use different ways to store masks. This
        function converts the different mask format to one format
        in the form of a bitmap [height, width, instances].
        :param data: The Dataset object to pick data from
        :param image_id: image id of image
        :return:
            masks: A bool array of shape [height, width, instance count] with
                   one mask per instance.
            class_ids: a 1D array of class IDs of the instance masks.
        """
        image_info = data.image_info_list[image_id]
        instance_masks = []
        class_ids = []
        annotations = data.image_info_list[image_id]["annotations"]

        # Build mask of shape [height, width, instance_count] and list
        # of class IDs that correspond to each channel of the mask.
        for annotation in annotations:
            class_id = data.class_from_source_map["coco.{}".format(annotation['category_id'])]
            if class_id:
                m = self.annotation_2_mask(annotation, image_info["height"], image_info["width"])

                # Some objects are so small that they're less than 1 pixel area
                # and end up rounded out. Skip those objects.
                if m.max() < 1:
                    continue
                    pass

                # Is it a crowd? If so, use a negative class ID.
                if annotation['iscrowd']:
                    # Use negative class ID for crowds
                    class_id *= -1
                    # For crowd masks, annToMask() sometimes returns a mask
                    # smaller than the given dimensions. If so, resize it.
                    if m.shape[0] != image_info["height"] or m.shape[1] != image_info["width"]:
                        m = np.ones([image_info["height"], image_info["width"]], dtype=bool)
                    pass

                instance_masks.append(m)
                class_ids.append(class_id)
                pass
            pass

        mask = np.stack(instance_masks, axis=2).astype(bool)
        class_ids = np.array(class_ids, dtype=np.int32)
        return mask, class_ids
        pass

    def resize_mask(self, mask, scale, padding, crop=None):
        """
        resize a mask using the given scale and padding.
        Typically, you get the scale and padding from resize_image() to
        ensure both, the image and the mask, are resized consistently.
        :param mask:
        :param scale: mask scaling factor
        :param padding: Padding to add to the mask in the form
               [(top, bottom), (left, right), (0, 0)]
        :param crop:
        :return:
        """
        # Suppress warning from scipy 0.13.0, the output shape of zoom() is
        # calculated with round() instead of int()
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            mask = scipy.ndimage.zoom(mask, zoom=[scale, scale, 1], order=0)
            pass

        if crop is not None:
            y, x, h, w = crop
            mask = mask[y:y + h, x:x + w]
        else:
            mask = np.pad(mask, padding, mode='constant', constant_values=0)
        return mask
        pass

    def minimize_mask(self, bbox, mask, mini_shape):
        """
        Resize masks to a smaller version to reduce memory load.
        Mini-masks can be resized back to image scale using expand_masks()
        :param bbox:
        :param mask:
        :param mini_shape:
        :return:
        """
        # cfg.TRAIN.MINI_MASK_SHAPE may arrive as a list, so normalize it to a tuple
        mini_shape = tuple(mini_shape)
        mini_mask = np.zeros(mini_shape + (mask.shape[-1],), dtype=bool)

        for i in range(mask.shape[-1]):
            # Pick slice and cast to bool in case load_mask() returned wrong dtype
            m = mask[:, :, i].astype(bool)
            y1, x1, y2, x2 = bbox[i][:4]
            m = m[y1:y2, x1:x2]
            if m.size == 0:
                raise Exception("Invalid bounding box with area of zero")
            # NOTE: the remainder of this loop is an assumption, completed to match
            # the upstream Mask R-CNN minimize_mask(): resize the cropped mask down
            # to mini_shape and store the thresholded result.
            m = self.image_utils.resize(m, mini_shape)
            mini_mask[:, :, i] = np.around(m).astype(bool)
            pass
        return mini_mask
        pass
```
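
As a quick sanity check of compute_overlaps_masks(), the toy example below (import path assumed from the file header above) builds two identical two-instance mask stacks and verifies that the IoU matrix comes out as the identity:

```python
import numpy as np
from utils.mask_util import MaskUtil  # import path assumed from the file header above

mask_util = MaskUtil()

# Two 4x4 images with two instances each: identical masks give IoU 1.0
# on the diagonal, disjoint masks give 0.0 off the diagonal.
masks1 = np.zeros((4, 4, 2), dtype=bool)
masks1[:2, :2, 0] = True   # instance 0: top-left 2x2 block
masks1[2:, 2:, 1] = True   # instance 1: bottom-right 2x2 block
masks2 = masks1.copy()

print(mask_util.compute_overlaps_masks(masks1, masks2))
# [[1. 0.]
#  [0. 1.]]
```

The same IoU matrix is what the evaluation code relies on to match predicted masks against ground-truth instances.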