The RetinaNet algorithm comes from the 2017 Facebook AI Research paper Focal Loss for Dense Object Detection, whose authors include Ross Girshick, Kaiming He, and Piotr Dollár. The paper's biggest contribution is the Focal Loss, proposed to address the class imbalance problem, which made RetinaNet a one-stage object detection network whose accuracy surpasses the classic two-stage Faster R-CNN.
The figure above is taken from https://blog.csdn.net/weixin_48167570/article/details/120937167
features = self.fpn([x2, x3, x4])
# shape: [bs, num_total_anchors, 4], where num_total_anchors = 9 anchors x sum of h*w over the 5 FPN levels
regression = torch.cat([self.regressionModel(feature) for feature in features], dim=1)
# shape: [bs, num_total_anchors, num_classes]
classification = torch.cat([self.classificationModel(feature) for feature in features], dim=1)
As shown above, the outputs computed at every scale are concatenated along dim=1 and passed on to the loss-computation stage (a shape sketch follows the snippet below):
if self.training:
return self.focalLoss(classification, regression, anchors, annotations)
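To make the shape bookkeeping concrete, here is a minimal standalone sketch of the multi-scale concatenation. The feature-map sizes are illustrative assumptions for a 640x800 input with 9 anchors per location and 80 classes, not values taken from the original code:

import torch

num_anchors, num_classes, bs = 9, 80, 2
# assumed (h, w) of the five FPN levels P3-P7 at strides 8..128
level_sizes = [(80, 100), (40, 50), (20, 25), (10, 13), (5, 7)]

# per-level head outputs: [bs, 9*h*w, 4] and [bs, 9*h*w, num_classes]
regs = [torch.randn(bs, num_anchors * h * w, 4) for h, w in level_sizes]
clss = [torch.randn(bs, num_anchors * h * w, num_classes) for h, w in level_sizes]

regression = torch.cat(regs, dim=1)
classification = torch.cat(clss, dim=1)

total = num_anchors * sum(h * w for h, w in level_sizes)
assert regression.shape == (bs, total, 4)
assert classification.shape == (bs, total, num_classes)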
The IoU computed in this step is used next to assign anchors to positive and negative training samples, so that during training each anchor is matched to a GT box wherever possible:
import torch

def calc_iou(a, b):
    # a: anchors, b: annotation boxes, both in [x1, y1, x2, y2] format
    # area of every annotation box
    area = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])

    # unsqueeze turns a column from [n] into [n, 1], so the torch.min/torch.max
    # below broadcast every anchor against every annotation box;
    # iw and ih are the width and height of each intersection region
    iw = torch.min(torch.unsqueeze(a[:, 2], dim=1), b[:, 2]) - torch.max(torch.unsqueeze(a[:, 0], 1), b[:, 0])
    ih = torch.min(torch.unsqueeze(a[:, 3], dim=1), b[:, 3]) - torch.max(torch.unsqueeze(a[:, 1], 1), b[:, 1])

    # clamp width and height so non-overlapping pairs do not go negative
    iw = torch.clamp(iw, min=0)
    ih = torch.clamp(ih, min=0)

    # union = anchor area + annotation area - intersection area
    ua = torch.unsqueeze((a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1]), dim=1) + area - iw * ih
    # floor the union area; a tiny union would blow the ratio up
    ua = torch.clamp(ua, min=1e-8)

    # intersection area
    intersection = iw * ih

    # IoU.shape: [num_anchors, n] -- the IoU of every anchor with every annotation
    IoU = intersection / ua
    return IoU
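A quick sanity check of calc_iou on two toy anchors against a single annotation box (values are illustrative only):

anchors_xyxy = torch.tensor([[0., 0., 10., 10.],
                             [5., 5., 15., 15.]])
gt_xyxy = torch.tensor([[0., 0., 10., 10.]])
iou = calc_iou(anchors_xyxy, gt_xyxy)  # shape [2, 1]
# iou[0, 0] == 1.0 (identical boxes); iou[1, 0] == 25 / 175 ≈ 0.1429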
Loss function formulas

The starting point is the standard binary cross-entropy loss, written in terms of $p_t$:

$$CE(p_t) = -\log(p_t)$$

Modifying it with the modulating factor $(1 - p_t)^{\gamma}$ down-weights well-classified examples, which abbreviates to the following loss function:

$$FL(p_t) = -(1 - p_t)^{\gamma}\log(p_t)$$

Adding the balancing factor $\alpha_t$ to offset the class imbalance itself:

$$FL(p_t) = -\alpha_t(1 - p_t)^{\gamma}\log(p_t)$$

Note: $p_t = p$ when the ground-truth label $y = 1$ and $p_t = 1 - p$ otherwise; the paper uses $\gamma = 2$ and $\alpha = 0.25$.
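As a quick numeric check of the formulas above (a minimal sketch, not from the original post), with $\gamma = 2$ and $\alpha = 0.25$ an easy negative contributes almost nothing, while a hard positive keeps most of its weighted cross-entropy:

import math

def focal_loss(p, y, alpha=0.25, gamma=2.0):
    # p: predicted probability of the positive class; y: ground-truth label (0 or 1)
    p_t = p if y == 1 else 1.0 - p
    alpha_t = alpha if y == 1 else 1.0 - alpha
    return -alpha_t * (1.0 - p_t) ** gamma * math.log(p_t)

print(focal_loss(0.1, 0))  # easy negative: ~0.0008 -- heavily down-weighted
print(focal_loss(0.1, 1))  # hard positive: ~0.47   -- keeps nearly full weighted CE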
The following focuses on the code implementation of the focal loss:
import torch
import torch.nn as nn

class FocalLoss(nn.Module):
    def forward(self, classifications, regressions, anchors, annotations):
        alpha = 0.25
        gamma = 2.0
        batch_size = classifications.shape[0]
        classification_losses = []
        regression_losses = []

        # anchors: [1, num_total_anchors, 4] -- the anchors from all feature levels
        anchor = anchors[0, :, :]
        anchor_widths = anchor[:, 2] - anchor[:, 0]
        anchor_heights = anchor[:, 3] - anchor[:, 1]
        anchor_ctr_x = anchor[:, 0] + 0.5 * anchor_widths
        anchor_ctr_y = anchor[:, 1] + 0.5 * anchor_heights

        for j in range(batch_size):
            # classifications: [bs, num_total_anchors, num_classes]
            # regressions:     [bs, num_total_anchors, 4]
            # all anchor predictions of the j-th image in the batch
            classification = classifications[j, :, :]
            regression = regressions[j, :, :]

            # bbox_annotation: [n, 5], where the 5 holds [x1, y1, x2, y2, label]
            bbox_annotation = annotations[j, :, :]
            bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1]  # drop padded targets

            # clamp the predicted probabilities away from exactly 0 and 1
            classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4)

            # two cases to handle: the image has GT boxes, or it has none

            # 1. no GT at all
            if bbox_annotation.shape[0] == 0:
                # balance factor
                alpha_factor = torch.ones(classification.shape) * alpha
                if torch.cuda.is_available():
                    alpha_factor = alpha_factor.cuda()
                alpha_factor = 1. - alpha_factor

                # focal weight
                focal_weight = classification
                focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

                # bce: every anchor is treated as a negative
                bce = -(torch.log(1.0 - classification))

                # final loss; the regression loss counts as 0
                cls_loss = focal_weight * bce
                classification_losses.append(cls_loss.sum())
                if torch.cuda.is_available():
                    regression_losses.append(torch.tensor(0).float().cuda())
                else:
                    regression_losses.append(torch.tensor(0).float())
                continue

            # 2. the image has GT boxes
            IoU = calc_iou(anchors[0, :, :], bbox_annotation[:, :4])
            # per anchor: the best IoU and the index of the matching annotation
            IoU_max, IoU_argmax = torch.max(IoU, dim=1)

            # targets act as the GT for the classification loss, initialised to -1 (ignore)
            targets = torch.ones(classification.shape) * -1  # shape: [num_total_anchors, K]
            if torch.cuda.is_available():
                targets = targets.cuda()

            # compare against IoU_max to assign positives and negatives
            targets[torch.lt(IoU_max, 0.4), :] = 0
            positive_indices = torch.ge(IoU_max, 0.5)

            # number of positive samples
            num_positive_anchors = positive_indices.sum()

            # GT bbox assigned to each anchor, shape: [num_total_anchors, 5]
            assigned_annotations = bbox_annotation[IoU_argmax, :]

            targets[positive_indices, :] = 0
            # set a 1 at the class of every anchor that meets the IoU requirement
            targets[positive_indices, assigned_annotations[positive_indices, 4].long()] = 1

            if torch.cuda.is_available():
                alpha_factor = torch.ones(targets.shape).cuda() * alpha
            else:
                alpha_factor = torch.ones(targets.shape) * alpha

            # positives get alpha_factor, negatives get 1 - alpha_factor
            alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor, 1. - alpha_factor)
            focal_weight = torch.where(torch.eq(targets, 1.), 1. - classification, classification)
            # focal weight
            focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

            # binary cross-entropy loss -- bce.shape: [num_total_anchors, num_classes]
            bce = -(targets * torch.log(classification) + (1.0 - targets) * torch.log(1.0 - classification))

            # final classification loss; ignored anchors (targets == -1) are zeroed out
            cls_loss = focal_weight * bce
            if torch.cuda.is_available():
                cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, torch.zeros(cls_loss.shape).cuda())
            else:
                cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, torch.zeros(cls_loss.shape))
            classification_losses.append(cls_loss.sum() / torch.clamp(num_positive_anchors.float(), min=1.0))

            # regression loss is computed only over the anchors whose IoU meets the positive threshold
            if positive_indices.sum() > 0:
                assigned_annotations = assigned_annotations[positive_indices, :]

                anchor_widths_pi = anchor_widths[positive_indices]
                anchor_heights_pi = anchor_heights[positive_indices]
                anchor_ctr_x_pi = anchor_ctr_x[positive_indices]
                anchor_ctr_y_pi = anchor_ctr_y[positive_indices]

                # x1,y1,x2,y2 -> cx,cy,w,h
                gt_widths = assigned_annotations[:, 2] - assigned_annotations[:, 0]
                gt_heights = assigned_annotations[:, 3] - assigned_annotations[:, 1]
                gt_ctr_x = assigned_annotations[:, 0] + 0.5 * gt_widths
                gt_ctr_y = assigned_annotations[:, 1] + 0.5 * gt_heights

                # clip widths and heights to at least 1
                gt_widths = torch.clamp(gt_widths, min=1)
                gt_heights = torch.clamp(gt_heights, min=1)

                # targets are the offsets of the GT centre from the anchor centre,
                # normalised by the anchor size -- exactly what the regression head must predict.
                # To recap:
                #   COCO annotates boxes as x,y,w,h;
                #   the COCO tools convert them to x1,y1,x2,y2;
                #   the regression targets are dx, dy, log(dw), log(dh)
                targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi
                targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi
                targets_dw = torch.log(gt_widths / anchor_widths_pi)
                targets_dh = torch.log(gt_heights / anchor_heights_pi)

                targets = torch.stack((targets_dx, targets_dy, targets_dw, targets_dh))
                targets = targets.t()

                if torch.cuda.is_available():
                    targets = targets / torch.Tensor([[0.1, 0.1, 0.2, 0.2]]).cuda()
                else:
                    targets = targets / torch.Tensor([[0.1, 0.1, 0.2, 0.2]])

                negative_indices = ~positive_indices  # unused below

                # smooth L1 loss between the targets and the predicted offsets
                regression_diff = torch.abs(targets - regression[positive_indices, :])
                regression_loss = torch.where(
                    torch.le(regression_diff, 1.0 / 9.0),
                    0.5 * 9.0 * torch.pow(regression_diff, 2),
                    regression_diff - 0.5 / 9.0
                )
                regression_losses.append(regression_loss.mean())
            else:
                # every prediction is a negative: the regression loss is zero
                if torch.cuda.is_available():
                    regression_losses.append(torch.tensor(0).float().cuda())
                else:
                    regression_losses.append(torch.tensor(0).float())

        return torch.stack(classification_losses).mean(dim=0, keepdim=True), \
               torch.stack(regression_losses).mean(dim=0, keepdim=True)
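A minimal CPU smoke test of the FocalLoss above. All tensor values are made up; the layouts follow the comments in the code: classifications [bs, A, K], regressions [bs, A, 4], anchors [1, A, 4] in x1,y1,x2,y2 order, annotations [bs, n, 5] with a label of -1 marking padding:

torch.manual_seed(0)
bs, A, K = 2, 12, 3
classifications = torch.sigmoid(torch.randn(bs, A, K))  # probabilities in (0, 1)
regressions = torch.randn(bs, A, 4)
x1y1 = torch.rand(1, A, 2) * 50
anchors = torch.cat([x1y1, x1y1 + 20], dim=2)  # A anchors of size 20x20
annotations = torch.tensor([[[10., 10., 40., 40., 1.]],    # one GT box, class 1
                            [[0., 0., 0., 0., -1.]]])      # second image: padding only

cls_loss, reg_loss = FocalLoss()(classifications, regressions, anchors, annotations)
print(cls_loss, reg_loss)  # two 1-element tensors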