当前位置:   article > 正文

SSD300网络结构(pytorch)+多尺度训练与测试

ssd300

一.SSD300

1.如图是预测框的相应feature map 

各层scale的计算公式为 $s_k = s_{min} + \frac{s_{max}-s_{min}}{m-1}(k-1),\ k\in[1,m]$。这里 $s_{min}$ 是0.2,表示最底层的scale是0.2;$s_{max}$ 是0.9,表示最高层的scale是0.9;m代表产生尺度预测的feature map个数。

其中anchor的长宽关系为 $w = s\sqrt{a}$,$h = s/\sqrt{a}$,s就是上图中的scale,a就是上图中的anchor ratio。

2.代码

主要由三部分组成

1.vgg作为基础网络

要注意的是作者对38*38*512的conv4_3特征图在通道维度上进行L2归一化,并用一个可学习参数调节各通道的权重。

2.增加大目标检测网络

3.输出包括预测框的偏移量输出与分类

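偏移量计算:设先验框为 $(c_x^p, c_y^p, w^p, h^p)$,gt框为 $(c_x, c_y, w, h)$,则 $g_{cx} = \frac{c_x - c_x^p}{w^p/10}$,$g_{cy} = \frac{c_y - c_y^p}{h^p/10}$,$g_w = 5\ln\frac{w}{w^p}$,$g_h = 5\ln\frac{h}{h^p}$。神经网络学习这些偏移量即可。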

针对误检的难负样本挖掘(Hard Negative Mining)loss函数,用于分类。

1.回归量与坐标的转换

  def cxcy_to_gcxgcy(cxcy, priors_cxcy):
      """Encode centre-size boxes as regression targets relative to priors.

      Both arguments are sliced as ``[:, :2]`` (centre) and ``[:, 2:]``
      (size); the two encoded halves are concatenated along dim 1.
      The constants 10 and 5 are the empirical scaling discussed in
      https://github.com/weiliu89/caffe/issues/155
      """
      prior_centers = priors_cxcy[:, :2]
      prior_sizes = priors_cxcy[:, 2:]
      # g_c_x, g_c_y: centre shift measured in units of a tenth of the prior size.
      encoded_centers = (cxcy[:, :2] - prior_centers) / (prior_sizes / 10)
      # g_w, g_h: log size ratio, scaled by 5.
      encoded_sizes = torch.log(cxcy[:, 2:] / prior_sizes) * 5
      return torch.cat([encoded_centers, encoded_sizes], 1)
  def gcxgcy_to_cxcy(gcxgcy, priors_cxcy):
      """Decode regression offsets back into centre-size boxes.

      Inverse of the encoding that divides the centre shift by a tenth of
      the prior size and multiplies the log size ratio by 5. Inputs are
      sliced as ``[:, :2]`` (centre part) and ``[:, 2:]`` (size part).
      """
      prior_centers = priors_cxcy[:, :2]
      prior_sizes = priors_cxcy[:, 2:]
      centers = gcxgcy[:, :2] * prior_sizes / 10 + prior_centers  # c_x, c_y
      sizes = torch.exp(gcxgcy[:, 2:] / 5) * prior_sizes  # w, h
      return torch.cat([centers, sizes], 1)

2.anchor与gt框匹配示例,保证每个gt至少有一个anchor

  # Toy example: assign 3 anchors (priors) to 2 ground-truth boxes so that
  # every ground-truth box ends up owning at least one anchor.
  import torch

  objects = 2
  # overlap[g, a] is the IoU between ground-truth box g and anchor a.
  overlap = torch.tensor([[0.4, 0.5, 0.6],
                          [0.8, 0.9, 0.7]])

  # For each anchor: the best-overlapping ground-truth box and that IoU.
  iou_for_each_prior, index_for_each_prior = overlap.max(dim=0)
  print(iou_for_each_prior, index_for_each_prior)

  # For each ground-truth box: the best-overlapping anchor and that IoU.
  iou_for_each_box, index_for_each_box = overlap.max(dim=1)
  print(iou_for_each_box, index_for_each_box)

  # Force every ground-truth box onto its own best anchor, overriding the
  # per-anchor argmax computed above.
  index_for_each_prior[index_for_each_box] = torch.arange(objects)
  print(index_for_each_prior)

3.gt框与对应anchor框做回归的示例,其中的true_classes是两个样本,每一个样本有3个box框的类别示例,0代表背景

  # Toy example: match 3 anchors to 2 ground-truth boxes, then use a boolean
  # "positive prior" mask to pick out the rows that take part in regression.
  import torch

  objects = 2
  # overlap[g, a] is the IoU between ground-truth box g and anchor a.
  overlap = torch.tensor([[0.4, 0.5, 0.6],
                          [0.8, 0.9, 0.7]])

  # Best ground-truth box per anchor, then best anchor per ground-truth box.
  iou_for_each_prior, index_for_each_prior = overlap.max(dim=0)
  print(iou_for_each_prior, index_for_each_prior)
  iou_for_each_box, index_for_each_box = overlap.max(dim=1)
  print(iou_for_each_box, index_for_each_box)

  # Guarantee that each ground-truth box keeps its best anchor.
  index_for_each_prior[index_for_each_box] = torch.arange(objects)
  print(index_for_each_prior)

  batch_size = 2
  # Class label of each of the 3 boxes in each of the 2 samples; 0 = background.
  true_classes = torch.tensor([[0, 1, 3],
                               [2, 4, 5]])
  positive_priors = true_classes != 0
  print('=positive_priors:\n', positive_priors)

  # Indexing a (batch, 3, 4) tensor with the (batch, 3) mask keeps one
  # 4-vector per positive entry.
  pre_locs = torch.rand((batch_size, 3, 4))
  print('==pre_locs[positive_priors].shape:\n', pre_locs[positive_priors].shape)
  true_locs = torch.rand((batch_size, 3, 4))
  print('==true_locs[positive_priors].shape:\n', true_locs[positive_priors].shape)

4.总体代码:

  1. import torch
  2. import os
  3. from torch import nn
  4. import torch.nn.functional as F
  5. import torchvision
  6. from torchvision import models
  7. from utils import decimate, find_jaccard_overlap, cxcy_to_xy, xy_to_cxcy
  8. from utils import cxcy_to_gcxgcy as cx_cy_dxdy
  9. from math import sqrt
  10. # vgg16 = models.vgg16(pretrained=True)
  11. # print(vgg16)
  12. # vgg16_state_dict = vgg16.state_dict()
  13. # print(list(vgg16_state_dict.keys()))
  14. # print(vgg16_state_dict.values())
  15. # for key, value in vgg16.named_parameters():
  16. # print('key:', key)
  # Run on the GPU when CUDA is available, otherwise fall back to the CPU.
  device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
  class VGGbase(nn.Module):
      """VGG-16 backbone used as the base network of SSD300.

      The fully connected layers fc6/fc7 of VGG-16 are replaced by the
      convolutions ``conv6`` (dilated 3x3) and ``conv7`` (1x1). The
      constructor loads ImageNet-pretrained weights into every layer.
      """

      def __init__(self):
          super().__init__()

          # NOTE: the registration order of these layers matters, because
          # load_pretrained_layers() copies pretrained tensors by position.
          self.conv1_1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
          self.conv1_2 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
          self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

          self.conv2_1 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
          self.conv2_2 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
          self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

          self.conv3_1 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
          self.conv3_2 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
          self.conv3_3 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
          # ceil_mode rounds 75 / 2 up to 38 instead of down to 37.
          self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)

          self.conv4_1 = nn.Conv2d(256, 512, kernel_size=3, padding=1)
          self.conv4_2 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
          self.conv4_3 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
          self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2)

          self.conv5_1 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
          self.conv5_2 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
          self.conv5_3 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
          # 3x3 window, stride 1, padding 1: the spatial size is not reduced.
          self.pool5 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)

          # Dilated convolution enlarges the receptive field.
          self.conv6 = nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6)
          self.conv7 = nn.Conv2d(1024, 1024, kernel_size=1)

          # Load the pretrained weights.
          self.load_pretrained_layers()

      def forward(self, image):
          """Return the ``conv4_3`` and ``conv7`` feature maps of ``image``.

          The shapes in the comments below hold for a (B, 3, 300, 300) input.
          """
          x = image
          for conv in (self.conv1_1, self.conv1_2):
              x = F.relu(conv(x))
          x = self.pool1(x)  # (B, 64, 150, 150)

          for conv in (self.conv2_1, self.conv2_2):
              x = F.relu(conv(x))
          x = self.pool2(x)  # (B, 128, 75, 75)

          for conv in (self.conv3_1, self.conv3_2, self.conv3_3):
              x = F.relu(conv(x))
          x = self.pool3(x)  # (B, 256, 38, 38)

          for conv in (self.conv4_1, self.conv4_2, self.conv4_3):
              x = F.relu(conv(x))
          conv4_3feats = x  # (B, 512, 38, 38)
          x = self.pool4(x)  # (B, 512, 19, 19)

          for conv in (self.conv5_1, self.conv5_2, self.conv5_3):
              x = F.relu(conv(x))
          x = self.pool5(x)  # (B, 512, 19, 19)

          x = F.relu(self.conv6(x))
          conv7_feats = F.relu(self.conv7(x))  # (B, 1024, 19, 19)
          return conv4_3feats, conv7_feats

      def load_pretrained_layers(self):
          """Initialise this network from torchvision's pretrained VGG-16.

          All entries of this module's state dict except the last four
          (weight and bias of conv6 and conv7) are copied positionally from
          the pretrained state dict. conv6/conv7 are obtained by reshaping
          the first two classifier layers into convolution kernels and
          subsampling them with ``decimate``.
          """
          own_state = self.state_dict()
          own_keys = list(own_state.keys())
          print('param_name', own_keys)

          vgg16_state = models.vgg16(pretrained=True).state_dict()
          vgg16_keys = list(vgg16_state.keys())
          print('pretrained_param_name', vgg16_keys)

          # conv6 and conv7 have no direct counterpart in VGG-16, so the
          # positional copy stops before their four entries.
          for own_key, vgg16_key in zip(own_keys[:-4], vgg16_keys):
              own_state[own_key] = vgg16_state[vgg16_key]

          # fc6 -> conv6: view the (4096, 25088) matrix as a 7x7 kernel bank,
          # then keep every 4th filter and every 3rd spatial tap.
          fc6_weight = vgg16_state['classifier.0.weight'].reshape(4096, 512, 7, 7)
          own_state['conv6.weight'] = decimate(fc6_weight, m=[4, None, 3, 3])  # (1024, 512, 3, 3)
          own_state['conv6.bias'] = decimate(vgg16_state['classifier.0.bias'], m=[4])  # (1024)

          # fc7 -> conv7: a 1x1 kernel bank, subsampled by 4 on both channel axes.
          fc7_weight = vgg16_state['classifier.3.weight'].reshape(4096, 4096, 1, 1)
          own_state['conv7.weight'] = decimate(fc7_weight, m=[4, 4, None, None])  # (1024, 1024, 1, 1)
          own_state['conv7.bias'] = decimate(vgg16_state['classifier.3.bias'], m=[4])  # (1024)

          self.load_state_dict(own_state)
  class AuxiliaryConvolutions(nn.Module):
      """Extra convolution layers stacked on top of the VGG base network.

      They produce four progressively smaller feature maps (conv8_2,
      conv9_2, conv10_2, conv11_2) that are used to detect larger objects.
      """

      def __init__(self):
          super(AuxiliaryConvolutions, self).__init__()
          # Each stage is a 1x1 channel reduction followed by a 3x3
          # convolution that shrinks the map (stride 2, or no padding).
          # Fix: conv8_1 / conv8_2 used to be constructed twice in a row;
          # the first pair was immediately overwritten.
          self.conv8_1 = nn.Conv2d(1024, 256, kernel_size=1, stride=1)
          self.conv8_2 = nn.Conv2d(256, 512, kernel_size=3, stride=2, padding=1)
          self.conv9_1 = nn.Conv2d(512, 128, kernel_size=1, stride=1)
          self.conv9_2 = nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1)
          self.conv10_1 = nn.Conv2d(256, 128, kernel_size=1, stride=1)
          self.conv10_2 = nn.Conv2d(128, 256, kernel_size=3, stride=1)
          self.conv11_1 = nn.Conv2d(256, 128, kernel_size=1, stride=1)
          self.conv11_2 = nn.Conv2d(128, 256, kernel_size=3, stride=1)
          self.init_conv2d()

      def init_conv2d(self):
          """Xavier-initialise every convolution weight and zero its bias."""
          for c in self.children():
              if isinstance(c, nn.Conv2d):
                  nn.init.xavier_uniform_(c.weight)
                  nn.init.constant_(c.bias, 0)

      def forward(self, input):
          """Return the conv8_2, conv9_2, conv10_2 and conv11_2 feature maps.

          The shapes in the comments hold for a (B, 1024, 19, 19) input,
          i.e. the conv7 output of the base network for 300x300 images.
          """
          out = F.relu(self.conv8_1(input))  # (B, 256, 19, 19)
          out = F.relu(self.conv8_2(out))  # (B, 512, 10, 10)
          conv8_2feats = out
          out = F.relu(self.conv9_1(out))  # (B, 128, 10, 10)
          out = F.relu(self.conv9_2(out))  # (B, 256, 5, 5)
          conv9_2feats = out
          out = F.relu(self.conv10_1(out))  # (B, 128, 5, 5)
          out = F.relu(self.conv10_2(out))  # (B, 256, 3, 3)
          conv10_2feats = out
          out = F.relu(self.conv11_1(out))  # (B, 128, 3, 3)
          out = F.relu(self.conv11_2(out))  # (B, 256, 1, 1)
          conv11_2feats = out
          return conv8_2feats, conv9_2feats, conv10_2feats, conv11_2feats
  class PredictionConvolutions(nn.Module):
      """Convolutional heads that predict box offsets and class scores.

      For each of the six feature maps there is one localisation head
      (4 outputs per prior) and one classification head (``n_classes``
      outputs per prior). The layers keep the names ``loc_<fmap>`` and
      ``cl_<fmap>`` and are registered in the order: all localisation
      heads, then all classification heads.
      """

      def __init__(self, n_classes):
          super(PredictionConvolutions, self).__init__()
          self.n_classes = n_classes
          # Number of prior boxes per spatial location of each feature map.
          bboxs = {
              'conv4_3': 4,
              'conv7': 6,
              'conv8_2': 6,
              'conv9_2': 6,
              'conv10_2': 4,
              'conv11_2': 4,
          }
          # Channel count of each incoming feature map.
          in_channels = {
              'conv4_3': 512,
              'conv7': 1024,
              'conv8_2': 512,
              'conv9_2': 256,
              'conv10_2': 256,
              'conv11_2': 256,
          }
          self._fmap_names = tuple(bboxs)

          for fmap in self._fmap_names:
              setattr(self, 'loc_' + fmap,
                      nn.Conv2d(in_channels[fmap], bboxs[fmap] * 4, kernel_size=3, padding=1))
          for fmap in self._fmap_names:
              setattr(self, 'cl_' + fmap,
                      nn.Conv2d(in_channels[fmap], bboxs[fmap] * n_classes, kernel_size=3, padding=1))
          self.init_conv2d()

      def init_conv2d(self):
          """Xavier-initialise every convolution weight and zero its bias."""
          for c in self.children():
              if isinstance(c, nn.Conv2d):
                  nn.init.xavier_uniform_(c.weight)
                  nn.init.constant_(c.bias, 0)

      @staticmethod
      def _apply_head(head, feats, batch_size, last_dim):
          """Run ``head`` on ``feats`` and flatten to (batch_size, n_boxes, last_dim)."""
          out = head(feats)  # (N, boxes * last_dim, H, W)
          # Move channels last so that the reshape groups the values of one
          # prior box together instead of mixing spatial positions.
          out = out.permute(0, 2, 3, 1)  # (N, H, W, boxes * last_dim)
          return out.reshape(batch_size, -1, last_dim)

      def forward(self, conv4_3feats, conv7_feats, conv8_2feats, conv9_2feats, conv10_2feats, conv11_2feats):
          """Predict offsets and class scores for every prior box.

          :return: ``(locs, class_scores)``, the per-feature-map predictions
              concatenated along dim 1 in the order conv4_3 ... conv11_2.
              With the SSD300 feature-map sizes (38, 19, 10, 5, 3, 1) the
              shapes are (N, 8732, 4) and (N, 8732, n_classes).
          """
          batch_size = conv4_3feats.size(0)
          feature_maps = (conv4_3feats, conv7_feats, conv8_2feats,
                          conv9_2feats, conv10_2feats, conv11_2feats)

          locs = torch.cat(
              [self._apply_head(getattr(self, 'loc_' + fmap), feats, batch_size, 4)
               for fmap, feats in zip(self._fmap_names, feature_maps)],
              dim=1)
          class_scores = torch.cat(
              [self._apply_head(getattr(self, 'cl_' + fmap), feats, batch_size, self.n_classes)
               for fmap, feats in zip(self._fmap_names, feature_maps)],
              dim=1)
          return locs, class_scores
  class SSD300(nn.Module):
      """SSD300 detector: VGG base network, auxiliary convolutions and heads.

      ``forward`` returns the predicted box offsets and class scores for
      every prior box; the prior boxes themselves are available as
      ``self.priors_cxcy``.
      """

      def __init__(self, n_classes):
          super(SSD300, self).__init__()
          self.n_classes = n_classes
          self.base_vgg = VGGbase()
          self.aux_convs = AuxiliaryConvolutions()
          self.pre_convs = PredictionConvolutions(self.n_classes)
          # One learnable scale per conv4_3 channel, applied after the
          # feature map has been L2-normalised across channels.
          self.rescale_factors = nn.Parameter(torch.FloatTensor(1, 512, 1, 1))
          nn.init.constant_(self.rescale_factors, 20)
          # Fix: the priors used to be computed here and then discarded.
          self.priors_cxcy = self.create_prior_boxes()

      def forward(self, input):
          """Return ``(locs, class_scores)`` for a batch of images.

          For (N, 3, 300, 300) inputs the shapes are (N, 8732, 4) and
          (N, 8732, n_classes).
          """
          conv4_3feats, conv7_feats = self.base_vgg(input)  # (N, 512, 38, 38), (N, 1024, 19, 19)
          # L2 norm over the channel dimension at each spatial position.
          norm = torch.pow(conv4_3feats, 2).sum(dim=1, keepdim=True).sqrt()  # (N, 1, 38, 38)
          conv4_3feats = conv4_3feats / norm * self.rescale_factors
          conv8_2feats, conv9_2feats, conv10_2feats, conv11_2feats = self.aux_convs(conv7_feats)
          locs, class_scores = self.pre_convs(conv4_3feats, conv7_feats, conv8_2feats,
                                              conv9_2feats, conv10_2feats, conv11_2feats)
          return locs, class_scores

      def create_prior_boxes(self):
          """Create the SSD300 prior boxes in (cx, cy, w, h) form.

          :return: tensor of shape (8732, 4) on ``device``, with all values
              clamped to [0, 1]
          """
          fmap_size = {'conv4_3': 38, 'conv7': 19, 'conv8_2': 10,
                       'conv9_2': 5, 'conv10_2': 3, 'conv11_2': 1}
          anchor_scale = {'conv4_3': 0.1, 'conv7': 0.2, 'conv8_2': 0.375,
                          'conv9_2': 0.55, 'conv10_2': 0.725, 'conv11_2': 0.9}
          anchor_ratio = {'conv4_3': [1, 2, 0.5], 'conv7': [1, 2, 3, 0.5, 0.33],
                          'conv8_2': [1, 2, 3, 0.5, 0.33], 'conv9_2': [1, 2, 3, 0.5, 0.33],
                          'conv10_2': [1, 2, 0.5], 'conv11_2': [1, 2, 0.5]}
          fmap_names = list(fmap_size)

          prior_boxes = []
          for index, fmap in enumerate(fmap_names):
              size = fmap_size[fmap]
              scale = anchor_scale[fmap]
              # Extra square prior whose scale is the geometric mean of this
              # map's scale and the next map's scale; the last map uses 1.
              # Fix: the next scale used to be looked up with an integer key
              # in a name-keyed dict, which always raised KeyError, and a
              # bare `except` silently turned every extra scale into 1.
              if index + 1 < len(fmap_names):
                  extra_scale = sqrt(scale * anchor_scale[fmap_names[index + 1]])
              else:
                  extra_scale = 1.
              for i in range(size):
                  for j in range(size):
                      cy, cx = (i + 0.5) / size, (j + 0.5) / size
                      for ratio in anchor_ratio[fmap]:
                          prior_boxes.append([cx, cy, scale * sqrt(ratio), scale / sqrt(ratio)])
                          if ratio == 1:
                              prior_boxes.append([cx, cy, extra_scale, extra_scale])

          prior_boxes = torch.FloatTensor(prior_boxes).to(device)
          prior_boxes.clamp_(0, 1)  # keep every value inside [0, 1]
          print('prior_boxes.shape', prior_boxes.shape)
          return prior_boxes  # (8732, 4)
  class MultiBoxLoss(nn.Module):
      """Localisation loss plus classification (confidence) loss.

      Hard negative mining is applied to the confidence loss: besides the
      positive priors, only the ``neg_pos_ratio`` x (number of positives)
      negative priors with the largest loss contribute.
      """

      def __init__(self, prior_cxcy, threshold=0.5, neg_pos_ratio=3, alph=1.):
          """
          :param prior_cxcy: prior boxes in (cx, cy, w, h) form, (n_priors, 4)
          :param threshold: priors whose best IoU is below this become background
          :param neg_pos_ratio: hard negatives kept per positive prior
          :param alph: weight of the confidence loss in the total loss
          """
          super(MultiBoxLoss, self).__init__()
          self.prior_cxcy = prior_cxcy
          self.priors_xy = cxcy_to_xy(prior_cxcy)
          self.threshold = threshold
          self.neg_pos_ratio = neg_pos_ratio
          self.alph = alph
          # NOTE(review): despite the attribute name this is a plain L1 loss,
          # not nn.SmoothL1Loss. Left as is; confirm which one is intended.
          self.smooth_l1 = nn.L1Loss()
          # Per-element losses are required for hard negative mining.
          # Fix: `reduce=False` is deprecated in favour of reduction='none'.
          self.cross_entropy = nn.CrossEntropyLoss(reduction='none')

      def forward(self, prediction_locs, prediction_scores, boxes, labels):
          """Compute the loss for one batch.

          :param prediction_locs: predicted offsets, (N, n_priors, 4)
          :param prediction_scores: class scores, (N, n_priors, n_classes)
          :param boxes: list of N tensors, one (n_objects, 4) tensor per image;
              they are compared against ``cxcy_to_xy(prior_cxcy)``, so
              presumably boundary coordinates. TODO confirm against the caller
          :param labels: list of N tensors, one (n_objects,) tensor per image
          :return: scalar loss tensor
          """
          batch_size = prediction_locs.shape[0]
          n_priors = self.prior_cxcy.shape[0]
          n_classes = prediction_scores.shape[-1]
          assert batch_size == len(boxes)
          assert n_priors == prediction_locs.shape[1] == prediction_scores.shape[1]

          # Fix: build the targets on the same device as the predictions;
          # they used to be created on the CPU unconditionally.
          target_device = prediction_locs.device
          true_locs = torch.zeros((batch_size, n_priors, 4), dtype=torch.float,
                                  device=target_device)  # (N, n_priors, 4)
          true_classes = torch.zeros((batch_size, n_priors), dtype=torch.long,
                                     device=target_device)  # (N, n_priors)

          for i in range(batch_size):
              objects = boxes[i].shape[0]
              overlap = find_jaccard_overlap(boxes[i], self.priors_xy)  # (objects, n_priors)
              # Best ground-truth box (and its IoU) for every prior.
              iou_for_each_prior, index_for_each_prior = overlap.max(dim=0)
              # Best prior for every ground-truth box.
              _, index_for_each_box = overlap.max(dim=1)
              # Make sure every ground-truth box owns at least one prior, and
              # give that prior IoU 1 so it survives the threshold below.
              index_for_each_prior[index_for_each_box] = torch.arange(
                  objects, device=index_for_each_prior.device)
              iou_for_each_prior[index_for_each_box] = 1.
              # Label of each prior; low-overlap priors become background (0).
              label_for_each_prior = labels[i][index_for_each_prior]
              label_for_each_prior[iou_for_each_prior < self.threshold] = 0
              true_classes[i] = label_for_each_prior
              # Regression targets: matched box encoded relative to each prior.
              true_locs[i] = cx_cy_dxdy(xy_to_cxcy(boxes[i][index_for_each_prior]), self.prior_cxcy)

          positive_priors = true_classes != 0  # (N, n_priors)

          # The localisation loss only looks at positive priors.
          loc_loss = self.smooth_l1(prediction_locs[positive_priors], true_locs[positive_priors])

          confidence_loss_all = self.cross_entropy(
              prediction_scores.reshape(-1, n_classes), true_classes.reshape(-1))
          confidence_loss_all = confidence_loss_all.reshape(batch_size, n_priors)
          confidence_loss = self._hard_negative_confidence_loss(
              confidence_loss_all, positive_priors, self.neg_pos_ratio)
          return loc_loss + self.alph * confidence_loss

      @staticmethod
      def _hard_negative_confidence_loss(confidence_loss_all, positive_priors, neg_pos_ratio):
          """Combine positive and hardest-negative confidence losses.

          :param confidence_loss_all: per-prior loss, (N, n_priors)
          :param positive_priors: boolean mask of positive priors, (N, n_priors)
          :param neg_pos_ratio: hard negatives kept per positive, per image
          :return: (sum of positive losses + sum of hard-negative losses)
              divided by the total number of positives
          """
          n_positives = positive_priors.sum(dim=1)  # (N,)
          n_hard_negatives = neg_pos_ratio * n_positives  # (N,)

          confidence_loss_pos = confidence_loss_all[positive_priors]

          # Zero out the positives, then sort each row so that the hardest
          # negatives come first.
          confidence_loss_neg = confidence_loss_all.clone()
          confidence_loss_neg[positive_priors] = 0.
          confidence_loss_neg, _ = confidence_loss_neg.sort(dim=1, descending=True)

          n_priors = confidence_loss_all.shape[1]
          hardness_ranks = torch.arange(n_priors, device=confidence_loss_all.device)
          hardness_ranks = hardness_ranks.unsqueeze(0).expand_as(confidence_loss_neg)
          hard_negatives = hardness_ranks < n_hard_negatives.unsqueeze(1)  # (N, n_priors)
          # Fix: select from the SORTED negatives. Indexing the unsorted
          # losses with this rank mask picked the first k priors by position
          # (positives included) instead of the k hardest negatives.
          confidence_loss_hard = confidence_loss_neg[hard_negatives]

          return (confidence_loss_pos.sum() + confidence_loss_hard.sum()) / n_positives.sum().float()
  def test_vgg_base():
      """Smoke test: run VGGbase on a random batch and print both output shapes."""
      backbone = VGGbase()
      images = torch.rand((10, 3, 300, 300))
      conv4_3feats, conv7_feats = backbone(images)
      for label, feats in (('conv4_3feats.shape:', conv4_3feats),
                           ('conv7_feats.shape:', conv7_feats)):
          print(label, feats.shape)
  def test_AUx_conv():
      """Smoke test: run AuxiliaryConvolutions on a random conv7-sized batch."""
      aux_convs = AuxiliaryConvolutions()
      conv7_like = torch.rand((10, 1024, 19, 19))  # (B, 1024, 19, 19)
      conv8_2feats, conv9_2feats, conv10_2feats, conv11_2feats = aux_convs(conv7_like)
      outputs = (
          ('conv8_2feats.shape:', conv8_2feats),
          ('conv9_2feats.shape:', conv9_2feats),
          ('conv10_2feats.shape:', conv10_2feats),
          ('conv11_2feats.shape:', conv11_2feats),
      )
      for label, feats in outputs:
          print(label, feats.shape)
  def test_pre_conv():
      """Smoke test: feed random feature maps to PredictionConvolutions."""
      n_classes = 21
      heads = PredictionConvolutions(n_classes)
      # One random tensor per feature map, in the order forward() expects:
      # conv4_3, conv7, conv8_2, conv9_2, conv10_2, conv11_2.
      feature_shapes = [
          (10, 512, 38, 38),
          (10, 1024, 19, 19),
          (10, 512, 10, 10),
          (10, 256, 5, 5),
          (10, 256, 3, 3),
          (10, 256, 1, 1),
      ]
      feature_maps = [torch.rand(shape) for shape in feature_shapes]
      locs, class_scores = heads(*feature_maps)
      print(locs.shape)
      print(class_scores.shape)
  def test_SSD300():
      """Smoke test: build SSD300 and print the shapes of its two outputs."""
      os.environ["CUDA_VISIBLE_DEVICES"] = '0'
      detector = SSD300(21)
      print('==model', detector)
      images = torch.rand((10, 3, 300, 300))
      locs, class_scores = detector(images)
      print('locs.shape', locs.shape)
      print('class_scores.shape', class_scores.shape)
  def test_mutiboxloss():
      """Smoke test: evaluate MultiBoxLoss on random predictions for two images."""
      criterion = MultiBoxLoss(create_prior_boxes())
      prediction_locs = torch.rand(2, 8732, 4)
      prediction_scores = torch.rand(2, 8732, 21)
      # First image has two objects, second image has one.
      boxes = [
          torch.tensor([[0.1040, 0.1946, 0.9400, 0.9480],
                        [0.3140, 0.0973, 0.5760, 0.3756]], device=device),
          torch.tensor([[0.0000, 0.6107, 0.8540, 0.7787]], device=device),
      ]
      labels = [
          torch.tensor([13, 15], device=device),
          torch.tensor([4], device=device),
      ]
      loss_sclar = criterion(prediction_locs, prediction_scores, boxes, labels)
      print('==loss_sclar', loss_sclar)
  def create_prior_boxes():
      """Create the SSD300 prior boxes in (cx, cy, w, h) form.

      Sets ``CUDA_VISIBLE_DEVICES`` to ``'0'`` and places the result on the
      GPU when CUDA is available, otherwise on the CPU.

      :return: tensor of shape (8732, 4) with all values clamped to [0, 1]
      """
      os.environ["CUDA_VISIBLE_DEVICES"] = '0'
      device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
      from math import sqrt

      fmap_size = {'conv4_3': 38, 'conv7': 19, 'conv8_2': 10,
                   'conv9_2': 5, 'conv10_2': 3, 'conv11_2': 1}
      anchor_scale = {'conv4_3': 0.1, 'conv7': 0.2, 'conv8_2': 0.375,
                      'conv9_2': 0.55, 'conv10_2': 0.725, 'conv11_2': 0.9}
      anchor_ratio = {'conv4_3': [1, 2, 0.5], 'conv7': [1, 2, 3, 0.5, 0.33],
                      'conv8_2': [1, 2, 3, 0.5, 0.33], 'conv9_2': [1, 2, 3, 0.5, 0.33],
                      'conv10_2': [1, 2, 0.5], 'conv11_2': [1, 2, 0.5]}
      fmap_names = list(fmap_size)

      prior_boxes = []
      for index, fmap in enumerate(fmap_names):
          size = fmap_size[fmap]
          scale = anchor_scale[fmap]
          # Extra square prior whose scale is the geometric mean of this
          # map's scale and the next map's scale; the last map uses 1.
          # Fix: the next scale used to be looked up with an integer key in
          # a name-keyed dict, which always raised KeyError, and a bare
          # `except` silently turned every extra scale into 1.
          if index + 1 < len(fmap_names):
              extra_scale = sqrt(scale * anchor_scale[fmap_names[index + 1]])
          else:
              extra_scale = 1.
          for i in range(size):
              for j in range(size):
                  cy, cx = (i + 0.5) / size, (j + 0.5) / size
                  for ratio in anchor_ratio[fmap]:
                      prior_boxes.append([cx, cy, scale * sqrt(ratio), scale / sqrt(ratio)])
                      if ratio == 1:
                          prior_boxes.append([cx, cy, extra_scale, extra_scale])

      prior_boxes = torch.FloatTensor(prior_boxes).to(device)
      prior_boxes.clamp_(0, 1)  # keep every value inside [0, 1]
      print('prior_boxes.shape', prior_boxes.shape)
      return prior_boxes
  def decimate(tensor, m):
      """
      Decimate a tensor by a factor 'm', i.e. downsample by keeping every 'm'th value.
      This is used when we convert FC layers to equivalent Convolutional layers, BUT of a smaller size.
      :param tensor: tensor to be decimated
      :param m: list of decimation factors for each dimension of the tensor; None if not to be decimated along a dimension
      :return: decimated tensor
      """
      assert tensor.dim() == len(m)
      for d, step in enumerate(m):
          if step is None:
              continue
          # Fix: create the index on the tensor's own device; a CPU index
          # makes index_select fail for tensors that live on the GPU.
          kept = torch.arange(start=0, end=tensor.size(d), step=step,
                              dtype=torch.long, device=tensor.device)
          tensor = tensor.index_select(dim=d, index=kept)
      return tensor
  def test_fc_conv():
      """fc (4096,25088)-->conv (1024,512,3,3)"""
      # View a random FC weight matrix as a bank of 7x7 kernels, then keep
      # every 4th filter and every 3rd spatial tap.
      fc_as_kernels = torch.rand(4096, 25088).reshape(4096, 512, 7, 7)
      conv_weight = decimate(fc_as_kernels, [4, None, 3, 3])
      print('==conv_weight.shape', conv_weight.shape)
  def index_select():
      """Print a small 3x4 float tensor, its dtype, and its ``.data`` view."""
      values = torch.linspace(1, 12, steps=12, requires_grad=True)
      x = values.reshape(3, 4)
      print('==x', x)
      print(x.dtype)
      detached = x.data
      print(detached)
      print(detached.dtype)
  if __name__ == "__main__":
      # Expose only GPU 0 to this process.
      os.environ["CUDA_VISIBLE_DEVICES"] = '0'
      # Other smoke tests in this file that can be run instead:
      # test_vgg_base, test_AUx_conv, test_pre_conv, test_fc_conv,
      # index_select, create_prior_boxes, test_SSD300.
      test_mutiboxloss()

二.多尺度训练与测试

1.多尺度训练

目的:用不同的尺度去帮助模型适应各种大小的目标,获得对尺寸的鲁棒性。一般是每个batch随机选择一个合适的尺度进行训练即可.

2.多尺度测试

2.1 one-stage 多尺度测试

对单个尺度的结果先进行NMS,再resize成同一个尺度大小,然后再进行一次NMS。先对单个尺度结果进行NMS可以减少推理时间。

2.2 two-stage 多尺度测试

(1) 不同尺度图,通过Backbone+RPN和各自的NMS之后,会得到各自的proposals。再把尺度统一到同一张图的大小上去,然后合并到一起做阈值为0.7的NMS,得到Proposals。

(2) R-CNN阶段依然希望用多尺度,所以需要把proposals分别resize到橙色和绿色的图的尺寸上去,然后各自过R-CNN。后面的步骤与RPN和one stage是一样的,先各自做NMS,然后Resize到统一尺寸后再合并做阈值为0.5的NMS。

参考:
https://mp.weixin.qq.com/s/lBhPjOiT_05WXwxFCXj2mQ

本文内容由网友自发贡献,转载请注明出处:【wpsshop博客】
推荐阅读
相关标签
  

闽ICP备14008679号