当前位置:   article > 正文



转载自:SPP和SPPF(in YOLOv5) - 知乎 (zhihu.com)



  class SPP(nn.Module):
      """Spatial Pyramid Pooling (SPP) layer, https://arxiv.org/abs/1406.4729.

      Reduces the input with ``cv1``, max-pools that result once per kernel
      size in ``k`` (stride 1, padding ``x // 2``), concatenates the un-pooled
      tensor with every pooled tensor along dim 1, and projects the stack
      with ``cv2``.

      Args:
          c1: Channel count passed to ``cv1`` as its first argument.
          c2: Channel count passed to ``cv2`` as its second argument.
          k: Pooling kernel sizes. With stride 1 and padding ``x // 2`` an odd
              size keeps the pooled map the same height/width as its input,
              which the concatenation in ``forward`` relies on.

      NOTE(review): ``Conv`` is not defined in this snippet; presumably it is
      the YOLOv5 convolution block called as ``Conv(in_channels,
      out_channels, kernel, stride)`` -- confirm against its definition.
      """

      def __init__(self, c1, c2, k=(5, 9, 13)):
          super().__init__()
          c_ = c1 // 2  # hidden channels
          self.cv1 = Conv(c1, c_, 1, 1)
          # cv2 consumes the un-pooled branch plus one pooled branch per kernel.
          self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
          self.m = nn.ModuleList(
              [nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k]
          )

      def forward(self, x):
          """Return ``cv2`` applied to ``cv1(x)`` stacked with its pooled copies."""
          x = self.cv1(x)
          with warnings.catch_warnings():
              warnings.simplefilter('ignore')  # suppress torch 1.9.0 max_pool2d() warning
              return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))


  class SPP(nn.Module):
      """Pooling-only SPP: three parallel max-pools stacked with the input.

      Each pool uses stride 1 and padding ``kernel // 2`` (5/2, 9/4, 13/6), so
      every branch keeps the input's height and width. Concatenating on
      dim 1 turns a ``[B, C, H, W]`` input into a ``[B, 4 * C, H, W]`` output.
      """

      def __init__(self):
          super().__init__()
          self.maxpool1 = nn.MaxPool2d(kernel_size=5, stride=1, padding=2)
          self.maxpool2 = nn.MaxPool2d(kernel_size=9, stride=1, padding=4)
          self.maxpool3 = nn.MaxPool2d(kernel_size=13, stride=1, padding=6)

      def forward(self, x: torch.Tensor) -> torch.Tensor:
          """Return ``x`` and its 5x5, 9x9 and 13x13 max-pools stacked on dim 1."""
          # All three pools read the same input; they are independent branches.
          o1 = self.maxpool1(x)
          o2 = self.maxpool2(x)
          o3 = self.maxpool3(x)
          return torch.cat([x, o1, o2, o3], dim=1)
假设输入形状为 [B, C, H, W],则输出形状为 [B, C*4, H, W]:
x、o1、o2、o3 的形状都是 [B, C, H, W],在 dim=1 上做 cat(拼接),即沿通道维度堆叠。


  class SPPF(nn.Module):
      """Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher.

      Reduces the input with ``cv1``, applies the same stride-1 max-pool three
      times in sequence, concatenates the un-pooled tensor with the three
      successive results along dim 1, and projects the stack with ``cv2``.

      Chaining a stride-1 pool of size ``k`` widens the effective window to
      ``2k - 1`` and then ``3k - 2``; for the default ``k=5`` that is 9 and 13,
      hence the equivalence to ``SPP(k=(5, 9, 13))``.

      Args:
          c1: Channel count passed to ``cv1`` as its first argument.
          c2: Channel count passed to ``cv2`` as its second argument.
          k: Pooling kernel size. With padding ``k // 2`` an odd size keeps
              the height/width unchanged, which the concatenation in
              ``forward`` relies on.

      NOTE(review): ``Conv`` is not defined in this snippet; presumably it is
      the YOLOv5 convolution block called as ``Conv(in_channels,
      out_channels, kernel, stride)`` -- confirm against its definition.
      """

      def __init__(self, c1, c2, k=5):  # equivalent to SPP(k=(5, 9, 13))
          super().__init__()
          c_ = c1 // 2  # hidden channels
          self.cv1 = Conv(c1, c_, 1, 1)
          # Four branches are concatenated: the input and three pooled maps.
          self.cv2 = Conv(c_ * 4, c2, 1, 1)
          self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)

      def forward(self, x):
          """Return ``cv2`` applied to ``cv1(x)`` stacked with its chained pools."""
          x = self.cv1(x)
          with warnings.catch_warnings():
              warnings.simplefilter('ignore')  # suppress torch 1.9.0 max_pool2d() warning
              # Each pool reuses the previous result instead of re-pooling x.
              y1 = self.m(x)
              y2 = self.m(y1)
              return self.cv2(torch.cat((x, y1, y2, self.m(y2)), 1))


  class SPPF(nn.Module):
      """Pooling-only SPPF: one 5x5 max-pool applied three times in sequence.

      The pool uses stride 1 and padding 2, so height and width are kept.
      Chaining it yields effective 5x5, 9x9 and 13x13 windows, and the four
      tensors are concatenated on dim 1: ``[B, C, H, W]`` in,
      ``[B, 4 * C, H, W]`` out.
      """

      def __init__(self):
          super().__init__()
          self.maxpool = nn.MaxPool2d(kernel_size=5, stride=1, padding=2)

      def forward(self, x: torch.Tensor) -> torch.Tensor:
          """Return ``x`` and its three successive 5x5 max-pools stacked on dim 1."""
          # Each stage pools the previous stage's output, not the raw input.
          o1 = self.maxpool(x)
          o2 = self.maxpool(o1)
          o3 = self.maxpool(o2)
          return torch.cat([x, o1, o2, o3], dim=1)
SPPF 的输出形状与 SPP 相同:输入为 [B, C, H, W] 时,输出同样是 [B, C*4, H, W]。



  1. import time
  2. import torch
  3. import torch.nn as nn
  class SPP(nn.Module):
      """Pooling-only SPP used as the benchmark baseline.

      Runs three independent stride-1 max-pools (5/9/13 with padding 2/4/6)
      on the same input and concatenates the input with all three results on
      dim 1, so the output has four times as many channels as the input.
      """

      def __init__(self):
          super().__init__()
          self.maxpool1 = nn.MaxPool2d(kernel_size=5, stride=1, padding=2)
          self.maxpool2 = nn.MaxPool2d(kernel_size=9, stride=1, padding=4)
          self.maxpool3 = nn.MaxPool2d(kernel_size=13, stride=1, padding=6)

      def forward(self, x: torch.Tensor) -> torch.Tensor:
          """Return ``x`` and its 5x5, 9x9 and 13x13 max-pools stacked on dim 1."""
          o1 = self.maxpool1(x)
          o2 = self.maxpool2(x)
          o3 = self.maxpool3(x)
          return torch.cat([x, o1, o2, o3], dim=1)
  class SPPF(nn.Module):
      """Pooling-only SPPF used as the benchmark candidate.

      Applies a single stride-1 5x5 max-pool (padding 2) three times in a
      chain and concatenates the input with the three intermediate results
      on dim 1, so the output has four times as many channels as the input.
      """

      def __init__(self):
          super().__init__()
          self.maxpool = nn.MaxPool2d(kernel_size=5, stride=1, padding=2)

      def forward(self, x: torch.Tensor) -> torch.Tensor:
          """Return ``x`` and its three successive 5x5 max-pools stacked on dim 1."""
          # Each stage pools the previous stage's output, not the raw input.
          o1 = self.maxpool(x)
          o2 = self.maxpool(o1)
          o3 = self.maxpool(o2)
          return torch.cat([x, o1, o2, o3], dim=1)
  def main() -> None:
      """Check that SPP and SPPF agree, then time 100 forward passes of each.

      Prints, in order: whether the two outputs are exactly equal, the total
      SPP time, and the total SPPF time (both in seconds).
      """
      input_tensor = torch.rand(8, 32, 16, 16)
      spp = SPP()
      sppf = SPPF()

      # These first calls also serve as a warm-up before the timed loops.
      output1 = spp(input_tensor)
      output2 = sppf(input_tensor)
      print(torch.equal(output1, output2))

      for label, module in (("spp", spp), ("sppf", sppf)):
          # perf_counter is monotonic and high-resolution, unlike time.time(),
          # which can jump if the system clock is adjusted.
          t_start = time.perf_counter()
          for _ in range(100):
              module(input_tensor)
          print(f"{label} time: {time.perf_counter() - t_start}")
  # Run the comparison only when executed as a script, not on import.
  if __name__ == '__main__':
      main()


运行结果(各执行 100 次前向传播的总耗时,单位:秒):
spp time: 0.5373051166534424
sppf time: 0.20780706405639648


