当前位置:   article > 正文

GoogLeNet(InceptionV3)

googlenet(inceptionv3)

InceptionV3在InceptionV2的基础上,将标准的卷积操作分解为1xN卷积和Nx1卷积两个步骤。这种分解方式可以显著降低参数量和计算量,同时在一定程度上保持特征提取的能力。

InceptionV3A与InceptionV2结构相同,即5x5卷积使用两个3x3的卷积代替,目的是减少参数量和计算量——大卷积分解成小卷积。

InceptionV3B将InceptionV2结构中3x3的卷积使用1x3和3x1的卷积组合来代替,5x5的卷积使用两个1x3和3x1的卷积组合来代替,目的也是减少参数量和计算量——小卷积分解为非对称卷积。

采用这种分解在模型的早期网络层上不能有效发挥作用,但是在中等特征图大小(m×m,其中m在12到20之间)上取得了非常好的效果。

使用3x3的卷积代替5x5的卷积,输入512通道特征图,输出128通道特征图:

参数量:512×3×3×128+128×3×3×128=737280

计算量:512×3×3×128×W×H+128×3×3×128×W×H=737280×W×H

W×H是特征图尺寸,假设卷积层的输入输出特征图尺寸保持一致

使用1x3和3x1的卷积组合代替5x5的卷积,输入512通道特征图,输出128通道特征图:

参数量:512×1×3×128+128×3×1×128+128×1×3×128+128×3×1×128=344064

计算量:512×1×3×128×W×H+128×3×1×128×W×H+128×1×3×128×W×H+128×3×1×128×W×H=344064×W×H

InceptionV3C:该结构主要用于扩充通道数,使网络变得更宽,因此被放置在GoogLeNet(InceptionV3)的最后。

import cv2
import torch
import torch.nn as nn
from torchsummary import summary
from torchvision import transforms
  6. class GoogLeNetV3(nn.Module):
  7. def __init__(self, num_classes=1000, aux_logits=True, init_weights=False):
  8. super(GoogLeNetV3, self).__init__()
  9. self.aux_logits = aux_logits
  10. # 3个3×3卷积替代7×7卷积
  11. self.conv1_1 = BasicConv2d(3, 32, kernel_size=3, stride=2)
  12. self.conv1_2 = BasicConv2d(32, 32, kernel_size=3, stride=1)
  13. self.conv1_3 = BasicConv2d(32, 64, kernel_size=3, stride=1, padding=1)
  14. # 池化层
  15. self.maxpool1 = nn.MaxPool2d(3, stride=2, ceil_mode=True)
  16. self.conv2 = BasicConv2d(64, 80, kernel_size=3)
  17. self.conv3 = BasicConv2d(80, 192, kernel_size=3, stride=2)
  18. self.conv4 = BasicConv2d(192, 192, kernel_size=3, padding=1)
  19. self.inception3a = InceptionV3A(192, 64, 48, 64, 64, 96, 32)
  20. self.inception3b = InceptionV3A(256, 64, 48, 64, 64, 96, 64)
  21. self.inception3c = InceptionV3A(288, 64, 48, 64, 64, 96, 64)
  22. self.inception4a = InceptionV3D(288, 0, 384, 384, 64, 96, 0)
  23. self.inception4b = InceptionV3B(768, 192, 128, 192, 128, 192, 192)
  24. self.inception4c = InceptionV3B(768, 192, 160, 192, 160, 192, 192)
  25. self.inception4d = InceptionV3B(768, 192, 160, 192, 160, 192, 192)
  26. self.inception4e = InceptionV3D(768, 0, 384, 384, 64, 128, 0)
  27. if self.aux_logits == True:
  28. self.aux = InceptionAux(in_channels=768, out_channels=num_classes)
  29. self.inception5a = InceptionV3C(1280, 320, 384, 384, 448, 384, 192)
  30. self.inception5b = InceptionV3C(2048, 320, 384, 384, 448, 384, 192)
  31. self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
  32. self.dropout = nn.Dropout(0.5)
  33. self.fc = nn.Linear(2048, num_classes)
  34. if init_weights:
  35. self._initialize_weights()
  36. def forward(self, x):
  37. # #[16,3,224,224]调整为[16,3,299,299]
  38. # hwsize = 299
  39. # x2 = torch.randn(16,3,hwsize,hwsize)
  40. # for m in range(x.shape[0]):
  41. # tmp = x[m, :, :, :] #tensor([3,224,224])
  42. # mintmp = torch.min(tmp)
  43. # maxtmp = torch.max(tmp)
  44. # tmp2 = (tmp- mintmp)/maxtmp #/maxtmp
  45. # if 1:
  46. # toPIL = transforms.ToPILImage() # 这个函数可以将张量转为PIL图片,由小数转为0-255之间的像素值
  47. # img_PIL = toPIL(tmp2) # 张量tensor转换为图片
  48. # img_PIL.save('random.jpg') # 保存图片;img_PIL.show()可以直接显示图片
  49. # img = cv2.imread('random.jpg', 1) # 0代表单通道,1代表3通道
  50. # else:
  51. # img = self.tensor_to_image(tmp2)
  52. #
  53. # img2 = cv2.resize(img, (hwsize, hwsize))
  54. # # toTensor = transforms.ToTensor()#图片转换为张量tensor
  55. # # img_tensor = toTensor(img)
  56. # img3 = torch.tensor(img2).float()
  57. # img4 = img3 / 255.0
  58. # img5 = img4.permute(2,0,1)
  59. # img6 = img5 * maxtmp + mintmp
  60. # x2[m,:,:,:]=img6
  61. # s=1
  62. # x = x2
  63. # N x 3 x 299 x 299
  64. x = self.conv1_1(x)
  65. # N x 32 x 149 x 149
  66. x = self.conv1_2(x)
  67. # N x 32 x 147 x 147
  68. x = self.conv1_3(x)
  69. # N x 64 x 147 x 147
  70. x = self.maxpool1(x)
  71. # N x 64 x 73 x 73
  72. x = self.conv2(x)
  73. # N x 80 x 71 x 71
  74. x = self.conv3(x)
  75. # N x 192 x 35 x 35
  76. x = self.conv4(x)
  77. # N x 192 x 35 x 35
  78. x = self.inception3a(x)
  79. # N x 256 x 35 x 35
  80. x = self.inception3b(x)
  81. # N x 288 x 35 x 35
  82. x = self.inception3c(x)
  83. # N x 288 x 35x 35
  84. x = self.inception4a(x)
  85. # N x 768 x 17 x 17
  86. x = self.inception4b(x)
  87. # N x 768 x 17 x 17
  88. x = self.inception4c(x)
  89. # N x 768 x 17 x 17
  90. x = self.inception4d(x)
  91. # N x 768 x 17 x 17
  92. if self.training and self.aux_logits: # eval model lose this layer
  93. aux = self.aux(x)
  94. # N x 768 x 17 x 17
  95. x = self.inception4e(x)
  96. # N x 1280 x 8 x 8
  97. x = self.inception5a(x)
  98. # N x 2048 x 8 x 8
  99. x = self.inception5b(x)
  100. # N x 2048 x 8 x 8
  101. x = self.avgpool(x)
  102. # N x 2048 x 1 x 1
  103. x = torch.flatten(x, 1)
  104. # N x 2048
  105. x = self.dropout(x)
  106. x = self.fc(x)
  107. # N x 1000(num_classes)
  108. if self.training and self.aux_logits: # 训练阶段使用
  109. return x, aux
  110. return x
  111. # 对模型的权重进行初始化操作
  112. def _initialize_weights(self):
  113. for m in self.modules():
  114. if isinstance(m, nn.Conv2d):
  115. nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
  116. if m.bias is not None:
  117. nn.init.constant_(m.bias, 0)
  118. elif isinstance(m, nn.Linear):
  119. nn.init.normal_(m.weight, 0, 0.01)
  120. nn.init.constant_(m.bias, 0)
  121. # InceptionV3A:BasicConv2d+MaxPool2d
  122. class InceptionV3A(nn.Module):
  123. def __init__(self, in_channels, ch1x1, ch3x3red, ch3x3, ch3x3redX2, ch3x3X2, pool_proj):
  124. super(InceptionV3A, self).__init__()
  125. # 1×1卷积
  126. self.branch1 = BasicConv2d(in_channels, ch1x1, kernel_size=1)
  127. # 1×1卷积+3×3卷积
  128. self.branch2 = nn.Sequential(
  129. BasicConv2d(in_channels, ch3x3red, kernel_size=1),
  130. BasicConv2d(ch3x3red, ch3x3, kernel_size=3, padding=1) # 保证输出大小等于输入大小
  131. )
  132. # 1×1卷积++3×3卷积+3×3卷积
  133. self.branch3 = nn.Sequential(
  134. BasicConv2d(in_channels, ch3x3redX2, kernel_size=1),
  135. BasicConv2d(ch3x3redX2, ch3x3X2, kernel_size=3, padding=1),
  136. BasicConv2d(ch3x3X2, ch3x3X2, kernel_size=3, padding=1) # 保证输出大小等于输入大小
  137. )
  138. # 3×3池化+1×1卷积
  139. self.branch4 = nn.Sequential(
  140. nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
  141. BasicConv2d(in_channels, pool_proj, kernel_size=1)
  142. )
  143. def forward(self, x):
  144. branch1 = self.branch1(x)
  145. branch2 = self.branch2(x)
  146. branch3 = self.branch3(x)
  147. branch4 = self.branch4(x)
  148. # 拼接
  149. outputs = [branch1, branch2, branch3, branch4]
  150. return torch.cat(outputs, 1)
  151. # InceptionV3B:BasicConv2d+MaxPool2d
  152. class InceptionV3B(nn.Module):
  153. def __init__(self, in_channels, ch1x1, ch3x3red, ch3x3, ch3x3redX2, ch3x3X2, pool_proj):
  154. super(InceptionV3B, self).__init__()
  155. # 1×1卷积
  156. self.branch1 = BasicConv2d(in_channels, ch1x1, kernel_size=1)
  157. # 1×1卷积+1×3卷积+3×1卷积
  158. self.branch2 = nn.Sequential(
  159. BasicConv2d(in_channels, ch3x3red, kernel_size=1),
  160. BasicConv2d(ch3x3red, ch3x3, kernel_size=[1, 3], padding=[0, 1]),
  161. BasicConv2d(ch3x3, ch3x3, kernel_size=[3, 1], padding=[1, 0]) # 保证输出大小等于输入大小
  162. )
  163. # 1×1卷积+1×3卷积+3×1卷积+1×3卷积+3×1卷积
  164. self.branch3 = nn.Sequential(
  165. BasicConv2d(in_channels, ch3x3redX2, kernel_size=1),
  166. BasicConv2d(ch3x3redX2, ch3x3X2, kernel_size=[1, 3], padding=[0, 1]),
  167. BasicConv2d(ch3x3X2, ch3x3X2, kernel_size=[3, 1], padding=[1, 0]),
  168. BasicConv2d(ch3x3X2, ch3x3X2, kernel_size=[1, 3], padding=[0, 1]),
  169. BasicConv2d(ch3x3X2, ch3x3X2, kernel_size=[3, 1], padding=[1, 0]) # 保证输出大小等于输入大小
  170. )
  171. # 3×3池化+1×1卷积
  172. self.branch4 = nn.Sequential(
  173. nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
  174. BasicConv2d(in_channels, pool_proj, kernel_size=1)
  175. )
  176. def forward(self, x):
  177. branch1 = self.branch1(x)
  178. branch2 = self.branch2(x)
  179. branch3 = self.branch3(x)
  180. branch4 = self.branch4(x)
  181. # 拼接
  182. outputs = [branch1, branch2, branch3, branch4]
  183. return torch.cat(outputs, 1)
  184. # InceptionV3C:BasicConv2d+MaxPool2d
  185. class InceptionV3C(nn.Module):
  186. def __init__(self, in_channels, ch1x1, ch3x3red, ch3x3, ch3x3redX2, ch3x3X2, pool_proj):
  187. super(InceptionV3C, self).__init__()
  188. # 1×1卷积
  189. self.branch1 = BasicConv2d(in_channels, ch1x1, kernel_size=1)
  190. # 1×1卷积+1×3卷积+3×1卷积
  191. self.branch2_0 = BasicConv2d(in_channels, ch3x3red, kernel_size=1)
  192. self.branch2_1 = BasicConv2d(ch3x3red, ch3x3, kernel_size=[1, 3], padding=[0, 1])
  193. self.branch2_2 = BasicConv2d(ch3x3red, ch3x3, kernel_size=[3, 1], padding=[1, 0])
  194. # 1×1卷积+3×3卷积+1×3卷积+3×1卷积
  195. self.branch3_0 = nn.Sequential(
  196. BasicConv2d(in_channels, ch3x3redX2, kernel_size=1),
  197. BasicConv2d(ch3x3redX2, ch3x3X2, kernel_size=3, padding=1),
  198. )
  199. self.branch3_1 = BasicConv2d(ch3x3X2, ch3x3X2, kernel_size=[1, 3], padding=[0, 1])
  200. self.branch3_2 = BasicConv2d(ch3x3X2, ch3x3X2, kernel_size=[3, 1], padding=[1, 0])
  201. # 3×3池化+1×1卷积
  202. self.branch4 = nn.Sequential(
  203. nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
  204. BasicConv2d(in_channels, pool_proj, kernel_size=1)
  205. )
  206. def forward(self, x):
  207. branch1 = self.branch1(x)
  208. branch2_0 = self.branch2_0(x)
  209. branch2 = torch.cat([self.branch2_1(branch2_0), self.branch2_2(branch2_0)], dim=1)
  210. branch3_0 = self.branch3_0(x)
  211. branch3 = torch.cat([self.branch3_1(branch3_0), self.branch3_2(branch3_0)], dim=1)
  212. branch4 = self.branch4(x)
  213. # 拼接
  214. outputs = [branch1, branch2, branch3, branch4]
  215. return torch.cat(outputs, 1)
  216. # InceptionV3D:BasicConv2d+MaxPool2d
  217. class InceptionV3D(nn.Module):
  218. def __init__(self, in_channels, ch1x1, ch3x3red, ch3x3, ch3x3redX2, ch3x3X2, pool_proj):
  219. super(InceptionV3D, self).__init__()
  220. # ch1x1:没有1×1卷积
  221. # 1×1卷积+3×3卷积,步长为2
  222. self.branch1 = nn.Sequential(
  223. BasicConv2d(in_channels, ch3x3red, kernel_size=1),
  224. BasicConv2d(ch3x3red, ch3x3, kernel_size=3, stride=2)
  225. )
  226. # 1×1卷积+3×3卷积+3×3卷积,步长为2
  227. self.branch2 = nn.Sequential(
  228. BasicConv2d(in_channels, ch3x3redX2, kernel_size=1),
  229. BasicConv2d(ch3x3redX2, ch3x3X2, kernel_size=3, padding=1), # 保证输出大小等于输入大小
  230. BasicConv2d(ch3x3X2, ch3x3X2, kernel_size=3, stride=2)
  231. )
  232. # 3×3池化,步长为2
  233. self.branch3 = nn.Sequential(nn.MaxPool2d(kernel_size=3, stride=2))
  234. # pool_proj:池化层后不再接卷积层
  235. def forward(self, x):
  236. branch1 = self.branch1(x)
  237. branch2 = self.branch2(x)
  238. branch3 = self.branch3(x)
  239. # 拼接
  240. outputs = [branch1,branch2, branch3]
  241. return torch.cat(outputs, 1)
  242. # 辅助分类器:AvgPool2d+BasicConv2d+Linear+dropout
  243. class InceptionAux(nn.Module):
  244. def __init__(self, in_channels, out_channels):
  245. super(InceptionAux, self).__init__()
  246. self.averagePool = nn.AvgPool2d(kernel_size=5, stride=3)
  247. self.conv1 = BasicConv2d(in_channels=in_channels, out_channels=128, kernel_size=1)
  248. self.conv2 = BasicConv2d(in_channels=128, out_channels=768, kernel_size=5, stride=1)
  249. self.dropout = nn.Dropout(p=0.7)
  250. self.linear = nn.Linear(in_features=768, out_features=out_channels)
  251. def forward(self, x):
  252. # N x 768 x 17 x 17
  253. x = self.averagePool(x)
  254. # N x 768 x 5 x 5
  255. x = self.conv1(x)
  256. # N x 128 x 5 x 5
  257. x = self.conv2(x)
  258. # N x 768 x 1 x 1
  259. x = x.view(x.size(0), -1)
  260. # N x 768
  261. out = self.linear(self.dropout(x))
  262. # N x num_classes
  263. return out
  264. # 卷积组: Conv2d+BN+ReLU
  265. class BasicConv2d(nn.Module):
  266. def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0):
  267. super(BasicConv2d, self).__init__()
  268. self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
  269. self.bn = nn.BatchNorm2d(out_channels)
  270. self.relu = nn.ReLU(inplace=True)
  271. def forward(self, x):
  272. x = self.conv(x)
  273. x = self.bn(x)
  274. x = self.relu(x)
  275. return x
  276. if __name__ == '__main__':
  277. device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
  278. model = GoogLeNetV3().to(device)
  279. summary(model, input_size=(3, 299, 299))

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/不正经/article/detail/681118
推荐阅读
相关标签
  

闽ICP备14008679号