赞
踩
接上篇【玩转yolov5】使用TensorRT C++ API搭建v4.0网络结构(1)。上篇说完了CBL(conv+bn+激活),现在进入到CSP结构。yolov5中设计了两种CSP结构,以yolov5s网络为例,CSP1_X结构应用于Backbone主干网络,另一种CSP2_X结构应用于Neck中。
注意:上图仅供参考,yolov5s-v4.0实际结构以代码为准,有差异。
CSP结构的实现统一在下面的C3函数中,通过输入参数控制宽度,深度以及结构类型等。
- auto bottleneck_CSP2 = C3(network, weightMap, *conv1->getOutput(0), get_width(128, gw), get_width(128, gw), get_depth(3, gd), true, 1, 0.5, "model.2"); // Example call: C3 on conv1's output with n = get_depth(3, gd) bottlenecks, shortcut = true, g = 1, e = 0.5, layer-name prefix "model.2"
-
- ILayer* C3(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, int c1, int c2, int n, bool shortcut, int g, float e, std::string lname) {
- int c_ = (int)((float)c2 * e); //e:expand param
- auto cv1 = convBlock(network, weightMap, input, c_, 1, 1, 1, lname + ".cv1");
- auto cv2 = convBlock(network, weightMap, input, c_, 1, 1, 1, lname + ".cv2");
- ITensor *y1 = cv1->getOutput(0);
- for (int i = 0; i < n; i++) {
- auto b = bottleneck(network, weightMap, *y1, c_, c_, shortcut, g, 1.0, lname + ".m." + std::to_string(i));
- y1 = b->getOutput(0);
- }
-
- ITensor* inputTensors[] = { y1, cv2->getOutput(0) };
- auto cat = network->addConcatenation(inputTensors, 2);
-
- auto cv3 = convBlock(network, weightMap, *cat->getOutput(0), c2, 1, 1, 1, lname + ".cv3");
- return cv3;
- }
get_width(...)和get_depth(...)分别用来计算CSP结构的宽度和深度,yolov5s结构中gw,gd系数分别为0.50和0.33,所以get_width(128,gw)等于64,get_depth(3,gd)等于1。
// Scales a base kernel count by the width multiplier and rounds the result up
// to the nearest multiple of `divisor`, i.e. ceil(x * gw / divisor) * divisor.
//
//   x       - base number of kernels before scaling
//   gw      - width multiplier
//   divisor - the result is a multiple of this value (default 8); must be non-zero
//
// Returns the scaled kernel count.
static int get_width(int x, float gw, int divisor = 8) {
    const float groups = x * gw / divisor;
    // Truncate, then add one when a fractional part was dropped. Testing
    // divisibility on the truncated product instead would let a value such as
    // 64.5 pass as "already a multiple of 8" and skip the round-up.
    int whole = static_cast<int>(groups);
    if (static_cast<float>(whole) < groups) {
        ++whole;
    }
    return whole * divisor;
}
-
// Scales a base repeat count by the depth multiplier.
//
//   x  - base number of repeats
//   gd - depth multiplier
//
// Returns 1 when x is exactly 1; otherwise x * gd rounded to the nearest
// integer, with a lower bound of 1.
static int get_depth(int x, float gd) {
    if (x == 1) {
        return 1;
    }
    const int scaled = static_cast<int>(round(x * gd));
    return scaled > 1 ? scaled : 1;
}
CSP1_X中的X表示bottleneck的数量,CSP1_1即包含1个bottleneck,这里bottleneck由函数bottleneck(...)生成。
- ILayer* bottleneck(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, int c1, int c2, bool shortcut, int g, float e, std::string lname) {
- auto cv1 = convBlock(network, weightMap, input, (int)((float)c2 * e), 1, 1, 1, lname + ".cv1");
- auto cv2 = convBlock(network, weightMap, *cv1->getOutput(0), c2, 3, 1, g, lname + ".cv2");
- if (shortcut && c1 == c2) {
- auto ew = network->addElementWise(input, *cv2->getOutput(0), ElementWiseOperation::kSUM);
- return ew;
- }
- return cv2;
- }
这是一个经典的bottleneck结构,只是要注意这里的conv实际上是CBL,包含(conv+bn+silu)。这里的输入参数e固定为1,意味着第1个1x1的卷积不会降低通道数。一旦shortcut为false,这里的bottleneck就演变成了两个CBL的串联,X个残差组件变成了2*X个CBL。
整个Backbone中除了focus,CBL,CSP1_X之外还有一个重要的结构就是SPP。
SPP(Spatial Pyramid Pooling) 原理如上图,feature maps 分别经过三个pooling窗口(蓝色,青绿,银灰的窗口) 进行pooling,再将各自得到的结果在channel维度进行concat。SPP 可以增大感受野,有助于解决anchor和feature map的对齐问题。SPP这个结构就是通过不同kernel size的pooling抽取不同尺度的特征,再拼接起来进行特征融合。
- auto spp8 = SPP(network, weightMap, *conv7->getOutput(0), get_width(1024, gw), get_width(1024, gw), 5, 9, 13, "model.8"); // Example call: SPP on conv7's output with pooling kernel sizes 5, 9 and 13, layer-name prefix "model.8"
-
- ILayer* SPP(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, int c1, int c2, int k1, int k2, int k3, std::string lname) {
- //kernel size:1x1,5x5,9x9,13x13
- //concat the pooling result of different kernel size
- int c_ = c1 / 2;
- auto cv1 = convBlock(network, weightMap, input, c_, 1, 1, 1, lname + ".cv1");
-
- auto pool1 = network->addPoolingNd(*cv1->getOutput(0), PoolingType::kMAX, DimsHW{ k1, k1 });
- pool1->setPaddingNd(DimsHW{ k1 / 2, k1 / 2 });
- pool1->setStrideNd(DimsHW{ 1, 1 });
- auto pool2 = network->addPoolingNd(*cv1->getOutput(0), PoolingType::kMAX, DimsHW{ k2, k2 });
- pool2->setPaddingNd(DimsHW{ k2 / 2, k2 / 2 });
- pool2->setStrideNd(DimsHW{ 1, 1 });
- auto pool3 = network->addPoolingNd(*cv1->getOutput(0), PoolingType::kMAX, DimsHW{ k3, k3 });
- pool3->setPaddingNd(DimsHW{ k3 / 2, k3 / 2 });
- pool3->setStrideNd(DimsHW{ 1, 1 });
-
- ITensor* inputTensors[] = { cv1->getOutput(0), pool1->getOutput(0), pool2->getOutput(0), pool3->getOutput(0) };
- auto cat = network->addConcatenation(inputTensors, 4);
-
- auto cv2 = convBlock(network, weightMap, *cat->getOutput(0), c2, 1, 1, 1, lname + ".cv2");
- return cv2;
- }
在yolov5的SPP里,三个pooling的kernel size分别是5x5, 9x9, 13x13(不经过pooling、直接参与拼接的那一路可以看作1x1)。在SPP中首先通过一个1x1卷积将通道减半,再将结果做不同尺度的Pooling,最后将Pooling的结果和通道减半后的结果进行拼接,拼接后的feature map还要再经过一个CBL。yolov5s-v4.0的backbone部分核心代码如下。
- /* ------ yolov5 backbone: layers "model.0" through "model.8", each one fed by the previous layer's output ------ */
- auto focus0 = focus(network, weightMap, *data, 3, get_width(64, gw), 3, "model.0"); // Focus layer on the network input; get_width scales the base kernel count (64) by gw
- auto conv1 = convBlock(network, weightMap, *focus0->getOutput(0), get_width(128, gw), 3, 2, 1, "model.1"); // CBL (presumably 3x3 kernel, stride 2 — confirm against convBlock)
- auto bottleneck_CSP2 = C3(network, weightMap, *conv1->getOutput(0), get_width(128, gw), get_width(128, gw), get_depth(3, gd), true, 1, 0.5, "model.2"); // CSP1_1: C3 with shortcut; get_depth(3, gd) => 1 bottleneck when gd = 0.33
- auto conv3 = convBlock(network, weightMap, *bottleneck_CSP2->getOutput(0), get_width(256, gw), 3, 2, 1, "model.3"); // CBL
- auto bottleneck_csp4 = C3(network, weightMap, *conv3->getOutput(0), get_width(256, gw), get_width(256, gw), get_depth(9, gd), true, 1, 0.5, "model.4"); // CSP1_3: C3 with shortcut; get_depth(9, gd) => 3 bottlenecks when gd = 0.33
- auto conv5 = convBlock(network, weightMap, *bottleneck_csp4->getOutput(0), get_width(512, gw), 3, 2, 1, "model.5"); // CBL
- auto bottleneck_csp6 = C3(network, weightMap, *conv5->getOutput(0), get_width(512, gw), get_width(512, gw), get_depth(9, gd), true, 1, 0.5, "model.6"); // CSP1_3: C3 with shortcut; get_depth(9, gd) => 3 bottlenecks when gd = 0.33
- auto conv7 = convBlock(network, weightMap, *bottleneck_csp6->getOutput(0), get_width(1024, gw), 3, 2, 1, "model.7"); // CBL
- auto spp8 = SPP(network, weightMap, *conv7->getOutput(0), get_width(1024, gw), get_width(1024, gw), 5, 9, 13, "model.8"); // SPP with max-pooling kernel sizes 5, 9 and 13
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。