
Baidu PaddlePaddle Image Segmentation Course, Notes 03: U-Net and a Code Implementation


Ⅰ. The U-Net Framework
[Figure: U-Net architecture]
Ⅱ. Feature Fusion
Differences between U-Net and FCN:

  • FCN fuses features by element-wise addition
  • U-Net fuses features by concatenation (concat)
  • U-Net's backbone does not follow the VGG layout exactly
    The concrete skip operation is: copy and crop + concat (see the sketch below)
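As a quick illustration, here is a minimal numpy sketch (the shapes are made up for the example) contrasting the two fusion styles:

import numpy as np

a = np.zeros((1, 64, 32, 32))  # decoder feature map, NCHW
b = np.zeros((1, 64, 32, 32))  # encoder skip feature map, same shape

fused_fcn = a + b                            # FCN style: element-wise add, still 64 channels
fused_unet = np.concatenate([a, b], axis=1)  # U-Net style: concat along C, now 128 channels
print(fused_fcn.shape, fused_unet.shape)     # (1, 64, 32, 32) (1, 128, 32, 32)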

Ⅲ. The Output Prediction Map
After upsampling finishes, the network outputs a prediction map. Suppose the number of segmentation classes is num_classes; classification is done pixel by pixel. The steps are as follows (a short sketch comes after the list):

  1. A 1×1 convolution maps the 64 channels down to num_classes.
  2. At each (h, w) position, take a softmax over the num_classes dimension to get a probability distribution, then take the argmax of that distribution to get the final segmentation result.
  3. During training, the argmax is not needed.
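Here is a minimal sketch of this prediction head, assuming the same Paddle 1.x fluid dygraph API as the full listing below (the 8×8 feature map and batch size are made up for the example):

import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import to_variable, Conv2D

with fluid.dygraph.guard(fluid.CPUPlace()):
    num_classes = 59
    feats = to_variable(np.random.rand(1, 64, 8, 8).astype(np.float32))     # NCHW decoder output
    head = Conv2D(num_channels=64, num_filters=num_classes, filter_size=1)  # step 1: 1x1 conv
    logits = head(feats).numpy()  # [1, 59, 8, 8] per-pixel class scores

# step 2: softmax over the class axis (numerically stabilized), then argmax
probs = np.exp(logits - logits.max(axis=1, keepdims=True))
probs /= probs.sum(axis=1, keepdims=True)
pred = probs.argmax(axis=1)  # [1, 8, 8] one integer class id per pixel
print(pred.shape)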

Ⅳ. Building the U-Net Network

U-Net code (Paddle 1.x, fluid dygraph API):

import paddle
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import to_variable
from paddle.fluid.dygraph import Layer
from paddle.fluid.dygraph import Conv2D
from paddle.fluid.dygraph import BatchNorm
from paddle.fluid.dygraph import Pool2D
from paddle.fluid.dygraph import Conv2DTranspose

class Encoder(Layer):
    def __init__(self, num_channels, num_filters):
        super(Encoder, self).__init__()
        # TODO:encoder contains:
        # 3×3 conv + bn + relu
        # 3×3 conv + bn + relu
        # 2×2 pool
        # return features before and after pool  
        self.conv1 = Conv2D(num_channels=num_channels,
                            num_filters=num_filters,
                            filter_size=3,
                            stride=1,
                            padding=1)  # with a 3x3 kernel, padding=1 keeps the feature map size unchanged
        self.bn1 = BatchNorm(num_filters, act='relu')

        self.conv2 = Conv2D(num_channels=num_filters,
                            num_filters=num_filters,
                            filter_size=3,
                            stride=1,
                            padding=1)
        self.bn2 = BatchNorm(num_filters, act='relu')

        self.pool = Pool2D(pool_size=2, pool_stride=2, pool_type='max', ceil_mode=True)

    def forward(self, inputs):
        x = self.conv1(inputs)
        x = self.bn1(x)
        x = self.conv2(x)
        x = self.bn2(x) # kept for the skip connection (the grey copy-and-crop arrow), concatenated later
        x_pooled = self.pool(x)
        
        return x, x_pooled


class Decoder(Layer):
    def __init__(self, num_channels, num_filters):
        super(Decoder, self).__init__()
        # TODO:decoder contains:
        # 2×2 transpose conv, stride=2, p=0 (makes feature map 2× larger)
        # 3×3 conv + bn + relu
        # 3×3 conv + bn + relu
        self.up = Conv2DTranspose(num_channels=num_channels,    # halve channels (e.g. 1024->512), double H and W
                                  num_filters=num_filters,
                                  filter_size=2,
                                  stride=2)
        
        self.conv1 = Conv2D(num_channels=num_channels,  # after concat: skip + upsampled = num_channels again (e.g. 512+512=1024)
                            num_filters=num_filters,
                            filter_size=3,
                            stride=1,
                            padding=1)

        self.bn1 = BatchNorm(num_channels=num_filters, act='relu')

        self.conv2 = Conv2D(num_channels=num_filters,
                            num_filters=num_filters,
                            filter_size=3,
                            stride=1,
                            padding=1)

        self.bn2 = BatchNorm(num_channels=num_filters, act='relu')
    
    def forward(self, inputs_prev, inputs):
        # TODO: forward contains a pad2d and a concat
        # the original paper crops inputs_prev; here we pad x instead. Same goal: make H and W match so the two can be concatenated
        x = self.up(inputs)
        # NCHW
        h_diff = (inputs_prev.shape[2] - x.shape[2])
        w_diff = (inputs_prev.shape[3] - x.shape[3])
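        # pad2d paddings order is [pad_top, pad_bottom, pad_left, pad_right]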
        x = fluid.layers.pad2d(x, paddings=[h_diff//2, h_diff - h_diff//2, w_diff//2, w_diff - w_diff//2])
        # axis=1 is the channel axis in NCHW: concatenate along channels
        x = fluid.layers.concat([inputs_prev, x], axis=1)
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.conv2(x)
        x = self.bn2(x)
        return x


class UNet(Layer):
    def __init__(self, num_classes=59):
        super(UNet, self).__init__()
        # encoder: 3->64->128->256->512
        # mid: 512->1024->1024

        # TODO: 4 encoders, 4 decoders, and mid layers contain 2x (1x1conv+bn+relu)
        self.down1 = Encoder(num_channels=3, num_filters=64)
        self.down2 = Encoder(num_channels=64, num_filters=128)
        self.down3 = Encoder(num_channels=128, num_filters=256)
        self.down4 = Encoder(num_channels=256, num_filters=512)

        # the original paper uses 3x3 convs (stride=1, padding=1) here; this version uses 1x1 convs instead
        self.midconv1 = Conv2D(num_channels=512, num_filters=1024, filter_size=1, padding=0, stride=1)
        self.bn1 = BatchNorm(num_channels=1024, act='relu')
        self.midconv2 = Conv2D(num_channels=1024, num_filters=1024, filter_size=1, padding=0, stride=1)
        self.bn2 = BatchNorm(num_channels=1024, act='relu')

        self.up1 = Decoder(num_channels=1024, num_filters=512)
        self.up2 = Decoder(num_channels=512, num_filters=256)
        self.up3 = Decoder(num_channels=256, num_filters=128)
        self.up4 = Decoder(num_channels=128, num_filters=64)

        # last_conv: channel:64->num_classes
        self.last_conv = Conv2D(num_channels=64, num_filters=num_classes, filter_size=1)


    def forward(self, inputs):
        # encoder layer
        print('encoder layer:')
        x1, x = self.down1(inputs)
        print('input_pred:',x1.shape, 'x_pooled:', x.shape)
        x2, x = self.down2(x)
        print('input_pred:',x2.shape, 'x_pooled:', x.shape)
        x3, x = self.down3(x)
        print('input_pred:',x3.shape, 'x_pooled:', x.shape)
        x4, x = self.down4(x)
        print('input_pred:',x4.shape, 'x_pooled:', x.shape)

        # middle layer
        x = self.midconv1(x)
        x = self.bn1(x)
        x = self.midconv2(x)
        x = self.bn2(x)

        # decoder layer
        print('decoder layer:')
        x = self.up1(x4, x)
        print('up1_input_pred:',x4.shape, 'up1:', x.shape)
        x = self.up2(x3, x)
        print('up2_input_pred:',x3.shape, 'up2:', x.shape)
        x = self.up3(x2, x)
        print('up3_input_pred:',x2.shape, 'up3:', x.shape)
        x = self.up4(x1, x)
        print('up4_input_pred:',x1.shape, 'up4:', x.shape)

        x = self.last_conv(x)
        print('out_put:', x.shape)

        return x



def main():
    with fluid.dygraph.guard(fluid.CPUPlace()):
        model = UNet(num_classes=59)
        x_data = np.random.rand(1, 3, 123, 123).astype(np.float32)
        x_data = to_variable(x_data)
        output = model(x_data)
        output = output.numpy()

if __name__ == "__main__":
    main()
aistudio@jupyter-559108-2508636:~$ python ./work/U-Net.py 
encoder layer:
input_pred: [1, 64, 123, 123] x_pooled: [1, 64, 62, 62]
input_pred: [1, 128, 62, 62] x_pooled: [1, 128, 31, 31]
input_pred: [1, 256, 31, 31] x_pooled: [1, 256, 16, 16]
input_pred: [1, 512, 16, 16] x_pooled: [1, 512, 8, 8]
decoder layer:
up1_input_pred: [1, 512, 16, 16] up1: [1, 512, 16, 16]
up2_input_pred: [1, 256, 31, 31] up2: [1, 256, 31, 31]
up3_input_pred: [1, 128, 62, 62] up3: [1, 128, 62, 62]
up4_input_pred: [1, 64, 123, 123] up4: [1, 64, 123, 123]
out_put: [1, 59, 123, 123]
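The spatial sizes in this trace (123 -> 62 -> 31 -> 16 -> 8) follow from ceil_mode=True in Pool2D, which rounds each pooled size up. A quick check in plain Python:

import math

s = 123
for _ in range(4):
    s = math.ceil(s / 2)  # Pool2D(pool_size=2, pool_stride=2, ceil_mode=True)
    print(s)              # 62, 31, 16, 8

This is also why the decoder needs its pad2d step: doubling 16 gives 32, one pixel off from the 31×31 skip feature, and the trace shows each decoder output realigned to its skip feature's size before concatenation.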