Convolution can be regarded as an effective way of extracting image features.
Typically a square convolution kernel slides over the input feature map with a specified stride, visiting every pixel of the input. At each step the kernel overlaps a region of the input feature map; the overlapping elements are multiplied pairwise, summed, and a bias term is added to produce one pixel of the output feature map.
The number of channels of the convolution kernel matches the number of channels of the input feature map.
The number of convolution kernels determines the depth (channel count) of the output feature map.
Receptive field: the size of the region on the original image that one pixel of a given layer's output maps back to.
Zero padding: to keep the output feature map the same size as the input, the input is often padded with zeros around its border. padding = 'SAME' pads; padding = 'VALID' does not.
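With input size n, kernel size k and stride s, the output size follows the standard formulas used by TensorFlow (stated here as a sketch, not taken from the text above):

import math
n, k, s = 5, 3, 1                        # input size, kernel size, stride (illustrative values)
out_valid = math.ceil((n - k + 1) / s)   # padding='VALID' (no padding): 3
out_same = math.ceil(n / s)              # padding='SAME' (zero padding): 5, same as the input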
tf.keras.layers.Conv2D(
    input_shape=(height, width, channels),  # only needed in the first layer
    filters=number_of_kernels,
    kernel_size=kernel_size,
    strides=stride,
    padding='SAME' or 'VALID',
    activation='relu', 'sigmoid', 'tanh', 'softmax', etc.  # omit if a BN layer follows
)
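For instance, the following quick shape check (a minimal sketch; the 32x32 RGB input is only an illustrative assumption) shows the effect of the two padding modes:

import tensorflow as tf

x = tf.random.normal((1, 32, 32, 3))  # one 32x32 RGB image
y_same = tf.keras.layers.Conv2D(filters=16, kernel_size=3, strides=1, padding='same')(x)
y_valid = tf.keras.layers.Conv2D(filters=16, kernel_size=3, strides=1, padding='valid')(x)
print(y_same.shape)   # (1, 32, 32, 16): 'same' keeps the spatial size
print(y_valid.shape)  # (1, 30, 30, 16): 'valid' shrinks it by k - 1 = 2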
Batch Normalization: standardizes the outputs of each layer over a mini-batch, rescaling them to a standard normal distribution with mean 0 and variance 1; its purpose is to alleviate the vanishing-gradient problem. (When training = True, BN uses the mean and standard deviation of the current batch; when training = False, it uses the moving-average mean and standard deviation.)
tf.keras.layers.BatchNormalization()
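A minimal sketch of the training flag described above (standalone BN layer on random data, only for illustration):

import tensorflow as tf

bn = tf.keras.layers.BatchNormalization()
x = tf.random.normal((8, 4))
y_train = bn(x, training=True)   # uses this batch's mean/std and updates the moving averages
y_infer = bn(x, training=False)  # uses the moving-average mean/std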
Pooling: pooling reduces the number of features (dimensionality reduction). Max pooling extracts image texture, while average pooling preserves background features.
tf.keras.layers.MaxPool2D(
    pool_size=pool_size,
    strides=stride,
    padding='SAME' or 'VALID'
)

tf.keras.layers.AveragePooling2D(
    pool_size=pool_size,
    strides=stride,
    padding='SAME' or 'VALID'
)
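For example (a minimal sketch; the input size is arbitrary), a 2x2 pool with stride 2 halves the spatial size and leaves the channel count unchanged:

import tensorflow as tf

x = tf.random.normal((1, 32, 32, 16))
y_max = tf.keras.layers.MaxPool2D(pool_size=2, strides=2, padding='same')(x)
y_avg = tf.keras.layers.AveragePooling2D(pool_size=2, strides=2, padding='same')(x)
print(y_max.shape, y_avg.shape)  # both (1, 16, 16, 16)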
Dropout: during training, a fraction of the neurons is temporarily dropped from the network with a given probability; when the trained network is used for inference, the dropped neurons are reconnected.
tf.keras.layers.Dropout(rate)  # rate = probability of dropping each neuron
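A minimal sketch of the effect of the training flag on Dropout:

import tensorflow as tf

drop = tf.keras.layers.Dropout(0.5)
x = tf.ones((1, 4))
print(drop(x, training=True))   # roughly half the entries zeroed, the rest scaled by 1 / (1 - 0.5)
print(drop(x, training=False))  # identity at inference time: [[1. 1. 1. 1.]]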
The core idea is to extract features with convolution kernels in the CNN and then feed them into a fully connected network.
Both the tf.keras.Sequential API and subclassing (defining a class) can be used to build a model, but the latter is more common in practice, because complex networks often contain structures or designs that a Sequential model cannot express. (For comparison, a Sequential version of the baseline network is sketched right after the class definition below.)
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import Conv2D, BatchNormalization, Activation, MaxPool2D, Dropout, Flatten, Dense
# (these imports are shared by all the model definitions below)


class Baseline(Model):
    def __init__(self):
        super(Baseline, self).__init__()
        self.c1 = Conv2D(filters=6, kernel_size=(5, 5), padding='same')  # convolution layer
        self.b1 = BatchNormalization()  # BN layer
        self.a1 = Activation('relu')  # activation layer
        self.p1 = MaxPool2D(pool_size=(2, 2), strides=2, padding='same')  # pooling layer
        self.d1 = Dropout(0.2)  # dropout layer

        self.flatten = Flatten()
        self.f1 = Dense(128, activation='relu')
        self.d2 = Dropout(0.2)
        self.f2 = Dense(10, activation='softmax')

    def call(self, x):
        x = self.c1(x)
        x = self.b1(x)
        x = self.a1(x)
        x = self.p1(x)
        x = self.d1(x)

        x = self.flatten(x)
        x = self.f1(x)
        x = self.d2(x)
        y = self.f2(x)
        return y


model = Baseline()
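For comparison, the same baseline network expressed with the Sequential API (a sketch; it should be behaviourally equivalent to the class above):

model = tf.keras.models.Sequential([
    Conv2D(filters=6, kernel_size=(5, 5), padding='same'),
    BatchNormalization(),
    Activation('relu'),
    MaxPool2D(pool_size=(2, 2), strides=2, padding='same'),
    Dropout(0.2),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(10, activation='softmax')
])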
Key takeaway (LeNet5): shared convolution kernels reduce the number of network parameters.
Classic pipeline: convolution extracts features → fully connected layers classify.
class LeNet5(Model):
    def __init__(self):
        super(LeNet5, self).__init__()
        self.c1 = Conv2D(filters=6, kernel_size=(5, 5),
                         activation='sigmoid')
        self.p1 = MaxPool2D(pool_size=(2, 2), strides=2)

        self.c2 = Conv2D(filters=16, kernel_size=(5, 5),
                         activation='sigmoid')
        self.p2 = MaxPool2D(pool_size=(2, 2), strides=2)

        self.flatten = Flatten()
        self.f1 = Dense(120, activation='sigmoid')
        self.f2 = Dense(84, activation='sigmoid')
        self.f3 = Dense(10, activation='softmax')

    def call(self, x):
        x = self.c1(x)
        x = self.p1(x)

        x = self.c2(x)
        x = self.p2(x)

        x = self.flatten(x)
        x = self.f1(x)
        x = self.f2(x)
        y = self.f3(x)
        return y


model = LeNet5()
Key takeaways (AlexNet8): the ReLU activation speeds up training; Dropout helps prevent overfitting.
class AlexNet8(Model):
    def __init__(self):
        super(AlexNet8, self).__init__()
        self.c1 = Conv2D(filters=96, kernel_size=(3, 3))
        self.b1 = BatchNormalization()
        self.a1 = Activation('relu')
        self.p1 = MaxPool2D(pool_size=(3, 3), strides=2)

        self.c2 = Conv2D(filters=256, kernel_size=(3, 3))
        self.b2 = BatchNormalization()
        self.a2 = Activation('relu')
        self.p2 = MaxPool2D(pool_size=(3, 3), strides=2)

        self.c3 = Conv2D(filters=384, kernel_size=(3, 3), padding='same',
                         activation='relu')

        self.c4 = Conv2D(filters=384, kernel_size=(3, 3), padding='same',
                         activation='relu')

        self.c5 = Conv2D(filters=256, kernel_size=(3, 3), padding='same',
                         activation='relu')
        self.p3 = MaxPool2D(pool_size=(3, 3), strides=2)

        self.flatten = Flatten()
        self.f1 = Dense(2048, activation='relu')
        self.d1 = Dropout(0.5)
        self.f2 = Dense(2048, activation='relu')
        self.d2 = Dropout(0.5)
        self.f3 = Dense(10, activation='softmax')

    def call(self, x):
        x = self.c1(x)
        x = self.b1(x)
        x = self.a1(x)
        x = self.p1(x)

        x = self.c2(x)
        x = self.b2(x)
        x = self.a2(x)
        x = self.p2(x)

        x = self.c3(x)

        x = self.c4(x)

        x = self.c5(x)
        x = self.p3(x)

        x = self.flatten(x)
        x = self.f1(x)
        x = self.d1(x)
        x = self.f2(x)
        x = self.d2(x)
        y = self.f3(x)
        return y


model = AlexNet8()
Key takeaways (VGG16): small convolution kernels reduce parameters while improving accuracy; the regular, repetitive network structure is well suited to parallel acceleration.
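A rough back-of-the-envelope comparison of why the small kernels help (standard argument, with an arbitrary channel count): two stacked 3x3 convolutions cover the same 5x5 receptive field as a single 5x5 convolution but use fewer parameters.

C = 64                        # input and output channels, illustrative
two_3x3 = 2 * 3 * 3 * C * C   # 73,728 weights (ignoring biases)
one_5x5 = 5 * 5 * C * C       # 102,400 weights for the same receptive field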
class VGG16(Model):
    def __init__(self):
        super(VGG16, self).__init__()
        # Block 1: two 64-filter 3x3 convolutions
        self.c1 = Conv2D(filters=64, kernel_size=(3, 3), padding='same')  # conv layer 1
        self.b1 = BatchNormalization()  # BN layer 1
        self.a1 = Activation('relu')  # activation layer 1
        self.c2 = Conv2D(filters=64, kernel_size=(3, 3), padding='same')
        self.b2 = BatchNormalization()
        self.a2 = Activation('relu')
        self.p1 = MaxPool2D(pool_size=(2, 2), strides=2, padding='same')
        self.d1 = Dropout(0.2)  # dropout layer

        # Block 2: two 128-filter 3x3 convolutions
        self.c3 = Conv2D(filters=128, kernel_size=(3, 3), padding='same')
        self.b3 = BatchNormalization()
        self.a3 = Activation('relu')
        self.c4 = Conv2D(filters=128, kernel_size=(3, 3), padding='same')
        self.b4 = BatchNormalization()
        self.a4 = Activation('relu')
        self.p2 = MaxPool2D(pool_size=(2, 2), strides=2, padding='same')
        self.d2 = Dropout(0.2)

        # Block 3: three 256-filter 3x3 convolutions
        self.c5 = Conv2D(filters=256, kernel_size=(3, 3), padding='same')
        self.b5 = BatchNormalization()
        self.a5 = Activation('relu')
        self.c6 = Conv2D(filters=256, kernel_size=(3, 3), padding='same')
        self.b6 = BatchNormalization()
        self.a6 = Activation('relu')
        self.c7 = Conv2D(filters=256, kernel_size=(3, 3), padding='same')
        self.b7 = BatchNormalization()
        self.a7 = Activation('relu')
        self.p3 = MaxPool2D(pool_size=(2, 2), strides=2, padding='same')
        self.d3 = Dropout(0.2)

        # Block 4: three 512-filter 3x3 convolutions
        self.c8 = Conv2D(filters=512, kernel_size=(3, 3), padding='same')
        self.b8 = BatchNormalization()
        self.a8 = Activation('relu')
        self.c9 = Conv2D(filters=512, kernel_size=(3, 3), padding='same')
        self.b9 = BatchNormalization()
        self.a9 = Activation('relu')
        self.c10 = Conv2D(filters=512, kernel_size=(3, 3), padding='same')
        self.b10 = BatchNormalization()
        self.a10 = Activation('relu')
        self.p4 = MaxPool2D(pool_size=(2, 2), strides=2, padding='same')
        self.d4 = Dropout(0.2)

        # Block 5: three 512-filter 3x3 convolutions
        self.c11 = Conv2D(filters=512, kernel_size=(3, 3), padding='same')
        self.b11 = BatchNormalization()
        self.a11 = Activation('relu')
        self.c12 = Conv2D(filters=512, kernel_size=(3, 3), padding='same')
        self.b12 = BatchNormalization()
        self.a12 = Activation('relu')
        self.c13 = Conv2D(filters=512, kernel_size=(3, 3), padding='same')
        self.b13 = BatchNormalization()
        self.a13 = Activation('relu')
        self.p5 = MaxPool2D(pool_size=(2, 2), strides=2, padding='same')
        self.d5 = Dropout(0.2)

        # Classifier: two 512-unit dense layers plus the softmax output
        self.flatten = Flatten()
        self.f1 = Dense(512, activation='relu')
        self.d6 = Dropout(0.2)
        self.f2 = Dense(512, activation='relu')
        self.d7 = Dropout(0.2)
        self.f3 = Dense(10, activation='softmax')

    def call(self, x):
        x = self.c1(x)
        x = self.b1(x)
        x = self.a1(x)
        x = self.c2(x)
        x = self.b2(x)
        x = self.a2(x)
        x = self.p1(x)
        x = self.d1(x)

        x = self.c3(x)
        x = self.b3(x)
        x = self.a3(x)
        x = self.c4(x)
        x = self.b4(x)
        x = self.a4(x)
        x = self.p2(x)
        x = self.d2(x)

        x = self.c5(x)
        x = self.b5(x)
        x = self.a5(x)
        x = self.c6(x)
        x = self.b6(x)
        x = self.a6(x)
        x = self.c7(x)
        x = self.b7(x)
        x = self.a7(x)
        x = self.p3(x)
        x = self.d3(x)

        x = self.c8(x)
        x = self.b8(x)
        x = self.a8(x)
        x = self.c9(x)
        x = self.b9(x)
        x = self.a9(x)
        x = self.c10(x)
        x = self.b10(x)
        x = self.a10(x)
        x = self.p4(x)
        x = self.d4(x)

        x = self.c11(x)
        x = self.b11(x)
        x = self.a11(x)
        x = self.c12(x)
        x = self.b12(x)
        x = self.a12(x)
        x = self.c13(x)
        x = self.b13(x)
        x = self.a13(x)
        x = self.p5(x)
        x = self.d5(x)

        x = self.flatten(x)
        x = self.f1(x)
        x = self.d6(x)
        x = self.f2(x)
        x = self.d7(x)
        y = self.f3(x)
        return y


model = VGG16()
Key takeaways (InceptionNet): use convolution kernels of several sizes within one layer to improve the network's perception (padding keeps the spatial size of each branch identical so the outputs can be concatenated); 1×1 convolution kernels change the number of output channels, reducing the number of parameters.
Drawback: as the depth keeps increasing, training becomes very difficult and may even fail to converge.
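A rough illustration of the 1x1 bottleneck idea (hypothetical channel counts, not taken from the code below): inserting a 1x1 convolution before an expensive convolution cuts the weight count considerably.

C_in, C_mid, C_out = 256, 64, 256
direct = 5 * 5 * C_in * C_out                               # 1,638,400 weights for a single 5x5 conv
bottleneck = 1 * 1 * C_in * C_mid + 5 * 5 * C_mid * C_out   # 16,384 + 409,600 = 425,984 weights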
from tensorflow.keras.layers import GlobalAveragePooling2D


class ConvBNRelu(Model):
    def __init__(self, ch, kernelsz=3, strides=1, padding='same'):
        super(ConvBNRelu, self).__init__()
        self.model = tf.keras.models.Sequential([
            Conv2D(ch, kernelsz, strides=strides, padding=padding),
            BatchNormalization(),
            Activation('relu')
        ])

    def call(self, x):
        x = self.model(x, training=False)  # training=False: BN always uses its moving-average statistics here
        return x


class InceptionBlk(Model):
    def __init__(self, ch, strides=1):
        super(InceptionBlk, self).__init__()
        self.ch = ch
        self.strides = strides
        self.c1 = ConvBNRelu(ch, kernelsz=1, strides=strides)
        self.c2_1 = ConvBNRelu(ch, kernelsz=1, strides=strides)
        self.c2_2 = ConvBNRelu(ch, kernelsz=3, strides=1)
        self.c3_1 = ConvBNRelu(ch, kernelsz=1, strides=strides)
        self.c3_2 = ConvBNRelu(ch, kernelsz=5, strides=1)
        self.p4_1 = MaxPool2D(3, strides=1, padding='same')
        self.c4_2 = ConvBNRelu(ch, kernelsz=1, strides=strides)

    def call(self, x):
        x1 = self.c1(x)
        x2_1 = self.c2_1(x)
        x2_2 = self.c2_2(x2_1)
        x3_1 = self.c3_1(x)
        x3_2 = self.c3_2(x3_1)
        x4_1 = self.p4_1(x)
        x4_2 = self.c4_2(x4_1)
        # concatenate the four branches along the channel axis
        x = tf.concat([x1, x2_2, x3_2, x4_2], axis=3)
        return x


class Inception10(Model):
    def __init__(self, num_blocks, num_classes, init_ch=16, **kwargs):
        super(Inception10, self).__init__(**kwargs)
        self.in_channels = init_ch
        self.out_channels = init_ch
        self.num_blocks = num_blocks
        self.init_ch = init_ch
        self.c1 = ConvBNRelu(init_ch)
        self.blocks = tf.keras.models.Sequential()
        for block_id in range(num_blocks):
            for layer_id in range(2):
                if layer_id == 0:
                    block = InceptionBlk(self.out_channels, strides=2)
                else:
                    block = InceptionBlk(self.out_channels, strides=1)
                self.blocks.add(block)
            # enlarge out_channels per block
            self.out_channels *= 2
        self.p1 = GlobalAveragePooling2D()
        self.f1 = Dense(num_classes, activation='softmax')

    def call(self, x):
        x = self.c1(x)
        x = self.blocks(x)
        x = self.p1(x)
        y = self.f1(x)
        return y


model = Inception10(num_blocks=2, num_classes=10)
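A quick sanity check on the block (a sketch; the input size is arbitrary): the four branches are concatenated along the channel axis, so an InceptionBlk built with ch filters outputs 4 * ch channels.

blk = InceptionBlk(ch=16, strides=1)
out = blk(tf.random.normal((1, 32, 32, 3)))
print(out.shape)  # (1, 32, 32, 64): four branches of 16 channels each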
Key takeaway (ResNet): residual skip connections between layers feed information from earlier layers forward, reducing vanishing gradients and making much deeper networks trainable.
class ResnetBlock(Model):

    def __init__(self, filters, strides=1, residual_path=False):
        super(ResnetBlock, self).__init__()
        self.filters = filters
        self.strides = strides
        self.residual_path = residual_path

        self.c1 = Conv2D(filters, (3, 3), strides=strides, padding='same', use_bias=False)
        self.b1 = BatchNormalization()
        self.a1 = Activation('relu')

        self.c2 = Conv2D(filters, (3, 3), strides=1, padding='same', use_bias=False)
        self.b2 = BatchNormalization()

        # When residual_path is True, downsample the input with a 1x1 convolution so that
        # x has the same dimensions as F(x) and the two can be added.
        if residual_path:
            self.down_c1 = Conv2D(filters, (1, 1), strides=strides, padding='same', use_bias=False)
            self.down_b1 = BatchNormalization()

        self.a2 = Activation('relu')

    def call(self, inputs):
        residual = inputs  # the identity branch: residual = x
        # pass the input through conv, BN and activation layers to compute F(x)
        x = self.c1(inputs)
        x = self.b1(x)
        x = self.a1(x)

        x = self.c2(x)
        y = self.b2(x)

        if self.residual_path:
            residual = self.down_c1(inputs)
            residual = self.down_b1(residual)

        out = self.a2(y + residual)  # sum of the two branches, F(x)+x or F(x)+Wx, passed through the activation
        return out


class ResNet18(Model):

    def __init__(self, block_list, initial_filters=64):  # block_list gives the number of residual blocks in each stage
        super(ResNet18, self).__init__()
        self.num_blocks = len(block_list)  # number of stages
        self.block_list = block_list
        self.out_filters = initial_filters
        self.c1 = Conv2D(self.out_filters, (3, 3), strides=1, padding='same', use_bias=False)
        self.b1 = BatchNormalization()
        self.a1 = Activation('relu')
        self.blocks = tf.keras.models.Sequential()
        # build the ResNet structure
        for block_id in range(len(block_list)):  # which stage
            for layer_id in range(block_list[block_id]):  # which residual block within the stage

                if block_id != 0 and layer_id == 0:  # downsample the input of every stage except the first
                    block = ResnetBlock(self.out_filters, strides=2, residual_path=True)
                else:
                    block = ResnetBlock(self.out_filters, residual_path=False)
                self.blocks.add(block)  # add the finished block to the network
            self.out_filters *= 2  # the next stage uses twice as many filters as the previous one
        self.p1 = tf.keras.layers.GlobalAveragePooling2D()
        self.f1 = tf.keras.layers.Dense(10, activation='softmax',
                                        kernel_regularizer=tf.keras.regularizers.l2())

    def call(self, inputs):
        x = self.c1(inputs)
        x = self.b1(x)
        x = self.a1(x)
        x = self.blocks(x)
        x = self.p1(x)
        y = self.f1(x)
        return y


model = ResNet18([2, 2, 2, 2])
Finally, training techniques and hyperparameter settings have a considerable impact on the result. Taking ResNet18 as an example, with suitable training tricks its cifar10 accuracy can exceed 90%. So besides choosing an appropriate model, how to train a model well is also a question well worth exploring.
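As a starting point, a minimal training sketch for the models above (assuming cifar10 and plain Keras defaults; the optimizer, batch size and epoch count are illustrative, not the tuned recipe referred to above):

import tensorflow as tf

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

model = ResNet18([2, 2, 2, 2])
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['sparse_categorical_accuracy'])
model.fit(x_train, y_train, batch_size=32, epochs=5,
          validation_data=(x_test, y_test))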