完整的论文代码见文章末尾,以下为核心内容。
其中,本文使用了BCNN(Bilinear CNN)方法,将两个CNN网络进行双线性池化,从而提取不同层级的特征信息,并结合SVM分类器进行分类。实验结果表明,四种不同的深度神经网络模型均能够对鸟类图像进行良好的分类。在准确率方面,Xception表现最佳,达到了92.8%的准确率,其次是InceptionV3(91.4%)、ResNet50(90.2%)和VGG19(87.5%)。同时,通过比较不同层级的特征信息,发现高层级的特征对于细粒度分类具有重要作用。
每张图像都带有一些附加标注信息,包括15个部位的位置信息、312个二进制属性和一个边界框(bounding box)。
class vgg16:
    """VGG16-style convolutional trunk followed by bilinear pooling.

    The graph is built eagerly in the constructor: thirteen 3x3
    convolution + ReLU layers arranged in five groups (with a 2x2 max-pool
    after each of the first four groups), then a bilinear (outer-product)
    pooling of ``conv5_3`` with itself, a signed square root and an L2
    normalisation.

    Attributes created during construction:
        conv1_1 ... conv5_3: ReLU activations of each convolution layer.
        pool1 ... pool4: max-pooled activations.
        InnerPro: bilinear feature reshaped to ``[-1, 512 * 512]`` and
            averaged over the spatial positions of ``conv5_3``.
        ySsqrt: signed square root of ``InnerPro``.
        zL2: ``ySsqrt`` L2-normalised along axis 1.
        parameters: ``[kernel, biases]`` of every convolution layer, in
            layer order (conv1_1 first).
        last_layer_parameters: initialised to an empty list here; nothing
            in this excerpt appends to it.

    NOTE(review): ``fc_layers`` is called by ``__init__`` but is not defined
    in the visible part of the file -- it must be provided elsewhere (e.g.
    in the full source or a subclass) for construction to succeed.
    """

    def __init__(self, imgs, weights=None, sess=None, trainable=True, drop_prob=None):
        """Build the network graph on top of ``imgs``.

        Args:
            imgs: input image tensor; the mean subtraction and the first
                kernel require 3 channels on the last axis.
            weights: stored as ``self.weight_file``. Pretrained weights are
                NOT loaded here (the original load call is disabled).
            sess: unused by this constructor.
            trainable: forwarded to every convolution variable.
            drop_prob: stored as ``self.drop_prob``; not used by the
                layers visible in this excerpt.
        """
        self.imgs = imgs
        self.last_layer_parameters = []
        self.parameters = []
        # Configuration is recorded BEFORE the graph is built so that the
        # layer builders (in particular fc_layers) can safely read it.
        self.weight_file = weights
        self.drop_prob = drop_prob
        self.convlayers(trainable)
        self.fc_layers()

    def _conv_relu(self, name, inputs, in_channels, out_channels, trainable):
        """Add one 3x3, stride-1, SAME-padded convolution + bias + ReLU layer.

        The kernel and bias variables are appended to ``self.parameters``
        (kernel first) and the ReLU output tensor is returned.
        """
        with tf.name_scope(name) as scope:
            kernel = tf.Variable(
                tf.truncated_normal([3, 3, in_channels, out_channels],
                                    dtype=tf.float32, stddev=1e-1),
                trainable=trainable, name='weights')
            conv = tf.nn.conv2d(inputs, kernel, [1, 1, 1, 1], padding='SAME')
            biases = tf.Variable(
                tf.constant(0.0, shape=[out_channels], dtype=tf.float32),
                trainable=trainable, name='biases')
            out = tf.nn.bias_add(conv, biases)
            activation = tf.nn.relu(out, name=scope)
        self.parameters += [kernel, biases]
        return activation

    @staticmethod
    def _max_pool(inputs, name):
        """Apply a 2x2 max-pool with stride 2 and SAME padding."""
        return tf.nn.max_pool(inputs,
                              ksize=[1, 2, 2, 1],
                              strides=[1, 2, 2, 1],
                              padding='SAME',
                              name=name)

    def convlayers(self, trainable):
        """Build preprocessing, the convolution stack and bilinear pooling.

        Args:
            trainable: whether the convolution kernels/biases are trainable.
        """
        # Zero-mean input: subtract a fixed per-channel constant.
        # (presumably the ImageNet RGB means -- confirm against the weights used)
        with tf.name_scope('preprocess') as scope:
            mean = tf.constant([123.68, 116.779, 103.939], dtype=tf.float32,
                               shape=[1, 1, 1, 3], name='img_mean')
            images = self.imgs - mean
            # NOTE: only a message is printed; no augmentation op is added here.
            print('Adding Data Augmentation')

        # Group 1
        self.conv1_1 = self._conv_relu('conv1_1', images, 3, 64, trainable)
        self.conv1_2 = self._conv_relu('conv1_2', self.conv1_1, 64, 64, trainable)
        self.pool1 = self._max_pool(self.conv1_2, 'pool1')

        # Group 2
        self.conv2_1 = self._conv_relu('conv2_1', self.pool1, 64, 128, trainable)
        self.conv2_2 = self._conv_relu('conv2_2', self.conv2_1, 128, 128, trainable)
        self.pool2 = self._max_pool(self.conv2_2, 'pool2')

        # Group 3
        self.conv3_1 = self._conv_relu('conv3_1', self.pool2, 128, 256, trainable)
        self.conv3_2 = self._conv_relu('conv3_2', self.conv3_1, 256, 256, trainable)
        self.conv3_3 = self._conv_relu('conv3_3', self.conv3_2, 256, 256, trainable)
        self.pool3 = self._max_pool(self.conv3_3, 'pool3')

        # Group 4
        self.conv4_1 = self._conv_relu('conv4_1', self.pool3, 256, 512, trainable)
        self.conv4_2 = self._conv_relu('conv4_2', self.conv4_1, 512, 512, trainable)
        self.conv4_3 = self._conv_relu('conv4_3', self.conv4_2, 512, 512, trainable)
        self.pool4 = self._max_pool(self.conv4_3, 'pool4')

        # Group 5 (no pooling afterwards: conv5_3 feeds the bilinear layer)
        self.conv5_1 = self._conv_relu('conv5_1', self.pool4, 512, 512, trainable)
        self.conv5_2 = self._conv_relu('conv5_2', self.conv5_1, 512, 512, trainable)
        self.conv5_3 = self._conv_relu('conv5_3', self.conv5_2, 512, 512, trainable)

        # Bilinear pooling: for every image, sum the channel-by-channel
        # outer product of conv5_3 with itself over all spatial positions.
        self.InnerPro = tf.einsum('ijkm,ijkn->imn', self.conv5_3, self.conv5_3)
        self.InnerPro = tf.reshape(self.InnerPro, [-1, 512 * 512])

        # Average over spatial positions. The count is read from the actual
        # feature map instead of being hard-coded to 14 * 14, which is only
        # right for 224x224 inputs (four stride-2 pools: 224 -> 14).
        feature_shape = tf.shape(self.conv5_3)
        num_locations = tf.cast(feature_shape[1] * feature_shape[2], tf.float32)
        self.InnerPro = tf.divide(self.InnerPro, num_locations)

        # Signed square root; the epsilon keeps sqrt away from exactly 0.
        self.ySsqrt = tf.multiply(tf.sign(self.InnerPro),
                                  tf.sqrt(tf.abs(self.InnerPro) + 1e-12))
        # Axis passed positionally: works with both the legacy `dim` and the
        # newer `axis` keyword of tf.nn.l2_normalize.
        self.zL2 = tf.nn.l2_normalize(self.ySsqrt, 1)
基于 ResNet50 模型,在 CUB_200_2011 数据集上可以获得 64.7% 的准确率。利用 stacking 方法,构建基于 4 个预训练模型的分类器,对 CUB_200_2011 数据集中的 200 类鸟进行分类,可以获得 74.5% 的准确率。
