VGG之所以经典,在于它首次将深度学习做得非常“深”,达到了16-19层;同时,它使用了非常“小”的卷积核 $3\times3$。
"""Network definition for VGG-16."""
import tensorflow as tf

# Short alias for the TF-Slim layer library.
slim = tf.contrib.slim


def vgg_16(inputs,
           num_classes=1000,
           is_training=True,
           dropout_keep_prob=0.5,
           spatial_squeeze=True,
           scope='vgg_16'):
    """Build the VGG-16 graph on top of `inputs` and return the fc8 output.

    The shape notes in the comments below assume a (N, 224, 224, 3) input
    batch, as in the original annotations of this file.

    Args:
        inputs: image batch tensor fed to the first convolution.
        num_classes: number of output channels of the final `fc8` layer.
        is_training: forwarded to both dropout layers.
        dropout_keep_prob: keep probability forwarded to both dropout layers.
        spatial_squeeze: when true, axes 1 and 2 of the fc8 output are
            squeezed away.
        scope: name of the enclosing variable scope.

    Returns:
        The output of `fc8` (created with no activation and no normalizer),
        squeezed over axes 1 and 2 when `spatial_squeeze` is true.
    """
    # One row per convolutional stage:
    # (conv scope, number of 3x3 convs, output channels, pool scope).
    # Spatial size after each pool: 112, 56, 28, 14, 7.
    stage_table = (
        ('conv1', 2, 64, 'pool1'),
        ('conv2', 2, 128, 'pool2'),
        ('conv3', 3, 256, 'pool3'),
        ('conv4', 3, 512, 'pool4'),
        ('conv5', 3, 512, 'pool5'),
    )

    with tf.variable_scope(scope, 'vgg_16', [inputs]):
        net = inputs
        for conv_scope, conv_count, channels, pool_scope in stage_table:
            # `conv_count` stacked 3x3 convolutions, then a 2x2 max pool.
            net = slim.repeat(net, conv_count, slim.conv2d, channels, [3, 3],
                              scope=conv_scope)
            net = slim.max_pool2d(net, [2, 2], scope=pool_scope)

        # The fully connected layers are expressed as convolutions.
        # fc6: a 7x7 VALID convolution turns (7, 7, 512) into (1, 1, 4096).
        net = slim.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6')
        net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                           scope='dropout6')

        # fc7: 1x1 convolution, output (1, 1, 4096).
        net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
        net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                           scope='dropout7')

        # fc8: 1x1 convolution, output (1, 1, num_classes).
        net = slim.conv2d(net, num_classes, [1, 1],
                          activation_fn=None,
                          normalizer_fn=None,
                          scope='fc8')

        # The convolutional "fc" layers leave two singleton spatial axes;
        # drop them on request so the result is flat per image.
        if spatial_squeeze:
            net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
        return net
import matplotlib.image as mpimg
import numpy as np
import tensorflow as tf
from tensorflow.python.ops import array_ops


def load_image(path):
    """Read an image file and crop it to its centred square.

    Args:
        path: location of the image file, passed to `mpimg.imread`.

    Returns:
        The array returned by `mpimg.imread`, sliced on its first two axes
        to a square whose side is the shorter of those two dimensions.
    """
    img = mpimg.imread(path)
    short_edge = min(img.shape[:2])
    # Offsets that centre the square inside the longer dimension.
    yy = (img.shape[0] - short_edge) // 2
    xx = (img.shape[1] - short_edge) // 2
    return img[yy: yy + short_edge, xx: xx + short_edge]


def resize_image(image, size,
                 method=tf.image.ResizeMethod.BILINEAR,
                 align_corners=False):
    """Resize one image and return it as a batch of one.

    Args:
        image: a single image tensor; a leading batch axis is added here.
            The final reshape assumes it has 3 channels.
        size: (height, width) of the output.
        method: interpolation method passed to `tf.image.resize_images`.
        align_corners: passed through to `tf.image.resize_images`.

    Returns:
        A tensor reshaped to (-1, size[0], size[1], 3).
    """
    with tf.name_scope('resize_image'):
        image = tf.expand_dims(image, 0)
        image = tf.image.resize_images(image, size, method, align_corners)
        image = tf.reshape(image, tf.stack([-1, size[0], size[1], 3]))
        return image


def print_prob(prob, file_path):
    """Print the top-1 and top-5 predictions and return the top-1 label.

    Args:
        prob: 1-D array of per-class scores.
        file_path: text file holding one class label per line, where line
            `i` names class `i`.

    Returns:
        The label of the highest-scoring class.
    """
    # Context manager so the label file is closed even if reading fails.
    with open(file_path) as label_file:
        synset = [line.strip() for line in label_file]

    # Class indices ordered from highest to lowest score.
    pred = np.argsort(prob)[::-1]

    top1 = synset[pred[0]]
    print(("Top1: ", top1, prob[pred[0]]))
    # Slicing (rather than range(5)) keeps this working when fewer than
    # five classes are available.
    top5 = [(synset[idx], prob[idx]) for idx in pred[:5]]
    print(("Top5: ", top5))
    return top1
from nets import vgg16
import tensorflow as tf
import numpy as np
import utils

# Load the test picture (cropped to a centred square by the helper).
img1 = utils.load_image("./test_data/table.jpg")

# Placeholder for one image of arbitrary height/width with 3 channels;
# the helper resizes it to a (-1, 224, 224, 3) batch.
inputs = tf.placeholder(tf.float32, [None, None, 3])
resized_img = utils.resize_image(inputs, (224, 224))

# Build the network in inference mode. With the default is_training=True
# the dropout layers stay active and the prediction becomes stochastic.
vprediction = vgg16.vgg_16(resized_img, is_training=False)

# Turn the network output into class probabilities.
pro = tf.nn.softmax(vprediction)

# Restore the pretrained weights and run the prediction. The session is
# used as a context manager so its resources are released afterwards.
ckpt_filename = './model/vgg_16.ckpt'
saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    saver.restore(sess, ckpt_filename)
    pre = sess.run(pro, feed_dict={inputs: img1})

# Print the prediction for the single image in the batch.
print("result: ")
utils.print_prob(pre[0], './synset.txt')
('Top1: ', 'n03201208 dining table, board', 0.98421836)
('Top5: ', [('n03201208 dining table, board', 0.98421836), ('n03376595 folding chair', 0.015443151), ('n03179701 desk', 0.00021209965), ('n04553703 washbasin, handbasin, washbowl, lavabo, wash - hand basin', 4.892502e-05), ("n03992509 potter's wheel", 1.5453264e-05)])
Residual net(残差网络):将靠前若干层的某一层数据输出直接跳过多层引入到后面数据的输入部分。
""" ResNet50的网络部分 """ from __future__ import print_function import numpy as np from keras import layers import json from keras.layers import Input from keras.layers import Dense, Conv2D, MaxPooling2D, ZeroPadding2D, AveragePooling2D from keras.layers import Activation, BatchNormalization, Flatten from keras.models import Model from keras.preprocessing import image import keras.backend as K from keras.utils.data_utils import get_file from keras.applications.imagenet_utils import decode_predictions from keras.applications.imagenet_utils import preprocess_input def identity_block(input_tensor, kernel_size, filters, stage, block): filters1, filters2, filters3 = filters conv_name_base = 'res' + str(stage) + block + '_branch' bn_name_base = 'bn' + str(stage) + block + '_branch' x = Conv2D(filters1, (1, 1), name=conv_name_base + '2a')(input_tensor) x = BatchNormalization(name=bn_name_base + '2a')(x) x = Activation('relu')(x) x = Conv2D(filters2, kernel_size, padding='same', name=conv_name_base + '2b')(x) x = BatchNormalization(name=bn_name_base + '2b')(x) x = Activation('relu')(x) x = Conv2D(filters3, (1, 1), name=conv_name_base + '2c')(x) x = BatchNormalization(name=bn_name_base + '2c')(x) x = layers.add([x, input_tensor]) x = Activation('relu')(x) return x def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2)): filters1, filters2, filters3 = filters conv_name_base = 'res' + str(stage) + block + '_branch' bn_name_base = 'bn' + str(stage) + block + '_branch' x = Conv2D(filters1, (1, 1), strides=strides, name=conv_name_base + '2a')(input_tensor) x = BatchNormalization(name=bn_name_base + '2a')(x) x = Activation('relu')(x) x = Conv2D(filters2, kernel_size, padding='same', name=conv_name_base + '2b')(x) x = BatchNormalization(name=bn_name_base + '2b')(x) x = Activation('relu')(x) x = Conv2D(filters3, (1, 1), name=conv_name_base + '2c')(x) x = BatchNormalization(name=bn_name_base + '2c')(x) shortcut = Conv2D(filters3, (1, 1), strides=strides, 
name=conv_name_base + '1')(input_tensor) shortcut = BatchNormalization(name=bn_name_base + '1')(shortcut) x = layers.add([x, shortcut]) x = Activation('relu')(x) return x def ResNet50(input_shape=[224, 224, 3], classes=1000): img_input = Input(shape=input_shape) x = ZeroPadding2D((3, 3))(img_input) x = Conv2D(64, (7, 7), strides=(2, 2), name='conv1')(x) x = BatchNormalization(name='bn_conv1')(x) x = Activation('relu')(x) x = MaxPooling2D((3, 3), strides=(2, 2))(x) x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1)) x = identity_block(x, 3, [64, 64, 256], stage=2, block='b') x = identity_block(x, 3, [64, 64, 256], stage=2, block='c') x = conv_block(x, 3, [128, 128, 512], stage=3, block='a') x = identity_block(x, 3, [128, 128, 512], stage=3, block='b') x = identity_block(x, 3, [128, 128, 512], stage=3, block='c') x = identity_block(x, 3, [128, 128, 512], stage=3, block='d') x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f') x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a') x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b') x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c') x = AveragePooling2D((7, 7), name='avg_pool')(x) x = Flatten()(x) x = Dense(classes, activation='softmax', name='fc1000')(x) model = Model(img_input, x, name='resnet50') return model if __name__ == '__main__': model = ResNet50() model.summary() model.load_weights("resnet50_weights_tf_dim_ordering_tf_kernels.h5") img_path = 'elephant.jpg' # img_path = 'bike.jpg' img = image.load_img(img_path, target_size=(224, 224)) x = image.img_to_array(img) x = np.expand_dims(x, axis=0) x = preprocess_input(x) preds = model.predict(x) with 
open('../imagenet_class_index.json') as f: js = json.load(f) for p in preds: # 遍历每个input s = [(js[str(i)], p[i]) for i in p.argsort()[-1:-4:-1]] # 输出最大3个 print(s)
__________________________________________________________________________________________________ Layer (type) Output Shape Param # Connected to ================================================================================================== input_1 (InputLayer) (None, 224, 224, 3) 0 __________________________________________________________________________________________________ zero_padding2d_1 (ZeroPadding2D (None, 230, 230, 3) 0 input_1[0][0] __________________________________________________________________________________________________ conv1 (Conv2D) (None, 112, 112, 64) 9472 zero_padding2d_1[0][0] __________________________________________________________________________________________________ bn_conv1 (BatchNormalization) (None, 112, 112, 64) 256 conv1[0][0] __________________________________________________________________________________________________ activation_1 (Activation) (None, 112, 112, 64) 0 bn_conv1[0][0] __________________________________________________________________________________________________ max_pooling2d_1 (MaxPooling2D) (None, 55, 55, 64) 0 activation_1[0][0] __________________________________________________________________________________________________ res2a_branch2a (Conv2D) (None, 55, 55, 64) 4160 max_pooling2d_1[0][0] __________________________________________________________________________________________________ bn2a_branch2a (BatchNormalizati (None, 55, 55, 64) 256 res2a_branch2a[0][0] __________________________________________________________________________________________________ activation_2 (Activation) (None, 55, 55, 64) 0 bn2a_branch2a[0][0] __________________________________________________________________________________________________ res2a_branch2b (Conv2D) (None, 55, 55, 64) 36928 activation_2[0][0] __________________________________________________________________________________________________ bn2a_branch2b (BatchNormalizati (None, 55, 55, 64) 256 res2a_branch2b[0][0] 
__________________________________________________________________________________________________ activation_3 (Activation) (None, 55, 55, 64) 0 bn2a_branch2b[0][0] __________________________________________________________________________________________________ res2a_branch2c (Conv2D) (None, 55, 55, 256) 16640 activation_3[0][0] __________________________________________________________________________________________________ res2a_branch1 (Conv2D) (None, 55, 55, 256) 16640 max_pooling2d_1[0][0] _______________________________________________________________________________
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。