What this example demonstrates:
1. Building a classification network with tf.contrib.layers (you could also build it with tf.nn).
2. Reading batches of training data directly from image paths (or from a txt file that lists image paths and labels, as is often done with Caffe), with no file-format conversion; a small sketch of the txt-based variant follows the dataset note below.
3. Learning-rate decay.
4. Using TensorBoard to inspect the model graph and to watch loss, accuracy, and learning rate during training, which helps with hyperparameter tuning.
5. Saving the model.
6. Loading the trained model and predicting on images.
This example uses the flower photos dataset: http://download.tensorflow.org/example_images/flower_photos.tgz
The data is split into 5 classes.
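As noted in item 2 above, instead of walking the class subfolders you can also feed the pipeline from a plain text list file with one "image_path label" pair per line (the Caffe-style approach). A minimal sketch, assuming a hypothetical train_list.txt whose lines look like "flowers/flower_photos/daisy/xxx.jpg 0":
- def get_files_from_txt(list_file):
-     # read "image_path label" pairs from a list file (hypothetical train_list.txt)
-     image_list, label_list = [], []
-     with open(list_file) as f:
-         for line in f:
-             line = line.strip()
-             if not line:
-                 continue
-             path, label = line.rsplit(' ', 1)
-             image_list.append(path)
-             label_list.append(int(label))
-     return image_list, label_list
The returned lists can be passed to get_batch in train.py exactly like the output of get_files.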
Main training script: train.py
In it, get_batch loads batches of data and get_files returns the image paths and their labels.
- #coding:utf-8
- import os
- import numpy as np
- import tensorflow as tf
- import glob
-
- import model
-
-
- init_lr = 0.001
- decay_steps = 10000
- MAX_STEP = 200000
- N_CLASSES = 5
- IMG_W = 224
- IMG_H = 224
- BATCH_SIZE = 32
- CAPACITY = 2000
- os.environ["CUDA_VISIBLE_DEVICES"] = "0" # which GPU to use
- label_dict = {'daisy':0, 'dandelion':1, 'roses':2, 'sunflowers':3, 'tulips':4} # manually map class names to integer labels; labels must start from 0
-
- train_dir = 'flowers/flower_photos' # directory with one subfolder of images per class (5 classes here)
- logs_train_dir = './model_save'
-
- config = tf.ConfigProto()
- config.gpu_options.allow_growth = True # allocate GPU memory on demand instead of grabbing it all at once
-
-
- def get_batch(image, label, image_W, image_H, batch_size, capacity):
-     image = tf.cast(image, tf.string)
-     label = tf.cast(label, tf.int32)
-     # make an input queue
-     input_queue = tf.train.slice_input_producer([image, label], shuffle=False)
-     label = input_queue[1]
-     image_contents = tf.read_file(input_queue[0])
-     image = tf.image.decode_jpeg(image_contents, channels=3)
-     # data augmentation
-     #image = tf.image.resize_image_with_pad(image, target_height=image_W, target_width=image_H)
-     image = tf.image.resize_images(image, (image_W, image_H))
-     # random horizontal flip
-     image = tf.image.random_flip_left_right(image)
-     # random vertical flip
-     image = tf.image.random_flip_up_down(image)
-     # random brightness
-     image = tf.image.random_brightness(image, max_delta=32/255.0)
-     # random contrast
-     image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
-     # random hue
-     image = tf.image.random_hue(image, max_delta=0.3)
-     # random saturation
-     image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
-     # standardize: zero mean, unit variance per image
-     image = tf.image.per_image_standardization(image)
-     image_batch, label_batch = tf.train.batch([image, label],
-                                               batch_size=batch_size,
-                                               num_threads=64,
-                                               capacity=capacity)
-     label_batch = tf.reshape(label_batch, [batch_size])
-     image_batch = tf.cast(image_batch, tf.float32)
-     return image_batch, label_batch
-
-
- def get_files(file_dir):
-     image_list, label_list = [], []
-     for label in os.listdir(file_dir):
-         for img in glob.glob(os.path.join(file_dir, label, "*.jpg")):
-             image_list.append(img)
-             label_list.append(label_dict[label])
-     print('There are %d images' % (len(image_list)))
-     # shuffle image paths and labels together
-     temp = np.array([image_list, label_list])
-     temp = temp.transpose()
-     np.random.shuffle(temp)
-     image_list = list(temp[:, 0])
-     label_list = list(temp[:, 1])
-     label_list = [int(i) for i in label_list]
-     return image_list, label_list
-
-
- def main():
-     global_step = tf.Variable(0, name='global_step', trainable=False)
-     # dataset
-     train, train_label = get_files(train_dir)
-     # labels are plain integers (no one-hot)
-     batch_train, batch_labels = get_batch(train,
-                                           train_label,
-                                           IMG_W,
-                                           IMG_H,
-                                           BATCH_SIZE,
-                                           CAPACITY)
-     # network
-     #logits = model.model2(batch_train, BATCH_SIZE, N_CLASSES)
-     logits = model.model4(batch_train, N_CLASSES, is_train=True)
-     # loss
-     cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=batch_labels)
-     loss = tf.reduce_mean(cross_entropy, name='loss')
-     tf.summary.scalar('train_loss', loss)
-     # optimizer with exponential learning-rate decay
-     lr = tf.train.exponential_decay(learning_rate=init_lr, global_step=global_step, decay_steps=decay_steps, decay_rate=0.1)
-     tf.summary.scalar('learning_rate', lr)
-     optimizer = tf.train.AdamOptimizer(learning_rate=lr).minimize(loss, global_step=global_step)
-     # accuracy
-     correct = tf.nn.in_top_k(logits, batch_labels, 1)
-     correct = tf.cast(correct, tf.float16)
-     accuracy = tf.reduce_mean(correct)
-     tf.summary.scalar('train_acc', accuracy)
-
-     summary_op = tf.summary.merge_all()
-     sess = tf.Session(config=config)
-     train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph)
-     saver = tf.train.Saver()
-
-     sess.run(tf.global_variables_initializer())
-     coord = tf.train.Coordinator()
-     threads = tf.train.start_queue_runners(sess=sess, coord=coord)
-     #saver.restore(sess, logs_train_dir+'/model.ckpt-174000')
-     try:
-         for step in range(MAX_STEP):
-             if coord.should_stop():
-                 break
-             _, learning_rate, tra_loss, tra_acc = sess.run([optimizer, lr, loss, accuracy])
-             if step % 50 == 0:
-                 print('Step %4d, lr %f, train loss = %.2f, train accuracy = %.2f%%' % (step, learning_rate, tra_loss, tra_acc*100.0))
-                 summary_str = sess.run(summary_op)
-                 train_writer.add_summary(summary_str, step)
-             if step % 2000 == 0 or (step + 1) == MAX_STEP:
-                 checkpoint_path = os.path.join(logs_train_dir, 'model.ckpt')
-                 saver.save(sess, checkpoint_path, global_step=step)
-     except tf.errors.OutOfRangeError:
-         print('Done training -- epoch limit reached')
-     finally:
-         coord.request_stop()
-
-     coord.join(threads)
-     sess.close()
-
-
- if __name__ == '__main__':
-     main()
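A note on the learning-rate schedule: with init_lr = 0.001, decay_steps = 10000 and decay_rate = 0.1, tf.train.exponential_decay computes lr = 0.001 * 0.1^(step / 10000), so the learning rate decays smoothly to about 1e-4 at step 10000 and 1e-5 at step 20000 (pass staircase=True if you prefer step-wise drops instead). The curves written by the FileWriter (loss, accuracy, learning rate) can be viewed with `tensorboard --logdir=./model_save`.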
Model definition: model.py
- #coding:utf-8
-
- from tensorflow.contrib.layers.python.layers import batch_norm
- import tensorflow as tf
- import inspect
- import os
- import numpy as np
- import time
-
-
- def model4(x, N_CLASSES, is_train=False):
-     x = tf.contrib.layers.conv2d(x, 64, [5, 5], 1, 'SAME', activation_fn=tf.nn.relu)
-     x = batch_norm(x, decay=0.9, updates_collections=None, is_training=is_train) # set is_training=True during training, False at inference time
-     x = tf.contrib.layers.max_pool2d(x, [2, 2], stride=2, padding='SAME')
-
-     x1_1 = tf.contrib.layers.conv2d(x, 64, [1, 1], 1, 'SAME', activation_fn=tf.nn.relu) # 1x1 kernel
-     x1_1 = batch_norm(x1_1, decay=0.9, updates_collections=None, is_training=is_train)
-     x3_3 = tf.contrib.layers.conv2d(x, 64, [3, 3], 1, 'SAME', activation_fn=tf.nn.relu) # 3x3 kernel
-     x3_3 = batch_norm(x3_3, decay=0.9, updates_collections=None, is_training=is_train)
-     x5_5 = tf.contrib.layers.conv2d(x, 64, [5, 5], 1, 'SAME', activation_fn=tf.nn.relu) # 5x5 kernel
-     x5_5 = batch_norm(x5_5, decay=0.9, updates_collections=None, is_training=is_train)
-     x = tf.concat([x1_1, x3_3, x5_5], axis=-1) # concatenate the branches: 64*3 = 192 channels
-     x = tf.contrib.layers.max_pool2d(x, [2, 2], stride=2, padding='SAME')
-
-     x1_1 = tf.contrib.layers.conv2d(x, 128, [1, 1], 1, 'SAME', activation_fn=tf.nn.relu)
-     x1_1 = batch_norm(x1_1, decay=0.9, updates_collections=None, is_training=is_train)
-     x3_3 = tf.contrib.layers.conv2d(x, 128, [3, 3], 1, 'SAME', activation_fn=tf.nn.relu)
-     x3_3 = batch_norm(x3_3, decay=0.9, updates_collections=None, is_training=is_train)
-     x5_5 = tf.contrib.layers.conv2d(x, 128, [5, 5], 1, 'SAME', activation_fn=tf.nn.relu)
-     x5_5 = batch_norm(x5_5, decay=0.9, updates_collections=None, is_training=is_train)
-     x = tf.concat([x1_1, x3_3, x5_5], axis=-1)
-     x = tf.contrib.layers.max_pool2d(x, [2, 2], stride=2, padding='SAME')
-
-     shp = x.get_shape()
-     x = tf.reshape(x, [-1, shp[1]*shp[2]*shp[3]]) # flatten
-     x = tf.contrib.layers.fully_connected(x, N_CLASSES, activation_fn=None) # output logits without softmax
-     return x
-
-
- def model2(images, batch_size, n_classes):
-     '''Build the model
-     Args:
-         images: image batch, 4D tensor, tf.float32, [batch_size, width, height, channels]
-     Returns:
-         output tensor with the computed logits, float, [batch_size, n_classes]
-     '''
-     #conv1, shape = [kernel size, kernel size, channels, kernel numbers]
-     with tf.variable_scope('conv1') as scope:
-         weights = tf.get_variable('weights',
-                                   shape=[3, 3, 3, 16],
-                                   dtype=tf.float32,
-                                   initializer=tf.truncated_normal_initializer(stddev=0.1, dtype=tf.float32))
-         biases = tf.get_variable('biases',
-                                  shape=[16],
-                                  dtype=tf.float32,
-                                  initializer=tf.constant_initializer(0.1))
-         conv = tf.nn.conv2d(images, weights, strides=[1, 1, 1, 1], padding='SAME')
-         pre_activation = tf.nn.bias_add(conv, biases)
-         conv1 = tf.nn.relu(pre_activation, name=scope.name)
-
-     #pool1 and norm1
-     with tf.variable_scope('pooling1_lrn') as scope:
-         pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
-                                padding='SAME', name='pooling1')
-         norm1 = tf.nn.lrn(pool1, depth_radius=4, bias=1.0, alpha=0.001/9.0,
-                           beta=0.75, name='norm1')
-
-     #conv2
-     with tf.variable_scope('conv2') as scope:
-         weights = tf.get_variable('weights',
-                                   shape=[3, 3, 16, 16],
-                                   dtype=tf.float32,
-                                   initializer=tf.truncated_normal_initializer(stddev=0.1, dtype=tf.float32))
-         biases = tf.get_variable('biases',
-                                  shape=[16],
-                                  dtype=tf.float32,
-                                  initializer=tf.constant_initializer(0.1))
-         conv = tf.nn.conv2d(norm1, weights, strides=[1, 1, 1, 1], padding='SAME')
-         pre_activation = tf.nn.bias_add(conv, biases)
-         conv2 = tf.nn.relu(pre_activation, name='conv2')
-
-     #pool2 and norm2
-     with tf.variable_scope('pooling2_lrn') as scope:
-         norm2 = tf.nn.lrn(conv2, depth_radius=4, bias=1.0, alpha=0.001/9.0,
-                           beta=0.75, name='norm2')
-         pool2 = tf.nn.max_pool(norm2, ksize=[1, 3, 3, 1], strides=[1, 1, 1, 1],
-                                padding='SAME', name='pooling2')
-
-     #local3 (fully connected)
-     with tf.variable_scope('local3') as scope:
-         reshape = tf.reshape(pool2, shape=[batch_size, -1])
-         dim = reshape.get_shape()[1].value
-         weights = tf.get_variable('weights',
-                                   shape=[dim, 128],
-                                   dtype=tf.float32,
-                                   initializer=tf.truncated_normal_initializer(stddev=0.005, dtype=tf.float32))
-         biases = tf.get_variable('biases',
-                                  shape=[128],
-                                  dtype=tf.float32,
-                                  initializer=tf.constant_initializer(0.1))
-         local3 = tf.nn.relu(tf.matmul(reshape, weights) + biases, name=scope.name)
-
-     #local4 (fully connected)
-     with tf.variable_scope('local4') as scope:
-         weights = tf.get_variable('weights',
-                                   shape=[128, 128],
-                                   dtype=tf.float32,
-                                   initializer=tf.truncated_normal_initializer(stddev=0.005, dtype=tf.float32))
-         biases = tf.get_variable('biases',
-                                  shape=[128],
-                                  dtype=tf.float32,
-                                  initializer=tf.constant_initializer(0.1))
-         local4 = tf.nn.relu(tf.matmul(local3, weights) + biases, name='local4')
-
-     #softmax linear layer (output logits)
-     with tf.variable_scope('softmax_linear') as scope:
-         weights = tf.get_variable('softmax_linear',
-                                   shape=[128, n_classes],
-                                   dtype=tf.float32,
-                                   initializer=tf.truncated_normal_initializer(stddev=0.005, dtype=tf.float32))
-         biases = tf.get_variable('biases',
-                                  shape=[n_classes],
-                                  dtype=tf.float32,
-                                  initializer=tf.constant_initializer(0.1))
-         logits = tf.add(tf.matmul(local4, weights), biases, name='softmax_linear')
-
-     return logits
-
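A quick sanity check of model4's tensor sizes with the 224x224 input used here: each of the three max-pool layers halves the spatial resolution (224 -> 112 -> 56 -> 28), the first multi-branch block outputs 64*3 = 192 channels and the second 128*3 = 384, so the flattened feature fed to the final fully connected layer has 28*28*384 = 301056 values per image. The structure is a simplified Inception-style block: parallel 1x1, 3x3 and 5x5 convolutions whose outputs are concatenated along the channel axis.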
Load the trained model and predict on images, predict.py:
- #coding:utf-8
- import os, cv2
- #os.environ["CUDA_VISIBLE_DEVICES"] = "-1" # use cpu
-
- import numpy as np
- import tensorflow as tf
- from PIL import Image
- import matplotlib.pyplot as plt
- import glob
-
- import model
-
-
- N_CLASSES = 5
- IMG_W = 224
- IMG_H = IMG_W
-
- os.environ["CUDA_VISIBLE_DEVICES"] = "0" # use gpu 0
- label_dict = {'daisy':0, 'dandelion':1, 'roses':2, 'sunflowers':3, 'tulips':4}
- label_dict_res = {v:k for k,v in label_dict.items()}
- config = tf.ConfigProto()
- config.gpu_options.allow_growth = True
-
-
- def init_tf(logs_train_dir='./model_save/model.ckpt-174000'):
-     global sess, pred, x
-     # process image
-     x = tf.placeholder(tf.float32, shape=[IMG_W, IMG_W, 3])
-     x_norm = tf.image.per_image_standardization(x)
-     x_4d = tf.reshape(x_norm, [1, IMG_W, IMG_W, 3])
-     # predict
-     logit = model.model4(x_4d, N_CLASSES, is_train=False)
-     #logit = model.model2(x_4d, batch_size=1, n_classes=N_CLASSES)
-     pred = tf.nn.softmax(logit)
-
-     saver = tf.train.Saver()
-     sess = tf.Session(config=config)
-     saver.restore(sess, logs_train_dir)
-     print('load model done...')
-
- def evaluate_image(img_dir):
-     # read image (OpenCV loads BGR, convert to RGB to match training input)
-     im = cv2.imread(img_dir)
-     im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
-     im = cv2.resize(im, (IMG_W, IMG_W))
-     image_array = np.array(im)
-
-     prediction = sess.run(pred, feed_dict={x: image_array})
-     max_index = np.argmax(prediction)
-     print("%s, predict: %s, prob: %f" % (os.path.basename(img_dir), label_dict_res[max_index], prediction[0][max_index]))
-
-
- if __name__ == '__main__':
-     init_tf()
-     # data_path = 'flowers/flower_photos'
-     # label = os.listdir(data_path)
-     # for l in label:
-     #     if os.path.isfile(os.path.join(data_path, l)):
-     #         continue
-     #     for img in glob.glob(os.path.join(data_path, l, "*.jpg")):
-     #         print(img)
-     #         evaluate_image(img_dir=img)
-     for img in glob.glob("./*.jpg"):
-         evaluate_image(img)
-     sess.close()
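Note that predict.py standardizes the image inside the graph with tf.image.per_image_standardization, while predict_batch.py below performs the equivalent normalization in NumPy, (im - mean(im)) / max(std(im), 1/sqrt(num_pixels)), before feeding the batch. Both match the per-image standardization applied during training, so the two scripts should give consistent predictions.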
Batch prediction: predict several images at once to save time, predict_batch.py:
- #coding:utf-8
- import os, cv2, time
- #os.environ["CUDA_VISIBLE_DEVICES"] = "-1" # use cpu
-
- import numpy as np
- import tensorflow as tf
- from PIL import Image
- import matplotlib.pyplot as plt
- import glob
-
- import model
-
-
- N_CLASSES = 5
- IMG_W = 224
- IMG_H = IMG_W
- batch_size = 32
-
- os.environ["CUDA_VISIBLE_DEVICES"] = "0" # use gpu 0
- label_dict = {'daisy':0, 'dandelion':1, 'roses':2, 'sunflowers':3, 'tulips':4}
- label_dict_res = {v:k for k,v in label_dict.items()}
- config = tf.ConfigProto()
- config.gpu_options.allow_growth = True
-
-
- def get_imgpath(path):
-     img_list = []
-     for fpath, dirs, fs in os.walk(path):
-         for f in fs:
-             img_path = os.path.join(fpath, f)
-             if os.path.dirname(img_path) == os.getcwd():
-                 continue
-             if not os.path.isfile(img_path):
-                 continue
-             if os.path.basename(img_path)[-3:] == "jpg":
-                 img_list.append(img_path)
-     return img_list
-
-
- def init_tf(logs_train_dir='./model_save/model.ckpt-174000'):
-     global sess, pred, x
-     # placeholder for a batch of already-standardized images
-     x = tf.placeholder(tf.float32, shape=[None, IMG_W, IMG_W, 3])
-     # predict
-     logit = model.model4(x, N_CLASSES, is_train=False)
-     #logit = model.model2(x_4d, batch_size=1, n_classes=N_CLASSES)
-     pred = tf.nn.softmax(logit)
-
-     saver = tf.train.Saver()
-     sess = tf.Session(config=config)
-     saver.restore(sess, logs_train_dir)
-     print('load model done...')
-
- def evaluate_image(img_dir):
-     # read and preprocess images
-     batch_img = []
-     for img in img_dir:
-         im = cv2.imread(img)
-         im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
-         im = cv2.resize(im, (IMG_W, IMG_W))
-         # same per-image standardization as tf.image.per_image_standardization
-         im_mean = np.mean(im)
-         stddev = max(np.std(im), 1.0/np.sqrt(IMG_W*IMG_H*3))
-         im = (im - im_mean) / stddev
-         image_array = np.array(im)
-         batch_img.append(image_array)
-     # output softmax probabilities for the whole batch
-     prediction = sess.run(pred, feed_dict={x: batch_img})
-     for i in range(len(img_dir)):
-         img = img_dir[i]
-         max_index = np.argmax(prediction[i])
-         print("img:%s, predict: %s, prob: %f" % (img, label_dict_res[max_index], prediction[i][max_index]))
-
-
- if __name__ == '__main__':
-     init_tf()
-     data_path = 'flowers/flower_photos'
-     img_list = get_imgpath(data_path)
-     # integer division; images beyond the last full batch are skipped here
-     total_batch = len(img_list) // batch_size
-     start = time.time()
-     for i in range(total_batch):
-         print(str(i) + "-"*50)
-         batch_img = img_list[i*batch_size: (i+1)*batch_size]
-         evaluate_image(batch_img)
-     print("time cost:", time.time()-start)
-     sess.close()
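Because total_batch uses integer division, the last len(img_list) % batch_size images are never predicted. A minimal sketch of handling that leftover batch (evaluate_image already accepts a list of any length, so it is just one extra call placed before sess.close()):
- remainder = len(img_list) % batch_size
- if remainder:
-     evaluate_image(img_list[-remainder:])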