14.tensorflow:搭建分类网络并训练自己的数据_tensorflow1.x训练自己的数据集resnet图像分类

作者：小小林熬夜学编程 | 2024-02-16 11:33:37

踩

tensorflow1.x训练自己的数据集resnet图像分类

本示例可以实现的功能：

1. 用 tf.contrib.layers 搭建分类网络（也可以自己用 tf.nn 搭建）；2. 直接根据图像路径批量读取数据（也可以提供一个包含图像路径及标签的 txt 文件，使用 caffe 时经常这样做），无需转换文件格式；3. 学习率衰减；4. 用 tensorboard 观察模型结构以及训练过程中 loss、accuracy、learning rate 的变化情况，有助于调参；5. 保存模型；6. 加载训练好的模型，并对图像进行预测。

这里使用 flower data 数据集：http://download.tensorflow.org/example_images/flower_photos.tgz

该数据集共分为 5 类：daisy、dandelion、roses、sunflowers、tulips。

主函数：train.py

其中 get_files 返回打乱顺序后的图像路径列表及对应的标签列表，get_batch 则根据这些路径和标签构建输入队列，用于加载批量数据。


#coding:utf-8
import os
import numpy as np
import tensorflow as tf
import glob
 
import model
 
 
# Training hyper-parameters and input settings used by main() below.
init_lr = 0.001  # initial learning rate passed to tf.train.exponential_decay
decay_steps = 10000  # decay_steps argument of the learning-rate schedule
MAX_STEP = 200000  # number of training iterations
N_CLASSES = 5  # number of output classes of the network
IMG_W = 224  # target image width passed to get_batch
IMG_H = 224  # target image height passed to get_batch
BATCH_SIZE = 32  # examples per training batch
CAPACITY = 2000  # capacity of the batching queue
os.environ["CUDA_VISIBLE_DEVICES"] = "0" # index of the GPU to use
label_dict = {'daisy':0, 'dandelion':1, 'roses':2, 'sunflowers':3, 'tulips':4} # manually specified class-name -> label mapping; labels must start at 0

train_dir = 'flowers/flower_photos'  # directory holding one sub-folder of images per class (5 here)
logs_train_dir = './model_save'  # directory receiving the summaries and checkpoints

config = tf.ConfigProto()
config.gpu_options.allow_growth = True # grow GPU memory usage on demand instead of reserving it all up front
 
 
def get_batch(image, label, image_W, image_H, batch_size, capacity):
    """Build the queue-based input pipeline that yields augmented batches.

    Args:
        image: list of JPEG file paths.
        label: list of integer class labels aligned with ``image``.
        image_W: target image width in pixels.
        image_H: target image height in pixels.
        batch_size: number of examples per batch.
        capacity: maximum number of examples held in the batching queue.

    Returns:
        A tuple ``(image_batch, label_batch)``: a float32 tensor of shape
        ``[batch_size, image_H, image_W, 3]`` and an int32 tensor of shape
        ``[batch_size]``.
    """
    image = tf.cast(image, tf.string)
    label = tf.cast(label, tf.int32)
    # Make an input queue; the (path, label) pairs are consumed in the order
    # given, so callers are expected to pass an already shuffled list.
    input_queue = tf.train.slice_input_producer([image, label], shuffle=False)
    label = input_queue[1]
    image_contents = tf.read_file(input_queue[0])
    image = tf.image.decode_jpeg(image_contents, channels=3)
    # Data augmentation.
    # resize_images expects the size as (new_height, new_width); the previous
    # (image_W, image_H) order transposed the target for non-square sizes.
    image = tf.image.resize_images(image, (image_H, image_W))
    # Random horizontal flip.
    image = tf.image.random_flip_left_right(image)
    # Random vertical flip.
    image = tf.image.random_flip_up_down(image)
    # Random brightness.
    # NOTE(review): the image is presumably still in the 0..255 range here, so
    # this delta is tiny, and the standardization below removes any constant
    # offset anyway -- confirm whether this augmentation is meant to matter.
    image = tf.image.random_brightness(image, max_delta=32/255.0)
    # Random contrast.
    image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
    # Random hue.
    image = tf.image.random_hue(image, max_delta=0.3)
    # Random saturation.
    image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
    # Standardize each image to zero mean and unit variance.
    image = tf.image.per_image_standardization(image)
    image_batch, label_batch = tf.train.batch([image, label],
                                              batch_size=batch_size,
                                              num_threads=64,
                                              capacity=capacity)
    label_batch = tf.reshape(label_batch, [batch_size])
    image_batch = tf.cast(image_batch, tf.float32)
    return image_batch, label_batch
 
 
def get_files(file_dir, label_map=None):
    """Collect the ``*.jpg`` files of every class folder and shuffle them.

    Args:
        file_dir: directory containing one sub-folder per class.
        label_map: optional mapping from sub-folder name to integer label.
            Defaults to the module-level ``label_dict``.

    Returns:
        A tuple ``(image_list, label_list)`` of equal length: the image paths
        in random order and the integer label of each path.

    Raises:
        KeyError: if a sub-folder containing ``*.jpg`` files has no entry in
            the label mapping.
    """
    if label_map is None:
        label_map = label_dict
    image_list, label_list = [], []
    # Sorting makes the pre-shuffle order independent of the file system.
    for label in sorted(os.listdir(file_dir)):
        class_dir = os.path.join(file_dir, label)
        images = sorted(glob.glob(os.path.join(class_dir, "*.jpg")))
        if not images:
            # Plain files (e.g. a licence file) and empty folders add nothing.
            continue
        if label not in label_map:
            raise KeyError('no label defined for image folder %r' % class_dir)
        image_list.extend(images)
        label_list.extend([int(label_map[label])] * len(images))
    print('There are %d data' %(len(image_list)))
    # Shuffle paths and labels together through one index permutation rather
    # than packing both into a single string-typed array and parsing the
    # labels back out of it.
    order = np.random.permutation(len(image_list))
    image_list = [image_list[i] for i in order]
    label_list = [label_list[i] for i in order]
    return image_list, label_list
 
 
def main():
    """Build the training graph, then train, log summaries and save checkpoints."""
    global_step = tf.Variable(0, name='global_step', trainable=False)
    # dataset
    train, train_label = get_files(train_dir)
    # labels are plain class indices (no one-hot encoding)
    batch_train, batch_labels = get_batch(train,
                                          train_label,
                                          IMG_W,
                                          IMG_H,
                                          BATCH_SIZE,
                                          CAPACITY)
    # network
    #logits = model.model2(batch_train, BATCH_SIZE, N_CLASSES)
    logits = model.model4(batch_train, N_CLASSES, is_trian=True)
    # loss
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=batch_labels)
    loss = tf.reduce_mean(cross_entropy, name='loss')
    tf.summary.scalar('train_loss', loss)
    # optimizer
    lr = tf.train.exponential_decay(learning_rate=init_lr, global_step=global_step, decay_steps=decay_steps, decay_rate=0.1)
    tf.summary.scalar('learning_rate', lr)
    optimizer = tf.train.AdamOptimizer(learning_rate=lr).minimize(loss, global_step=global_step)
    # accuracy
    correct = tf.nn.in_top_k(logits, batch_labels, 1)
    correct = tf.cast(correct, tf.float16)
    accuracy = tf.reduce_mean(correct)
    tf.summary.scalar('train_acc', accuracy)

    summary_op = tf.summary.merge_all()
    saver = tf.train.Saver()
    checkpoint_path = os.path.join(logs_train_dir, 'model.ckpt')

    sess = tf.Session(config=config)
    train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph)
    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    # To resume training, restore a checkpoint here, e.g.:
    #saver.restore(sess, logs_train_dir+'/model.ckpt-174000')
    try:
        for step in range(MAX_STEP):
            if coord.should_stop():
                break
            if step % 50 == 0:
                # Fetch the summary in the same run as the training step: a
                # separate sess.run would dequeue (and waste) another batch
                # and log values that do not match the printed ones.
                _, learning_rate, tra_loss, tra_acc, summary_str = sess.run(
                    [optimizer, lr, loss, accuracy, summary_op])
                print('Step %4d, lr %f, train loss = %.2f, train accuracy = %.2f%%' %(step, learning_rate, tra_loss, tra_acc*100.0))
                train_writer.add_summary(summary_str, step)
            else:
                sess.run(optimizer)
            if step % 2000 == 0 or (step + 1) == MAX_STEP:
                saver.save(sess, checkpoint_path, global_step=step)
    except tf.errors.OutOfRangeError:
        print('Done training -- epoch limit reached')
    finally:
        # Stop the input threads and release the writer and session even
        # when training is interrupted by an error.
        coord.request_stop()
        try:
            coord.join(threads)
        finally:
            train_writer.close()
            sess.close()


if __name__ == '__main__':
    main()

模型定义（model.py）：


#coding:utf-8
 
from tensorflow.contrib.layers.python.layers import batch_norm
import tensorflow as tf
import inspect
import os
import numpy as np
import time
 
 
def _conv_bn(inputs, num_filters, kernel, is_training):
    """Apply a stride-1 'SAME' ReLU convolution followed by batch normalization."""
    conv = tf.contrib.layers.conv2d(inputs,
                                    num_outputs=num_filters,
                                    kernel_size=[kernel, kernel],
                                    stride=1,
                                    padding='SAME',
                                    activation_fn=tf.nn.relu)
    return batch_norm(conv, decay=0.9, updates_collections=None, is_training=is_training)


def _multi_kernel_block(inputs, num_filters, is_training):
    """Run parallel 1x1, 3x3 and 5x5 conv+BN branches, concatenate, then pool."""
    # The branches are created in the order 1x1, 3x3, 5x5; keep that order so
    # the automatically generated layer names stay the same.
    branches = [_conv_bn(inputs, num_filters, size, is_training) for size in (1, 3, 5)]
    merged = tf.concat(branches, axis=-1)  # 3 * num_filters channels
    return tf.contrib.layers.max_pool2d(merged, [2, 2], stride=2, padding='SAME')


def model4(x, N_CLASSES, is_trian = False):
    """Build the multi-branch convolutional classifier.

    Args:
        x: 4-D image batch tensor.
        N_CLASSES: number of output classes.
        is_trian: passed to batch normalization as ``is_training``; set it to
            True while training and to False when using the trained model.

    Returns:
        The logits tensor (no softmax applied) with ``N_CLASSES`` columns.
    """
    # Stem: 5x5 convolution with 64 filters, batch norm, 2x2 max pooling.
    net = _conv_bn(x, 64, 5, is_trian)
    net = tf.contrib.layers.max_pool2d(net, [2, 2], stride=2, padding='SAME')

    # Two multi-kernel stages: 64 filters per branch, then 128 per branch.
    net = _multi_kernel_block(net, 64, is_trian)
    net = _multi_kernel_block(net, 128, is_trian)

    # Flatten everything but the batch dimension.
    dims = net.get_shape()
    flat_size = dims[1]*dims[2]*dims[3]
    net = tf.reshape(net, [-1, flat_size])
    # Linear output layer producing the logits.
    logits = tf.contrib.layers.fully_connected(net, N_CLASSES, activation_fn=None)
    return logits
 
 
def _truncated_normal_variable(name, shape, stddev):
    """Get a float32 variable initialized from a truncated normal distribution."""
    init = tf.truncated_normal_initializer(stddev=stddev, dtype=tf.float32)
    return tf.get_variable(name, shape=shape, dtype=tf.float32, initializer=init)


def _bias_variable(size):
    """Get a float32 'biases' variable of length ``size`` initialized to 0.1."""
    return tf.get_variable('biases',
                           shape=[size],
                           dtype=tf.float32,
                           initializer=tf.constant_initializer(0.1))


def model2(images, batch_size, n_classes):
    """Build a small two-convolution, three-dense-layer classifier.

    Args:
        images: image batch, 4-D float32 tensor with 3 channels.
        batch_size: number of images in the batch; used to flatten the
            feature maps before the first dense layer.
        n_classes: number of output classes.

    Returns:
        Logits tensor of shape [batch_size, n_classes] (no softmax applied).
    """
    unit_stride = [1, 1, 1, 1]
    lrn_args = dict(depth_radius=4, bias=1.0, alpha=0.001/9.0, beta=0.75)

    # conv1: 3x3 kernels, 3 input channels -> 16 feature maps
    with tf.variable_scope('conv1') as scope:
        kernel = _truncated_normal_variable('weights', [3, 3, 3, 16], 0.1)
        offset = _bias_variable(16)
        conv = tf.nn.conv2d(images, kernel, strides=unit_stride, padding='SAME')
        conv1 = tf.nn.relu(tf.nn.bias_add(conv, offset), name=scope.name)

    # pool1 then norm1
    with tf.variable_scope('pooling1_lrn') as scope:
        pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
                               padding='SAME', name='pooling1')
        norm1 = tf.nn.lrn(pool1, name='norm1', **lrn_args)

    # conv2: 3x3 kernels, 16 -> 16 feature maps
    with tf.variable_scope('conv2') as scope:
        kernel = _truncated_normal_variable('weights', [3, 3, 16, 16], 0.1)
        offset = _bias_variable(16)
        conv = tf.nn.conv2d(norm1, kernel, strides=unit_stride, padding='SAME')
        conv2 = tf.nn.relu(tf.nn.bias_add(conv, offset), name='conv2')

    # norm2 then pool2 (this pooling uses stride 1)
    with tf.variable_scope('pooling2_lrn') as scope:
        norm2 = tf.nn.lrn(conv2, name='norm2', **lrn_args)
        pool2 = tf.nn.max_pool(norm2, ksize=[1, 3, 3, 1], strides=unit_stride,
                               padding='SAME', name='pooling2')

    # local3: flatten and project to 128 units
    with tf.variable_scope('local3') as scope:
        flat = tf.reshape(pool2, shape=[batch_size, -1])
        flat_dim = flat.get_shape()[1].value
        w = _truncated_normal_variable('weights', [flat_dim, 128], 0.005)
        b = _bias_variable(128)
        local3 = tf.nn.relu(tf.matmul(flat, w) + b, name=scope.name)

    # local4: 128 -> 128 units
    with tf.variable_scope('local4') as scope:
        w = _truncated_normal_variable('weights', [128, 128], 0.005)
        b = _bias_variable(128)
        local4 = tf.nn.relu(tf.matmul(local3, w) + b, name='local4')

    # final linear layer producing the logits; note that the weight variable
    # of this scope is itself named 'softmax_linear'
    with tf.variable_scope('softmax_linear') as scope:
        w = _truncated_normal_variable('softmax_linear', [128, n_classes], 0.005)
        b = _bias_variable(n_classes)
        logits = tf.add(tf.matmul(local4, w), b, name='softmax_linear')

    return logits

加载训练好的模型并进行预测，predict.py：


#coding:utf-8
import os, cv2
#os.environ["CUDA_VISIBLE_DEVICES"] = "-1"  # use cpu
 
import numpy as np
import tensorflow as tf
from PIL import Image
import matplotlib.pyplot as plt
import glob
 
import model
 
 
# Settings of the single-image prediction script.
N_CLASSES = 5  # number of classes passed to the network builder
IMG_W = 224  # side length the input images are resized to
IMG_H = IMG_W  # same value as IMG_W (square input)

os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # use gpu 0
label_dict = {'daisy':0, 'dandelion':1, 'roses':2, 'sunflowers':3, 'tulips':4}  # class name -> label index
label_dict_res = {v:k for k,v in label_dict.items()}  # inverse mapping: label index -> class name
config = tf.ConfigProto()
config.gpu_options.allow_growth = True  # grow GPU memory usage on demand
 
 
def init_tf(logs_train_dir='./model_save/model.ckpt-174000'):
    """Build the single-image inference graph and restore the trained weights.

    Sets the module-level globals ``x`` (input placeholder for one RGB image
    of shape [IMG_H, IMG_W, 3]), ``pred`` (softmax probabilities) and
    ``sess`` (the session holding the restored variables).

    Args:
        logs_train_dir: path of the checkpoint to restore.
    """
    global sess, pred, x
    # Input preprocessing. The height now comes from IMG_H instead of a
    # second IMG_W, so the placeholder stays correct for non-square sizes.
    x = tf.placeholder(tf.float32, shape=[IMG_H, IMG_W, 3])
    x_norm = tf.image.per_image_standardization(x)
    x_4d = tf.reshape(x_norm, [1, IMG_H, IMG_W, 3])
    # predict
    logit = model.model4(x_4d, N_CLASSES, is_trian=False)
    #logit = model.model2(x_4d, batch_size=1, n_classes=N_CLASSES)
    pred = tf.nn.softmax(logit)

    saver = tf.train.Saver()
    sess = tf.Session(config=config)
    saver.restore(sess, logs_train_dir)
    print('load model done...')
 
def evaluate_image(img_dir):
    """Classify one image file and print the predicted class and probability.

    Requires ``init_tf()`` to have been called first, since it uses the
    globals ``sess``, ``pred`` and ``x``.

    Args:
        img_dir: path of the image file to classify.
    """
    # cv2.imread returns None instead of raising when the file is missing or
    # cannot be decoded; report it rather than crashing in cvtColor.
    im = cv2.imread(img_dir)
    if im is None:
        print("%s, could not be read as an image, skipped" % os.path.basename(img_dir))
        return
    im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    # cv2.resize takes the target size as (width, height).
    im = cv2.resize(im, (IMG_W, IMG_H))

    prediction = sess.run(pred, feed_dict={x: im})
    max_index = int(np.argmax(prediction[0]))
    print("%s, predict: %s, prob: %f" %(os.path.basename(img_dir), label_dict_res[max_index], prediction[0][max_index]))
    
 
if __name__ == '__main__':
    init_tf()
    try:
        # To evaluate the whole data set instead, walk the class folders:
        # data_path = 'flowers/flower_photos'
        # label = os.listdir(data_path)
        # for l in label:
        #     if os.path.isfile(os.path.join(data_path, l)):
        #         continue
        #     for img in glob.glob(os.path.join(data_path, l, "*.jpg")):
        #         print(img)
        #         evaluate_image(img_dir=img)
        # Classify every JPEG lying in the current directory.
        for img in glob.glob("./*.jpg"):
            evaluate_image(img)
    finally:
        # Release the session even if a prediction fails.
        sess.close()

实现批量预测（一次同时预测多张图像，节约时间），predict_batch.py：


#coding:utf-8
import os, cv2, time
#os.environ["CUDA_VISIBLE_DEVICES"] = "-1"  # use cpu
 
import numpy as np
import tensorflow as tf
from PIL import Image
import matplotlib.pyplot as plt
import glob
 
import model
 
 
# Settings of the batched prediction script.
N_CLASSES = 5  # number of classes passed to the network builder
IMG_W = 224  # side length the input images are resized to
IMG_H = IMG_W  # same value as IMG_W (square input)
batch_size = 32  # number of images predicted per session run

os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # use gpu 0
label_dict = {'daisy':0, 'dandelion':1, 'roses':2, 'sunflowers':3, 'tulips':4}  # class name -> label index
label_dict_res = {v:k for k,v in label_dict.items()}  # inverse mapping: label index -> class name
config = tf.ConfigProto()
config.gpu_options.allow_growth = True  # grow GPU memory usage on demand
 
 
def get_imgpath(path):
    """Recursively collect the JPEG files found under ``path``.

    Files lying directly in the current working directory are skipped;
    files in its sub-directories are still collected.

    Args:
        path: root directory to walk.

    Returns:
        List of paths of the files whose extension is ``.jpg`` or ``.jpeg``
        (case-insensitive).
    """
    cwd = os.getcwd()
    img_list = []
    for fpath, _dirs, fs in os.walk(path):
        # Compare absolute paths: with a relative ``path`` the directory name
        # could never equal os.getcwd(), so the skip silently never applied.
        if os.path.abspath(fpath) == cwd:
            continue
        for f in fs:
            img_path = os.path.join(fpath, f)
            if not os.path.isfile(img_path):
                continue
            # Test the real extension: the previous ``[-3:] == "jpg"`` check
            # accepted names without a dot and rejected ``.JPG`` / ``.jpeg``.
            if os.path.splitext(f)[1].lower() in ('.jpg', '.jpeg'):
                img_list.append(img_path)
    return img_list
 
 
def init_tf(logs_train_dir='./model_save/model.ckpt-174000'):
    """Build the batched inference graph and restore the trained weights.

    Sets the module-level globals ``x`` (input placeholder for a batch of
    already standardized RGB images, shape [None, IMG_H, IMG_W, 3]),
    ``pred`` (softmax probabilities) and ``sess`` (the session holding the
    restored variables).

    Args:
        logs_train_dir: path of the checkpoint to restore.
    """
    global sess, pred, x
    # Batch input; the height now comes from IMG_H instead of a second IMG_W,
    # so the placeholder stays correct for non-square sizes.
    x = tf.placeholder(tf.float32, shape=[None, IMG_H, IMG_W, 3])
    # predict
    logit = model.model4(x, N_CLASSES, is_trian=False)
    # model2 needs a fixed batch size, so it is not a drop-in replacement here.
    #logit = model.model2(x, batch_size=1, n_classes=N_CLASSES)
    pred = tf.nn.softmax(logit)

    saver = tf.train.Saver()
    sess = tf.Session(config=config)
    saver.restore(sess, logs_train_dir)
    print('load model done...')
 
def evaluate_image(img_dir):
    """Classify a batch of image files and print the top prediction of each.

    Requires ``init_tf()`` to have been called first, since it uses the
    globals ``sess``, ``pred`` and ``x``. Unreadable files are reported and
    left out of the batch.

    Args:
        img_dir: sequence of image file paths forming one batch.
    """
    # Lower bound of the divisor, guarding against division by zero for
    # uniform images.
    min_stddev = 1.0/np.sqrt(IMG_W*IMG_H*3)
    # read and process images
    paths, batch_img = [], []
    for img in img_dir:
        im = cv2.imread(img)
        if im is None:
            # cv2.imread returns None for missing or undecodable files.
            print("img:%s, could not be read as an image, skipped" % img)
            continue
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        # cv2.resize takes the target size as (width, height).
        im = cv2.resize(im, (IMG_W, IMG_H))
        # Standardize each image to zero mean and unit variance.
        im = (im - np.mean(im)) / max(np.std(im), min_stddev)
        paths.append(img)
        batch_img.append(im)
    if not batch_img:
        # Nothing to predict (empty batch or only unreadable files).
        return
    # output softmax
    prediction = sess.run(pred, feed_dict={x: np.array(batch_img, dtype=np.float32)})
    for img, probs in zip(paths, prediction):
        max_index = int(np.argmax(probs))
        print("img:%s, predict: %s, prob: %f" % (img, label_dict_res[max_index], probs[max_index]))
    
 
if __name__ == '__main__':
    init_tf()
    data_path = 'flowers/flower_photos'
    img_list = get_imgpath(data_path)
    start = time.time()
    try:
        # Step through the list in strides of batch_size. The previous
        # ``len(img_list)/batch_size`` is a float on Python 3 (range() then
        # raises TypeError) and, as a floor division, dropped the final
        # partial batch.
        for i, begin in enumerate(range(0, len(img_list), batch_size)):
            print(str(i) + "-"*50)
            batch_img = img_list[begin: begin + batch_size]
            evaluate_image(batch_img)
        print("time cost:", time.time()-start)
    finally:
        # Release the session even if a prediction fails.
        sess.close()

本文内容由网友自发贡献，转载请注明出处：【wpsshop博客】