- # import modules
- import numpy as np
- import matplotlib.pyplot as plt
- import tensorflow as tf
- import time
- from datetime import timedelta
- import math
- from tensorflow.examples.tutorials.mnist import input_data
- import os
# Restrict TensorFlow to CUDA device 0.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
# Let the GPU allocator grow on demand instead of reserving memory up front.
config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
# NOTE(review): the name `sess` is rebound by the `with tf.Session(...) as sess`
# block at the bottom of this file, so this interactive session is never used
# to run the graph.
sess = tf.InteractiveSession(config=config)
def weight_variable(shape):
    """Create a weight variable of the given shape.

    The initial values are drawn from a truncated normal distribution with
    a standard deviation of 0.1.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))
- # def weight_variable(shape):
- # return tf.Variable(tf.truncated_normal(shape, stddev=0.05))
def bias_variable(shape):
    """Create a bias variable of the given shape, filled with 0.1.

    The network uses ReLU activations, for which a small positive initial
    bias is preferred.
    """
    return tf.Variable(tf.constant(0.1, shape=shape))
- # def bias_variable(length):
- # return tf.Variable(tf.constant(0.1, shape=length))
def conv2d(input, filter):
    """Convolve `input` with the kernel `filter`.

    Uses a stride of 1 in every dimension and SAME padding.
    """
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(input, filter, strides=unit_strides, padding='SAME')
- # def conv2d(x, W):
- # return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
def max_pool(input):
    """Apply 2x2 max pooling with a stride of 2 and SAME padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(input, ksize=window, strides=step, padding='SAME')
- # def max_pool_2x2(inputx):
- # return tf.nn.max_pool(inputx, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
# Load MNIST with one-hot labels, e.g. [0 0 1 0 0 0 0 0 0 0] stands for 2.
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

# Report how many labelled examples each split contains.
print("Size of:")
for template, split in (
        ("--Training-set:\t\t{}", mnist.train),
        ("--Testing-set:\t\t{}", mnist.test),
        ("--Validation-set:\t{}", mnist.validation),
):
    print(template.format(len(split.labels)))

# Integer class ids of the test labels, e.g. [7 2 1 ..., 4 5 6] (10000 values).
mnist.test.cls = np.argmax(mnist.test.labels, axis=1)
# Input layer: each example arrives as a flat vector of 784 pixel values and
# is reshaped to [batch, 28, 28, 1] for the convolution layers.
x_input = tf.placeholder(tf.float32, [None, 784], name="x_input")
input_image = tf.reshape(x_input, [-1, 28, 28, 1])

# One-hot ground-truth labels over the 10 classes, plus their class indices.
y_input = tf.placeholder(tf.float32, [None, 10], name="y_input")
# `axis` replaces the deprecated `dimension` keyword of tf.argmax.
y_true_cls = tf.argmax(y_input, axis=1)
# Conv 1: 5x5 kernels, 1 input channel -> 32 feature maps, then 2x2 max pooling.
layer_conv1 = {"weights": weight_variable([5, 5, 1, 32]),
               "biases": bias_variable([32])}
h_conv1 = tf.nn.relu(conv2d(input_image, layer_conv1["weights"]) + layer_conv1["biases"])
h_pool1 = max_pool(h_conv1)

# Conv 2: 5x5 kernels, 32 -> 64 feature maps, then 2x2 max pooling.
layer_conv2 = {"weights": weight_variable([5, 5, 32, 64]),
               "biases": bias_variable([64])}
h_conv2 = tf.nn.relu(conv2d(h_pool1, layer_conv2["weights"]) + layer_conv2["biases"])
h_pool2 = max_pool(h_conv2)

# Flatten: the two stride-2 poolings reduce 28x28 to 7x7, with 64 channels.
h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])

# Fully-connected layer 1: 7*7*64 -> 1024 units with ReLU.
fc1_layer = {"weights": weight_variable([7 * 7 * 64, 1024]),
             "biases": bias_variable([1024])}
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, fc1_layer["weights"]) + fc1_layer["biases"])

# Dropout layer; `keep_prob` is fed at run time (0.5 for training, 1.0 for evaluation).
keep_prob = tf.placeholder("float")
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

# Fully-connected layer 2: 1024 -> 10 class scores.
fc2_layer = {"weights": weight_variable([1024, 10]),
             "biases": bias_variable([10])}
logits = tf.matmul(h_fc1_drop, fc2_layer["weights"]) + fc2_layer["biases"]

# Class probabilities and the predicted class index (e.g. 2).
y_pred = tf.nn.softmax(logits)
y_pred_cls = tf.argmax(y_pred, axis=1)

# Cost function to be optimized, grouped under the "loss-model" name scope.
with tf.name_scope("loss-model"):
    # Cross-entropy averaged over every element of the [batch, 10] product.
    # log_softmax(logits) equals log(softmax(logits)) mathematically but does
    # not produce -inf/NaN when a probability underflows to zero.
    cross_entropy = -tf.reduce_mean(y_input * tf.nn.log_softmax(logits))
    optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cross_entropy)
    # Scalar summary so the loss curve can be inspected in TensorBoard.
    tf.summary.scalar("loss", cross_entropy)

# Performance measures, grouped under the "accuracy-model" name scope.
with tf.name_scope("accuracy-model"):
    # One row per example; the number of rows is the batch size.
    correct_prediction = tf.equal(y_pred_cls, y_true_cls)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    # Scalar summary so the accuracy curve can be inspected in TensorBoard.
    tf.summary.scalar("test_acc", accuracy)
def optimize(num_iterations):
    """Run `num_iterations` training steps and log progress every 100 steps.

    Uses the module-level `sess`, `mnist`, `train_batch_size` and graph
    tensors. On every 100th iteration the accuracy on the full test set is
    printed and the merged summaries of that training step are written to
    ./tensorboard. The elapsed wall-clock time is printed at the end.

    Args:
        num_iterations: number of optimizer steps to perform.
    """
    start_time = time.time()
    # Collect every summary op defined on the graph.
    merged = tf.summary.merge_all()
    # Event files for TensorBoard are written to ./tensorboard.
    writer = tf.summary.FileWriter('./tensorboard', sess.graph, filename_suffix="_mnist")
    # The evaluation feed never changes, so build it once outside the loop.
    feed_dict_test = {x_input: mnist.test.images, y_input: mnist.test.labels, keep_prob: 1.0}
    # NOTE(review): the label says "Training Accuracy" but the value printed is
    # measured on the test set; the text is left unchanged to keep log output
    # stable. Likewise the "test_acc" summary is evaluated on the training
    # batch (with dropout) because it is fetched together with the train step.
    msg = "Optimization Iteration:{0:>6}, Training Accuracy: {1:>6.1%}"
    try:
        for i in range(num_iterations):
            x_batch, y_batch = mnist.train.next_batch(train_batch_size)
            feed_dict_train_op = {x_input: x_batch, y_input: y_batch, keep_prob: 0.5}
            if i % 100 != 0:
                # Plain training step; summaries are only needed when logging.
                sess.run(optimizer, feed_dict=feed_dict_train_op)
                continue
            # Logging step: train and fetch the summaries in a single run.
            summary, _ = sess.run([merged, optimizer], feed_dict=feed_dict_train_op)
            acc = sess.run(accuracy, feed_dict=feed_dict_test)
            print(msg.format(i, acc))
            writer.add_summary(summary, i)
    finally:
        # Flush buffered events and release the file even if training fails.
        writer.close()
    # Report the elapsed time, rounded to whole seconds.
    time_dif = time.time() - start_time
    print("Time usage:" + str(timedelta(seconds=int(round(time_dif)))))
def print_test_accuracy():
    """Classify the whole test set in batches and print the accuracy.

    Uses the module-level `sess`, `mnist`, `test_batch_size` and graph
    tensors. Predictions are computed `test_batch_size` images at a time and
    compared against `mnist.test.cls`.
    """
    # Number of images in the test-set.
    num_test = len(mnist.test.images)
    # Builtin `int` is what the removed `np.int` alias used to refer to.
    cls_pred = np.zeros(shape=num_test, dtype=int)
    for start in range(0, num_test, test_batch_size):
        # The last batch may be shorter than test_batch_size.
        end = min(start + test_batch_size, num_test)
        feed_dict = {x_input: mnist.test.images[start:end, :],
                     y_input: mnist.test.labels[start:end, :],
                     keep_prob: 1.0}
        # Predicted class index for every image in this batch.
        cls_pred[start:end] = sess.run(y_pred_cls, feed_dict=feed_dict)
    cls_true = mnist.test.cls
    correct = (cls_true == cls_pred)
    correct_sum = correct.sum()
    acc = float(correct_sum) / num_test
    # Print the accuracy
    msg = "Accuracy on Test-Set: {0:.1%} ({1}/{2})"
    print(msg.format(acc, correct_sum, num_test))
# Train and evaluate. The session is created with the module-level `config`
# so the GPU memory-growth option is explicitly applied to the session that
# actually executes the graph.
with tf.Session(config=config) as sess:
    init = tf.global_variables_initializer()
    sess.run(init)
    train_batch_size = 50
    test_batch_size = 256
    # Performance after 20000 optimization iterations
    optimize(20000)
    print_test_accuracy()
# Sample output from a recorded run:
# Optimization Iteration: 19800, Training Accuracy: 99.3%
# Optimization Iteration: 19900, Training Accuracy: 99.3%
# Time usage:0:01:28
# Accuracy on Test-Set: 99.3% (9933/10000)
这一层的输入就是原始的图像像素,LeNet-5模型接受的输入层大小为32x32x1。第一个卷积层过滤器的尺寸为5x5,深度为6,不使用全0填充,步长为1。因为没有使用全0填充,所以这一层输出的尺寸为32-5+1=28,深度为6。这一个卷积层总共有5x5x1x6+6=156个参数,其中6个为偏置项参数。因为下一层的节点矩阵有28x28x6=4704个节点,每个节点和5x5=25个当前层节点相连,所以本层卷积层总共有4704x(25+1)=122304个连接。
本层的输入矩阵大小为14x14x6,使用的过滤器大小为5x5,深度为16。本层不使用全0填充,步长为1。本层的输出矩阵大小为10x10x16。按照标准的卷积层,本层应该有5x5x6x16+16=2416个参数,10x10x16x(25+1)=41600个连接。
本层的输入矩阵大小为5x5x16,在LeNet-5模型的论文中将这一层称为卷积层,但是因为过滤器的大小就是5x5,所以和全连接层没有区别,在TensorFlow程序实现中也会将这一层看成全连接层。本层的输出节点个数为120,总共有5x5x16x120+120=48120个参数。同时本层有5x5x16x120+120=48120个连接。
本层的输入节点个数为120个,输出节点个数为84个,总共参数为120x84+84=10164个。同时本层有120x84+84=10164个连接。
本程序是根据LeNet-5实现手写体数字识别分类,模型框架如图所示:
