当前位置:   article > 正文

Tensorflow实现LeNet-5 MNIST手写体数字识别分类_基于tensorflow使用lenet-5实现对mnist手写数字识别分类

基于tensorflow使用lenet-5实现对mnist手写数字识别分类
  1. # import modules
  2. import numpy as np
  3. import matplotlib.pyplot as plt
  4. import tensorflow as tf
  5. import time
  6. from datetime import timedelta
  7. import math
  8. from tensorflow.examples.tutorials.mnist import input_data
  9. import os
  10. os.environ['CUDA_VISIBLE_DEVICES'] = '0'
  11. # 设置按需使用GPU
  12. config = tf.ConfigProto()
  13. config.gpu_options.allow_growth = True
  14. sess = tf.InteractiveSession(config=config)
  15. # 权值初始化
  16. def weight_variable(shape):
  17. # 用正态分布来初始化权值
  18. initial = tf.truncated_normal(shape, stddev=0.1)
  19. return tf.Variable(initial)
  20. # def weight_variable(shape):
  21. # return tf.Variable(tf.truncated_normal(shape, stddev=0.05))
  22. # 偏置初始化
  23. def bias_variable(shape):
  24. # 本例中用relu激活函数,所以用一个很小的正偏置较好
  25. initial = tf.constant(0.1, shape=shape)
  26. return tf.Variable(initial)
  27. # def bias_variable(length):
  28. # return tf.Variable(tf.constant(0.1, shape=length))
  29. # input 代表输入,filter 代表卷积核
  30. def conv2d(input, filter):
  31. return tf.nn.conv2d(input, filter, strides=[1, 1, 1, 1], padding='SAME')
  32. # def conv2d(x, W):
  33. # return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
  34. # 2x2最大池化层
  35. def max_pool(input):
  36. return tf.nn.max_pool(input, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
  37. # def max_pool_2x2(inputx):
  38. # return tf.nn.max_pool(inputx, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
  39. # import data
  40. # data = input_data.read_data_sets("./data", one_hot=True) # one_hot means [0 0 1 0 0 0 0 0 0 0] stands for 2
  41. mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
  42. print("Size of:")
  43. print("--Training-set:\t\t{}".format(len(mnist.train.labels)))
  44. print("--Testing-set:\t\t{}".format(len(mnist.test.labels)))
  45. print("--Validation-set:\t{}".format(len(mnist.validation.labels)))
  46. mnist.test.cls = np.argmax(mnist.test.labels, axis=1) # show the real test labels: [7 2 1 ..., 4 5 6], 10000values
  47. # input_layer
  48. x_input = tf.placeholder(tf.float32, [None, 784], name="x_input")
  49. input_image = tf.reshape(x_input, [-1, 28, 28, 1])
  50. # x = tf.placeholder("float", shape=[None, 784], name='x')
  51. # x_image = tf.reshape(x, [-1, 28, 28, 1])
  52. y_input = tf.placeholder(tf.float32, [None, 10], name="y_input")
  53. # y_true = tf.placeholder("float", shape=[None, 10], name='y_true')
  54. y_true_cls = tf.argmax(y_input, dimension=1)
  55. # Conv 1
  56. layer_conv1 = {"weights": weight_variable([5, 5, 1, 32]),
  57. "biases": bias_variable([32])}
  58. h_conv1 = tf.nn.relu(conv2d(input_image, layer_conv1["weights"]) + layer_conv1["biases"])
  59. h_pool1 = max_pool(h_conv1)
  60. # Conv 2
  61. layer_conv2 = {"weights": weight_variable([5, 5, 32, 64]),
  62. "biases": bias_variable([64])}
  63. h_conv2 = tf.nn.relu(conv2d(h_pool1, layer_conv2["weights"]) + layer_conv2["biases"])
  64. h_pool2 = max_pool(h_conv2)
  65. # Flat
  66. h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
  67. # Full-connected layer 1
  68. fc1_layer = {"weights": weight_variable([7 * 7 * 64, 1024]),
  69. "biases": bias_variable([1024])}
  70. h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, fc1_layer["weights"]) + fc1_layer["biases"])
  71. # Droupout Layer
  72. keep_prob = tf.placeholder("float")
  73. h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
  74. # Full-connected layer 2
  75. fc2_layer = {"weights": weight_variable([1024, 10]),
  76. "biases": bias_variable([10])}
  77. # Predicted class
  78. y_pred = tf.nn.softmax(
  79. tf.matmul(h_fc1_drop, fc2_layer["weights"]) + fc2_layer["biases"]) # The output is like [0 0 1 0 0 0 0 0 0 0]
  80. y_pred_cls = tf.argmax(y_pred, dimension=1) # Show the real predict number like '2'
  81. # cost function to be optimized
  82. # 损失模型隐藏到loss-model模块
  83. with tf.name_scope("loss-model"):
  84. # 1.损失函数loss:cross_entropy
  85. cross_entropy = -tf.reduce_mean(y_input * tf.log(y_pred))
  86. optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cross_entropy)
  87. # 给损失模型的输出添加scalar,用来观察loss的收敛曲线
  88. tf.summary.scalar("loss", cross_entropy)
  89. # cross_entropy = -tf.reduce_mean(y_input * tf.log(y_pred))
  90. # optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cross_entropy)
  91. # Performance Measures
  92. with tf.name_scope("accuracy-model"):
  93. # y_pre和y_input一行对应一个标签,行数对应batch的size大小
  94. correct_prediction = tf.equal(y_pred_cls, y_true_cls)
  95. accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
  96. # 给损失模型的输出添加scalar,用来观察accracy的收敛曲线
  97. tf.summary.scalar("test_acc", accuracy)
  98. # correct_prediction = tf.equal(y_pred_cls, y_true_cls)
  99. # accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
  100. def optimize(num_iterations):
  101. total_iterations = 0
  102. start_time = time.time()
  103. # 调用 merge_all() 收集所有的操作数据
  104. merged = tf.summary.merge_all()
  105. # 模型运行产生的所有数据保存到 ./tensorflow 文件夹供 TensorBoard 使用
  106. writer = tf.summary.FileWriter('./tensorboard', sess.graph, filename_suffix="_mnist")
  107. for i in range(total_iterations, total_iterations + num_iterations):
  108. x_batch, y_batch = mnist.train.next_batch(train_batch_size)
  109. feed_dict_train_op = {x_input: x_batch, y_input: y_batch, keep_prob: 0.5}
  110. feed_dict_test = {x_input: mnist.test.images, y_input: mnist.test.labels, keep_prob: 1.0}
  111. # summary = sess.run(merged, feed_dict={x_input: x_batch, y_input: y_batch})
  112. # train_step.run(feed_dict={x_input: x_batch, y_input: y_batch})
  113. # 等价于上述两条语句
  114. summary, _, train_loss = sess.run([merged, optimizer, cross_entropy],
  115. feed_dict=feed_dict_train_op)
  116. # sess.run(optimizer, feed_dict=feed_dict_train_op)
  117. # Print status every 100 iterations.
  118. if i % 100 == 0:
  119. # Calculate the accuracy on the training-set.
  120. acc = sess.run(accuracy, feed_dict=feed_dict_test)
  121. # Message for printing.
  122. msg = "Optimization Iteration:{0:>6}, Training Accuracy: {1:>6.1%}"
  123. # Print it.
  124. # print(msg.format(i + 1, acc))
  125. print(msg.format(i, acc))
  126. writer.add_summary(summary, i)
  127. # Update the total number of iterations performed
  128. total_iterations += num_iterations
  129. # Ending time
  130. end_time = time.time()
  131. # Difference between start and end_times.
  132. time_dif = end_time - start_time
  133. # Print the time-usage
  134. print("Time usage:" + str(timedelta(seconds=int(round(time_dif)))))
  135. def print_test_accuracy():
  136. # Number of images in the test-set.
  137. num_test = len(mnist.test.images)
  138. cls_pred = np.zeros(shape=num_test, dtype=np.int)
  139. i = 0
  140. while i < num_test:
  141. # The ending index for the next batch is denoted j.
  142. j = min(i + test_batch_size, num_test)
  143. # Get the images from the test-set between index i and j
  144. images = mnist.test.images[i:j, :]
  145. # Get the associated labels
  146. labels = mnist.test.labels[i:j, :]
  147. # Create a feed-dict with these images and labels.
  148. feed_dict = {x_input: images, y_input: labels, keep_prob: 1.0}
  149. # Calculate the predicted class using Tensorflow.
  150. cls_pred[i:j] = sess.run(y_pred_cls, feed_dict=feed_dict)
  151. # Set the start-index for the next batch to the
  152. # end-index of the current batch
  153. i = j
  154. cls_true = mnist.test.cls
  155. correct = (cls_true == cls_pred)
  156. correct_sum = correct.sum()
  157. acc = float(correct_sum) / num_test
  158. # Print the accuracy
  159. msg = "Accuracy on Test-Set: {0:.1%} ({1}/{2})"
  160. print(msg.format(acc, correct_sum, num_test))
  161. with tf.Session() as sess:
  162. init = tf.global_variables_initializer()
  163. sess.run(init)
  164. train_batch_size = 50
  165. test_batch_size = 256
  166. # Performance after 20000 optimization iterations
  167. optimize(20000)
  168. print_test_accuracy()
  169. # 输出结果:
  170. # Optimization Iteration: 19800, Training Accuracy: 99.3%
  171. # Optimization Iteration: 19900, Training Accuracy: 99.3%
  172. # Time usage:0:01:28
  173. # Accuracy on Test-Set: 99.3% (9933/10000)

手动下载mnist数据集地址:http://yann.lecun.com/exdb/mnist/ 

LeNet-5模型框架:

      

LeNet-5模型每一层的结构:

(1)第一层:卷积层

这一层的输入就是原始的图像像素,LeNet-5模型接受的输入层大小为32x32x1。第一个卷积层过滤器的尺寸为5x5,深度为6,不使用全0填充,步长为1。因为没有使用全0填充,所以这一层输出的边长为32-5+1=28,深度为6。这一个卷积层总共有5x5x1x6+6=156个参数,其中6个为偏置项参数。因为下一层的节点矩阵有28x28x6=4704个节点,每个节点和5x5=25个当前层节点相连,所以本层卷积层总共有4704x(25+1)=122304个连接。

(2)第二层:池化层

这一层的输入为第一层的输出,是一个28x28x6的节点矩阵。本层采用的过滤器大小为2x2,长和宽的步长均为2,所以本层的输出矩阵的大小为14x14x6。

(3)第三层:卷积层

本层的输入矩阵大小为14x14x6,使用的过滤器大小为5x5,深度为16。本层不使用全0填充,步长为1。本层的输出矩阵大小为10x10x16。按照标准的卷积层,本层应该有 5x5x6x16+16=2416个参数,10x10x16x (25+1) =41600个连接。

(4)第四层:池化层

本层的输入矩阵大小为10x10x16,采用的过滤器大小为2x2,步长为2。本层的输出矩阵大小为5x5x16。

(5)第五层:全连接层

本层的输入矩阵大小为5x5x16,在LeNet-5模型的论文中将这一层称为卷积层,但是因为过滤器的大小就是5x5,所以和全连接层没有区别,在TensorFlow程序实现中也会将这一层看成全连接层。本层的输出节点个数为120,总共有 5x5x16x120+120=48120个参数。同时本层有5x5x16x120+120=48120个连接。

(6)第六层:全连接层

本层的输入节点个数为120个,输出节点个数为84个,总共参数为120x84+84=10164 个。同时本层有120x84+84=10164个连接。

(7)第七层:全连接层

本层的输入节点个数为84个,输出节点个数为10个,总共参数为84x10+10=850个。同时本层有84x10+10=850个连接。

本程序是参照LeNet-5实现的手写体数字识别分类(与上述经典结构不同:本程序的输入为28x28x1,卷积层使用全0填充,两个卷积层的深度分别为32和64,第一个全连接层有1024个节点),模型框架如图所示:

                                        

 

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/从前慢现在也慢/article/detail/862627
推荐阅读
相关标签
  

闽ICP备14008679号