赞
踩
作为RNN的第二个demo,笔者将会介绍RNN模型在识别验证码方面的应用。
我们的验证码及样本数据集来自于博客: CNN大战验证码,在这篇博客中,我们已经准备好了所需的样本数据集,不需要再辛辛苦苦地弄一遍,直接调用data.csv就可以进行建模了。
用TensorFlow搭建简单RNN模型,因为是多分类问题,所以在最后的输出部分再加一个softmax层,损失函数采用对数损失函数,optimizer选择RMSPropOptimizer。以下是RNN模型的完整Python代码(TensorFlow_RNN.py):
# -*- coding: utf-8 -*-
"""Simple RNN classifier built with the TensorFlow 1.x graph API."""
import logging

import tensorflow as tf

# Logging configuration
logging.basicConfig(level = logging.INFO,
                    format='%(asctime)s - %(levelname)s: %(message)s')
logger = logging.getLogger(__name__)


class RNN:
    """Basic RNN followed by a linear layer, for multi-class classification.

    The graph is built in ``__init__``: a ``BasicRNNCell`` is unrolled with
    ``dynamic_rnn``, the output of the last time step is projected to
    ``num_classes`` logits, and the mean softmax cross-entropy is minimised
    with RMSProp.
    """

    def __init__(self, element_size, time_steps, num_classes, batch_size,
                 hidden_layer_size=150, epoch=1000, learning_rate=0.001,
                 save_model_path=r'./logs/RNN_train.ckpt'):
        """Build the computation graph.

        Args:
            element_size: Number of elements in the vector fed at each step.
            time_steps: Length of the input sequence.
            num_classes: Number of target classes.
            batch_size: Number of samples; stored on the instance but not
                used when building the graph or training.
            hidden_layer_size: Number of units in the RNN cell.
            epoch: Number of training iterations.
            learning_rate: Learning rate passed to RMSProp.
            save_model_path: Checkpoint path used by ``train`` and ``predict``.
        """
        self.epoch = epoch
        self.learning_rate = learning_rate
        self.save_model_path = save_model_path

        # RNN structure
        self.element_size = element_size
        self.time_steps = time_steps
        self.num_classes = num_classes
        self.batch_size = batch_size
        self.hidden_layer_size = hidden_layer_size

        # Input and target placeholders. The target placeholder gets its own
        # name instead of reusing 'inputs', so the two tensors can be told
        # apart in the graph (placeholders are not part of checkpoints, so
        # previously saved models still restore).
        self._inputs = tf.placeholder(
            tf.float32,
            shape=[None, self.time_steps, self.element_size],
            name='inputs')
        self.y = tf.placeholder(
            tf.float32, shape=[None, self.num_classes], name='labels')

        # Basic RNN module built from BasicRNNCell and dynamic_rnn
        rnn_cell = tf.contrib.rnn.BasicRNNCell(self.hidden_layer_size)
        outputs, _ = tf.nn.dynamic_rnn(rnn_cell, self._inputs, dtype=tf.float32)

        # Linear output layer: hidden state -> class logits
        Wl = tf.Variable(tf.truncated_normal(
            [self.hidden_layer_size, self.num_classes], mean=0, stddev=.01))
        bl = tf.Variable(tf.truncated_normal(
            [self.num_classes], mean=0, stddev=.01))

        # Only the output of the last time step is used for classification
        last_rnn_output = outputs[:, -1, :]
        self.final_output = tf.matmul(last_rnn_output, Wl) + bl

        # Loss (softmax is applied inside the cross-entropy op) and optimiser
        per_sample_loss = tf.nn.softmax_cross_entropy_with_logits(
            logits=self.final_output, labels=self.y)
        self.cross_entropy = tf.reduce_mean(per_sample_loss)
        self.train_model = tf.train.RMSPropOptimizer(
            self.learning_rate, 0.9).minimize(self.cross_entropy)

        self.saver = tf.train.Saver()
        logger.info('Initialize RNN model...')

    def train(self, x_data, y_data):
        """Train on the whole data set and save a checkpoint.

        Every iteration feeds all of ``x_data``/``y_data`` at once. The loss
        is printed roughly 50 times over the run.

        Args:
            x_data: Inputs shaped (samples, time_steps, element_size).
            y_data: One-hot targets shaped (samples, num_classes).
        """
        logger.info('Training RNN model...')
        # At least 1, so runs with fewer than 50 iterations no longer hit a
        # modulo-by-zero.
        report_every = max(1, self.epoch // 50)

        with tf.Session() as sess:
            # Initialise all variables
            sess.run(tf.global_variables_initializer())

            feed_dict = {self._inputs: x_data, self.y: y_data}
            for i in range(self.epoch + 1):
                sess.run(self.train_model, feed_dict=feed_dict)
                if i % report_every == 0:
                    loss = sess.run(self.cross_entropy, feed_dict=feed_dict)
                    print('已训练%d次, loss: %s.' % (i, loss))

            # Save the trained model
            logger.info('Saving RNN model...')
            self.saver.save(sess, self.save_model_path)

    def predict(self, data):
        """Restore the saved checkpoint and return the logits for ``data``.

        Args:
            data: Inputs shaped (samples, time_steps, element_size).

        Returns:
            Array of raw class scores (logits, no softmax applied), one row
            per sample.
        """
        with tf.Session() as sess:
            logger.info('Restoring RNN model...')
            self.saver.restore(sess, self.save_model_path)
            logits = sess.run(self.final_output,
                              feed_dict={self._inputs: data})
        return logits
对样本数据集data.csv进行RNN建模,将数据集分为训练集和测试集,各占70%和30%.因为图片的大小为16*20,所以在将图片看成序列时,序列的长度为20,每一时刻的向量含有16个元素,共有31个目标类,取隐藏层大小为300,总共训练1000次。 完整的Python代码如下:
# -*- coding: utf-8 -*-
"""
Digit/letter recognition.

Multi-class classification of the captcha character data set with an RNN.
"""
from TensorFlow_RNN import RNN
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelBinarizer

CSV_FILE_PATH = 'F://验证码识别/data.csv'  # path of the CSV file
df = pd.read_csv(CSV_FILE_PATH)  # load the data set

# Feature columns v1..v320: one per pixel of a 16*20 character image
features = ['v'+str(i+1) for i in range(16*20)]
labels = df['label'].unique()

# One-hot encode the true labels
lb = LabelBinarizer()
lb.fit(labels)
y_bin = lb.transform(df['label'])
y_bin_columns = ['y'+str(i) for i in range(y_bin.shape[1])]
y_true = pd.DataFrame(y_bin, columns=y_bin_columns, index=df.index)

# Split into 70% training data and 30% test data
x_train, x_test, y_train, y_test = train_test_split(
    df[features], y_true,
    train_size=0.7, test_size=0.3, random_state=123)

# Build the RNN
# Checkpoint location
MODEL_SAVE_PATH = 'F:///验证码识别/logs/RNN_train.ckpt'
# RNN configuration
element_size = 16
time_steps = 20
num_classes = len(y_bin_columns)
hidden_layer_size = 300
batch_size = 960

new_x_train = np.array(x_train).reshape((-1, time_steps, element_size))
new_x_test = np.array(x_test).reshape((-1, time_steps, element_size))

rnn = RNN(element_size=element_size,
          time_steps=time_steps,
          num_classes=num_classes,
          batch_size=batch_size,
          hidden_layer_size=hidden_layer_size,
          epoch=1000,
          save_model_path=MODEL_SAVE_PATH,
          )

# Train the RNN
rnn.train(new_x_train, np.array(y_train))

# Predict the test set
y_pred = rnn.predict(new_x_test)

# Decode the predicted class of every sample.
# Column i of the one-hot encoding corresponds to lb.classes_[i] (sorted
# order), NOT to df['label'].unique() (order of first appearance), so the
# argmax has to be looked up in lb.classes_.
classes = lb.classes_
prediction = [classes[idx] for idx in np.argmax(y_pred, axis=1)]

# Prediction accuracy; work on a copy so the columns are not assigned onto
# a slice of df
x_test = x_test.copy()
x_test['prediction'] = prediction
x_test['label'] = df.loc[y_test.index, 'label']
print(x_test.head())
accuracy = accuracy_score(x_test['label'], x_test['prediction'])
print('RNN的预测准确率为%.2f%%.'%(accuracy*100))
以下是模型训练的结果:
2018-09-26 11:18:12,339 - INFO: Initialize RNN model... 2018-09-26 11:18:12,340 - INFO: Training RNN model... 已训练0次, loss: 3.43417. 已训练20次, loss: 3.42695. 已训练40次, loss: 3.40638. 已训练60次, loss: 3.33286. 已训练80次, loss: 2.78305. 已训练100次, loss: 2.33391. 已训练120次, loss: 1.15871. 已训练140次, loss: 0.659932. 已训练160次, loss: 0.566225. 已训练180次, loss: 0.397372. 已训练200次, loss: 0.317218. 已训练220次, loss: 0.346782. 已训练240次, loss: 0.639625. 已训练260次, loss: 0.0575929. 已训练280次, loss: 0.100429. 已训练300次, loss: 0.0347529. 已训练320次, loss: 0.0189503. 已训练340次, loss: 0.0265893. 已训练360次, loss: 0.0151181. 已训练380次, loss: 1.18662. 已训练400次, loss: 0.0164317. 已训练420次, loss: 0.00819814. 已训练440次, loss: 0.0041992. 已训练460次, loss: 0.0206414. 已训练480次, loss: 0.00826409. 已训练500次, loss: 0.00398952. 已训练520次, loss: 0.00214751. 已训练540次, loss: 0.0365587. 已训练560次, loss: 0.00738376. 已训练580次, loss: 0.00302118. 已训练600次, loss: 0.00161713. 已训练620次, loss: 0.000885372. 已训练640次, loss: 1.24874. 已训练660次, loss: 0.00601175. 已训练680次, loss: 0.0023275. 已训练700次, loss: 0.00121995. 已训练720次, loss: 0.000705643. 已训练740次, loss: 0.000407971. 已训练760次, loss: 0.000219642. 已训练780次, loss: 0.0889083. 已训练800次, loss: 0.00395974. 已训练820次, loss: 0.00131215. 已训练840次, loss: 0.000631665. 已训练860次, loss: 0.000342329. 已训练880次, loss: 0.000191806. 已训练900次, loss: 0.000108547. 已训练920次, loss: 6.29806e-05. 已训练940次, loss: 3.99281e-05. 已训练960次, loss: 0.0124334. 已训练980次, loss: 0.00142853. 2018-09-26 11:26:08,302 - INFO: Saving RNN model... 已训练1000次, loss: 0.000571731. 2018-09-26 11:26:08,761 - INFO: Restoring RNN model... INFO:tensorflow:Restoring parameters from F:///验证码识别/logs/RNN_train.ckpt 2018-09-26 11:26:08,761 - INFO: Restoring parameters from F:///验证码识别/logs/RNN_train.ckpt v1 v2 v3 v4 v5 v6 v7 v8 v9 v10 ... v313 v314 v315 v316 \ 657 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 18 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 700 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 221 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1219 1 1 1 1 1 1 1 1 1 1 ... 
1 1 1 1 v317 v318 v319 v320 prediction label 657 1 1 1 1 G G 18 1 1 1 1 1 1 700 1 1 1 1 H H 221 1 1 1 1 5 5 1219 1 1 1 1 V V [5 rows x 322 columns] RNN的预测准确率为93.69%.
总共的训练时间为8分钟,在测试集上的准确率为93.69%.与CNN相比,测试集上的准确率略高,训练时间却明显减少,因为CNN训练1000次的时间为75分钟。总的来说,该RNN模型在这个数据集的表现优于之前的CNN模型。
接着,我们利用刚才训练好的RNN模型,对新验证码进行识别,看看模型的识别效果。
笔者采集了50张新验证码,如下:
完整的预测新验证码的Python脚本如下:
# -*- coding: utf-8 -*-
"""
Recognise captchas with a previously trained RNN model.

(The original author reports 95.15% test-set accuracy for a model trained
on 960 samples for 1000 iterations.)
"""
import os
import cv2
import pandas as pd
import numpy as np
from TensorFlow_RNN import RNN

# Working directory holding the character crops of the captcha in progress
CHARS_DIR = 'F://chars'
# The argmax index of the model output is looked up in this string
LABELS = '123456789ABCDEFGHJKLNPQRSTUVXYZ'


def split_picture(imagepath):
    """Cut a captcha image into character crops saved as CHARS_DIR/<n>.jpg.

    The crops are numbered from 1 in left-to-right order.
    """
    # Read the picture in grayscale mode
    gray = cv2.imread(imagepath, 0)

    # Paint the one-pixel border white
    height, width = gray.shape
    gray[0, :] = 255
    gray[height-1, :] = 255
    gray[:, 0] = 255
    gray[:, width-1] = 255

    # Median filter with a 3*3 kernel
    blur = cv2.medianBlur(gray, 3)
    # Binarise
    ret, thresh1 = cv2.threshold(blur, 200, 255, cv2.THRESH_BINARY)

    # findContours returns (image, contours, hierarchy) on OpenCV 3.x but
    # (contours, hierarchy) on 2.x/4.x; the contours are always the
    # second-to-last element.
    contours = cv2.findContours(
        thresh1, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)[-2]

    # Keep bounding boxes that do not touch the top/left edge and cover
    # at least 100 pixels
    chars_list = []
    for cnt in contours:
        x, y, w, h = cv2.boundingRect(cnt)
        if x != 0 and y != 0 and w*h >= 100:
            chars_list.append((x, y, w, h))

    sorted_chars_list = sorted(chars_list, key=lambda box: box[0])
    for i, (x, y, w, h) in enumerate(sorted_chars_list):
        cv2.imwrite('%s/%d.jpg' % (CHARS_DIR, i+1), thresh1[y:y+h, x:x+w])


def remove_edge_picture(imagepath):
    """Delete the crop if at least three of its four corners are dark."""
    image = cv2.imread(imagepath, 0)
    height, width = image.shape
    corner_list = [image[0, 0] < 127,
                   image[height-1, 0] < 127,
                   image[0, width-1] < 127,
                   image[height-1, width-1] < 127
                   ]
    if sum(corner_list) >= 3:
        os.remove(imagepath)


def resplit_with_parts(imagepath, parts):
    """Replace a crop by ``parts`` equal-width vertical slices.

    The slices are written to CHARS_DIR as <name>-<i>.jpg and the original
    file is removed.
    """
    image = cv2.imread(imagepath, 0)
    os.remove(imagepath)
    height, width = image.shape

    file_name = imagepath.split('/')[-1].split(r'.')[0]
    step = width//parts  # width of every slice
    start = 0            # left edge of the current slice
    for i in range(parts):
        cv2.imwrite('%s/%s.jpg' % (CHARS_DIR, file_name+'-'+str(i)),
                    image[:, start:start+step])
        start += step


def resplit(imagepath):
    """Split a crop that is wide enough to contain several characters."""
    image = cv2.imread(imagepath, 0)
    height, width = image.shape
    if width >= 64:
        resplit_with_parts(imagepath, 4)
    elif width >= 48:
        resplit_with_parts(imagepath, 3)
    elif width >= 26:
        resplit_with_parts(imagepath, 2)


def convert(dir, file):
    """Binarise the image dir/file and overwrite it resized to 16*20."""
    imagepath = dir+'/'+file
    image = cv2.imread(imagepath, 0)
    ret, thresh = cv2.threshold(image, 127, 255, cv2.THRESH_BINARY)
    img = cv2.resize(thresh, (16, 20), interpolation=cv2.INTER_AREA)
    cv2.imwrite('%s/%s' % (dir, file), img)


def Read_Data(dir, file):
    """Return the pixels of dir/file as a flat list of 0/1 values.

    A pixel maps to 1 when it is 255 after thresholding at 127, else to 0.
    """
    imagepath = dir+'/'+file
    image = cv2.imread(imagepath, 0)
    ret, thresh = cv2.threshold(image, 127, 255, cv2.THRESH_BINARY)
    return (thresh.ravel() == 255).astype(int).tolist()


def _char_sort_key(file_name):
    """Sort key for crop names such as '3.jpg' or '2-1.jpg' (numeric order)."""
    stem = file_name.split('.')[0]
    return tuple(int(part) if part.isdigit() else 0
                 for part in stem.split('-'))


def predict(rnn, VerifyCodePath, time_steps, element_size):
    """Recognise one captcha image.

    Args:
        rnn: Model object exposing ``predict(data)``.
        VerifyCodePath: Path of the captcha image; the file name without
            extension is taken as the true label.
        time_steps: Sequence length expected by the model.
        element_size: Vector size per time step expected by the model.

    Returns:
        Tuple ``(true_label, predicted_text)``. ``predicted_text`` is an
        empty string when no character crop could be extracted.
    """
    true_label = VerifyCodePath.split('/')[-1].split(r'.')[0]

    # Remove the crops left over from the previous captcha
    for name in os.listdir(CHARS_DIR):
        os.remove(CHARS_DIR + '/' + name)

    split_picture(VerifyCodePath)

    files = os.listdir(CHARS_DIR)
    if not files:
        print('查看的文件夹为空!')
        # Still return a pair so callers can unpack the result
        return true_label, ''

    # Drop noise crops
    for name in files:
        remove_edge_picture(CHARS_DIR + '/' + name)

    # Re-split crops containing touching characters
    for name in os.listdir(CHARS_DIR):
        resplit(CHARS_DIR + '/' + name)

    # Bring every crop to 16*20
    for name in os.listdir(CHARS_DIR):
        convert(CHARS_DIR, name)

    # Order crops left to right by their numeric name; sorting on the first
    # character alone left ties to the arbitrary os.listdir order.
    files = sorted(os.listdir(CHARS_DIR), key=_char_sort_key)
    if not files:
        return true_label, ''

    table = [Read_Data(CHARS_DIR, name) for name in files]
    new_test_data = np.array(table).reshape((-1, time_steps, element_size))
    y_pred = rnn.predict(new_test_data)

    # Decode the most likely class of every crop
    prediction = [LABELS[idx] for idx in np.argmax(y_pred, axis=1)]
    return true_label, ''.join(prediction)


def main():
    """Run the model on every captcha in F://VerifyCode/ and print accuracy."""
    # Checkpoint location
    MODEL_SAVE_PATH = 'F:///验证码识别/logs/RNN_train.ckpt'
    # RNN configuration
    element_size = 16
    time_steps = 20
    num_classes = 31
    batch_size = 4
    hidden_layer_size = 300

    rnn = RNN(element_size=element_size,
              time_steps=time_steps,
              num_classes=num_classes,
              batch_size=batch_size,
              hidden_layer_size=hidden_layer_size,
              epoch=1000,
              save_model_path=MODEL_SAVE_PATH,
              )

    # Predict every captcha
    pred_list = []
    verify_dir = 'F://VerifyCode/'
    for name in os.listdir(verify_dir):
        label, prediction = predict(rnn, verify_dir+name,
                                    time_steps, element_size)
        pred_list.append((label, prediction))

    # Count correct predictions; avoid dividing by zero on an empty folder
    total_images = len(pred_list)
    correct_pred = sum(1 for truth, guess in pred_list if truth == guess)
    accuracy = correct_pred*100/total_images if total_images else 0.0
    print("\n一共有%d张图片,识别正确的图片为%d张,\n"
          "RNN的预测准确率为%.2f%%."
          %(total_images, correct_pred, accuracy))


if __name__ == '__main__':
    main()
输出的结果如下:
一共有50张图片,识别正确的图片为45张,
RNN的预测准确率为90.00%.
识别的效果相当可以。
对于用RNN识别图像,有时候其表现不会比CNN模型差,在训练时间上有明显改善。
笔者将会持续RNN方面的研究,欢迎大家关注~
注意:本人现已开通微信公众号: 轻松学会Python爬虫(微信号为:easy_web_scrape), 欢迎大家关注哦~~
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。