Environment: Windows 10, Jupyter Notebook, Python 3.7, TensorFlow 2, CPU.
First, let's do a simple face recognition test with a dataset that ships with sklearn.
```python
from sklearn import datasets
from sklearn.model_selection import train_test_split
from matplotlib import pyplot as plt
import tensorflow.keras as keras
import numpy as np

faces = datasets.fetch_olivetti_faces()
print(faces.images.shape)

# 400 images in total; lay them out on a 20x20 grid
plt.figure(figsize=(20, 20))
for i, img in enumerate(faces.images):
    plt.subplot(20, 20, i + 1)
    plt.imshow(img, cmap="gray")
    # hide the x and y axes
    plt.xticks([])
    plt.yticks([])
    plt.xlabel(faces.target[i])
plt.show()

# face data
X = faces.images
# labels for the faces
y = faces.target
print(X[0])
print(y[0])
X = X.reshape(400, 64, 64, 1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

model = keras.Sequential()
# first convolutional layer: 128 filters, 3x3 kernels, ReLU activation
model.add(keras.layers.Conv2D(128, kernel_size=3, activation='relu', input_shape=(64, 64, 1)))
# second convolutional layer
model.add(keras.layers.Conv2D(64, kernel_size=3, activation='relu'))
# flatten the feature maps to one dimension (essentially a reshape) for the Dense layer
model.add(keras.layers.Flatten())
# the CNN's fully connected layer: gathers the features above and classifies into 40 people
model.add(keras.layers.Dense(40, activation='softmax'))
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

model.fit(X_train, y_train, epochs=10)

y_predict = model.predict(X_test)
print(y_test[0], np.argmax(y_predict[0]))
```
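To gauge performance beyond a single sample, the model can also be evaluated on the whole test set; a small addition reusing the `model`, `X_test`, and `y_test` defined above:

```python
# returns the loss plus the metrics declared in compile() (here: accuracy)
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
print('test accuracy: {:.2%}'.format(test_acc))
```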
OK. Now let's build our own face recognition model with OpenCV and TensorFlow.
```python
# __future__ imports must come before any other statement
from __future__ import absolute_import, division, print_function, unicode_literals

import os
import random

import cv2
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from PIL import Image, ImageDraw, ImageFont
```
This step uses OpenCV to capture frames from the camera or a video and saves them as images, 200 frames per label.
The camera and video source settings are described in the comments; note that the cascade classifier path must point to your own OpenCV installation.
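Incidentally, recent opencv-python packages expose the bundled cascade directory as `cv2.data.haarcascades`, which avoids hard-coding an install path; a minimal sketch (assuming opencv-python 3.4 or newer):

```python
import cv2

# cv2.data.haarcascades is the directory of cascade files bundled with opencv-python
cascade_path = cv2.data.haarcascades + "haarcascade_frontalface_alt2.xml"
classifier = cv2.CascadeClassifier(cascade_path)
print(classifier.empty())  # False means the cascade file loaded successfully
```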
```python
def CatchPICFromVideo(window_name, camera_idx, catch_pic_num, path_name):
    cv2.namedWindow(window_name)
    # video source: 0 for the webcam, or a video path such as
    # "C:/Users/lishu/Desktop/ob/x/face/zjm/zjm.mp4"
    cap = cv2.VideoCapture(camera_idx)
    # cascade classifier path: find haarcascade_frontalface_alt2.xml under your OpenCV installation
    classfier = cv2.CascadeClassifier("C:/Users/lishu/Anaconda3/Lib/site-packages/cv2/data/haarcascade_frontalface_alt2.xml")
    color = (255, 0, 0)
    num = 0
    while cap.isOpened():
        ok, frame = cap.read()  # frame is the captured image, a 3-channel matrix
        if not ok:
            break
        grey = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faceRects = classfier.detectMultiScale(grey, scaleFactor=1.2, minNeighbors=3, minSize=(32, 32))
        if len(faceRects) > 0:
            for faceRect in faceRects:
                x, y, w, h = faceRect
                img_name = '%s%d.jpg' % (path_name, num)  # save the current face as an image
                image = frame[y - 10: y + h + 10, x - 10: x + w + 10]
                cv2.imwrite(img_name, image)
                num += 1
                if num > catch_pic_num:
                    break
                cv2.rectangle(frame, (x - 10, y - 10), (x + w + 10, y + h + 10), color, 2)
                font = cv2.FONT_HERSHEY_SIMPLEX
                cv2.putText(frame, 'num:%d' % num, (30, 30), font, 1, (255, 0, 0), 1)
        if num > catch_pic_num:
            break
        cv2.imshow(window_name, frame)
        c = cv2.waitKey(10)
        if c & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

if __name__ == '__main__':
    CatchPICFromVideo("catch_face_data", 0, 200 - 1, 'C:/Users/lishu/Desktop/ob/xx/face/axe/axe')
```
```python
IMAGE_SIZE = 64  # resize every image to 64x64

# resize an image to the specified size, padding it square first
def resize_image(image, height=IMAGE_SIZE, width=IMAGE_SIZE):
    top, bottom, left, right = (0, 0, 0, 0)

    # get the image dimensions
    h, w, _ = image.shape

    # for non-square images, find the longer edge
    longest_edge = max(h, w)

    # compute how many pixels of padding the short edge needs to match the long edge
    if h < longest_edge:
        dh = longest_edge - h
        top = dh // 2
        bottom = dh - top
    elif w < longest_edge:
        dw = longest_edge - w
        left = dw // 2
        right = dw - left

    # RGB color of the padding
    BLACK = [0, 0, 0]

    # pad the image so height and width are equal; cv2.BORDER_CONSTANT fills with `value`
    constant = cv2.copyMakeBorder(image, top, bottom, left, right, cv2.BORDER_CONSTANT, value=BLACK)
    # convert the image to grayscale
    constant = cv2.cvtColor(constant, cv2.COLOR_BGR2GRAY)

    # resize and return (the image is square, so argument order does not matter here)
    return cv2.resize(constant, (height, width))

# module-level accumulators that read_path appends to
images = []
labels = []

def read_path(path_name):
    for dir_item in os.listdir(path_name):
        # join with the initial path to build a usable absolute path
        full_path = os.path.abspath(os.path.join(path_name, dir_item))

        if os.path.isdir(full_path):  # recurse into subdirectories
            read_path(full_path)
        else:  # a file
            if dir_item.endswith('.jpg'):
                image = cv2.imread(full_path)
                image = resize_image(image, IMAGE_SIZE, IMAGE_SIZE)

                images.append(image)
                labels.append(path_name)

    return images, labels

# read the training data from the given path
def load_dataset(path_name):
    images, labels = read_path(path_name)

    # stack all images into one array of shape (number of images, IMAGE_SIZE, IMAGE_SIZE),
    # e.g. 5 people x 200 images each at 64x64 pixels
    # (grayscale after resize_image, so one value per pixel)
    images = np.array(images)
    print(images.shape)

    # label the data with integer class indices (they must start from 0), matching the
    # sparse categorical crossentropy loss used later; add an elif branch per person
    temp = 0
    for label in labels:
        if label.endswith('axe'):
            labels[temp] = 0
        elif label.endswith('ef'):
            labels[temp] = 1

        temp = temp + 1
    return images, labels
```
The Adam optimizer's learning_rate is set to 0.001 as a rule of thumb.
The batch size is set to a power of two; larger batches need fewer steps per epoch but make each step slower.
The epochs parameter sets the number of training passes; tune it against the training accuracy, and don't make it too large, to avoid overfitting (the code below uses 200).
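For intuition about the batch size, here is a quick sketch of how it trades off against the number of optimizer steps per epoch (assuming roughly 1000 images, i.e. 5 people at 200 frames each, with the 70/30 split used below):

```python
import math

num_images = 1000     # assumption: 5 people x 200 frames each
train_fraction = 0.7  # 70% of the data goes to training

for batch in (16, 32, 64, 128):
    steps = math.ceil(num_images * train_fraction / batch)
    print(f"batch={batch:>3} -> {steps} optimizer steps per epoch")
```

Fewer, larger steps mean fewer parameter updates per epoch, which is why larger batches often need more epochs to reach the same accuracy.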
```python
class Dataset:
    def __init__(self, path_name):
        # training set
        self.train_images = None
        self.train_labels = None

        # test set
        self.test_images = None
        self.test_labels = None

        # dataset load path
        self.path_name = path_name

        # dimension ordering used by the current backend
        self.input_shape = None

        self.nb_classes = None

    # load the dataset, split it for cross-validation, and preprocess it
    def load(self, img_rows=IMAGE_SIZE, img_cols=IMAGE_SIZE,
             img_channels=1, nb_classes=5):  # grayscale, so 1 channel; 5 people, so 5 classes
        # load the dataset into memory
        images, labels = load_dataset(self.path_name)

        # randomly split the data, 70% training / 30% test
        train_images, test_images, train_labels, test_labels = train_test_split(
            images, labels, test_size=0.3, random_state=random.randint(0, 100))

        # TensorFlow expects an explicit channel dimension; we converted to grayscale
        # above, so it is 1 here (it would be 3 for color images)
        train_images = train_images.reshape(train_images.shape[0], img_rows, img_cols, img_channels)
        test_images = test_images.reshape(test_images.shape[0], img_rows, img_cols, img_channels)
        self.input_shape = (img_rows, img_cols, img_channels)

        # print the sizes of the training and test sets
        print(train_images.shape[0], 'train samples')
        print(test_images.shape[0], 'test samples')

        # convert pixel data to float so it can be normalized
        train_images = train_images.astype('float32')
        test_images = test_images.astype('float32')

        # normalize pixel values to the 0-1 range
        train_images /= 255
        test_images /= 255

        self.train_images = train_images
        self.test_images = test_images
        self.train_labels = train_labels
        self.test_labels = test_labels
        self.nb_classes = nb_classes
```
```python
# build the CNN model
class CNN(tf.keras.Model):
    # model initialization
    def __init__(self):
        super().__init__()
        self.conv1 = tf.keras.layers.Conv2D(
            filters=32,             # number of convolution kernels
            kernel_size=[3, 3],     # receptive field size
            padding='same',         # padding strategy ('valid' or 'same')
            activation=tf.nn.relu,  # activation function
        )
        self.conv3 = tf.keras.layers.Conv2D(filters=32, kernel_size=[3, 3], activation=tf.nn.relu)
        self.pool3 = tf.keras.layers.MaxPool2D(pool_size=[2, 2])
        self.conv4 = tf.keras.layers.Conv2D(filters=64, kernel_size=[3, 3], padding='same', activation=tf.nn.relu)
        self.conv5 = tf.keras.layers.Conv2D(filters=64, kernel_size=[3, 3], activation=tf.nn.relu)
        self.pool4 = tf.keras.layers.MaxPool2D(pool_size=[2, 2])
        self.flatten1 = tf.keras.layers.Flatten()
        self.dense3 = tf.keras.layers.Dense(units=512, activation=tf.nn.relu)
        self.dense4 = tf.keras.layers.Dense(units=5)  # final classification into 5 units

    # forward pass
    def call(self, inputs):
        x = self.conv1(inputs)
        x = self.conv3(x)
        x = self.pool3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        x = self.pool4(x)
        x = self.flatten1(x)
        x = self.dense3(x)
        x = self.dense4(x)
        output = tf.nn.softmax(x)
        return output

    # recognize a face
    def face_predict(self, image):
        image = resize_image(image)
        image = image.reshape((1, IMAGE_SIZE, IMAGE_SIZE, 1))

        # convert to float and normalize
        image = image.astype('float32')
        image /= 255

        # probabilities of the input belonging to each class
        result = self.predict(image)

        # return the class probabilities
        return result[0]
```
```python
if __name__ == '__main__':

    learning_rate = 0.001  # learning rate
    batch = 32             # batch size
    EPOCHS = 200           # number of training epochs

    dataset = Dataset('./face/')  # all data lives under this folder
    dataset.load()

    model = CNN()  # instantiate the model
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)  # optimizer
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy()      # loss function
    train_loss = tf.keras.metrics.Mean(name='train_loss')  # tracks the training loss
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')  # tracks training accuracy
    test_loss = tf.keras.metrics.Mean(name='test_loss')  # tracks the test loss
    test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')  # tracks test accuracy

    @tf.function
    def train_step(images, labels):
        with tf.GradientTape() as tape:
            predictions = model(images)
            loss = loss_object(labels, predictions)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))  # apply the update

        train_loss(loss)                     # update the loss metric
        train_accuracy(labels, predictions)  # update the accuracy metric

    @tf.function
    def test_step(images, labels):
        predictions = model(images)
        t_loss = loss_object(labels, predictions)

        test_loss(t_loss)
        test_accuracy(labels, predictions)

    # .shuffle() reshuffles on each iteration, so the pipelines can be built once
    train_ds = tf.data.Dataset.from_tensor_slices(
        (dataset.train_images, dataset.train_labels)).shuffle(300).batch(batch)
    test_ds = tf.data.Dataset.from_tensor_slices(
        (dataset.test_images, dataset.test_labels)).shuffle(300).batch(batch)

    for epoch in range(EPOCHS):
        # reset the metrics so each epoch reports its own numbers
        train_loss.reset_states()
        train_accuracy.reset_states()
        test_loss.reset_states()
        test_accuracy.reset_states()

        for images, labels in train_ds:
            train_step(images, labels)

        for test_images, test_labels in test_ds:
            test_step(test_images, test_labels)

        template = 'Epoch {} \nTrain Loss:{:.2f},Train Accuracy:{:.2%}\nTest Loss :{:.2f},Test Accuracy :{:.2%}'
        print(template.format(epoch + 1, train_loss.result(), train_accuracy.result(),
                              test_loss.result(), test_accuracy.result()))

    model.save_weights('./model/face')  # save the weights under the name "face"
```
We now detect faces with the camera. To detect faces in a video file instead, change the argument of cap = cv2.VideoCapture(), passing the video path inside the parentheses.
Press the q key to close the detection window.
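For example, only the capture source changes when running the same loop over a recorded clip (the path below is the illustrative one from the capture step; substitute your own video):

```python
import cv2

# pass a file path instead of the device index 0
cap = cv2.VideoCapture("C:/Users/lishu/Desktop/ob/x/face/zjm/zjm.mp4")
```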
```python
if __name__ == '__main__':

    # load the model
    model = CNN()
    model.load_weights('./model/face')  # restore the trained weights

    # color of the rectangle drawn around each face
    color = (0, 255, 255)

    # capture the live video stream from the given camera
    cap = cv2.VideoCapture(0)

    # local path of the face detection cascade
    cascade_path = "C:/Users/lishu/Anaconda3/Lib/site-packages/cv2/data/haarcascade_frontalface_alt2.xml"
    # load the cascade classifier once, before the loop
    cascade = cv2.CascadeClassifier(cascade_path)

    # detect and recognize faces in a loop
    while True:
        ret, frame = cap.read()  # read one video frame

        if ret is True:
            # convert to grayscale to reduce the detection cost
            frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        else:
            continue

        # let the cascade classifier find the face regions
        faceRects = cascade.detectMultiScale(frame_gray, scaleFactor=1.2, minNeighbors=3, minSize=(32, 32))
        if len(faceRects) > 0:
            for faceRect in faceRects:
                x, y, w, h = faceRect

                # crop the face region and hand it to the model to identify the person
                image = frame[y - 10: y + h + 10, x - 10: x + w + 10]
                face_probe = model.face_predict(image)  # class probabilities
                cv2.rectangle(frame, (x - 10, y - 10), (x + w + 10, y + h + 10), color, thickness=2)
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # cv2 and PIL store channels in different orders (BGR vs RGB)
                pilimg = Image.fromarray(frame)
                draw = ImageDraw.Draw(pilimg)  # draw the prediction onto the frame
                font = ImageFont.truetype("simkai.ttf", 20, encoding="utf-8")  # arg 1: font file path, arg 2: font size
                # alternative: always print every class probability
                # draw.text((x+25, y-95), 'axe:{:.2%}'.format(face_probe[0]), (0, 255, 0), font=font)
                # draw.text((x+25, y-70), 'ef:{:.2%}'.format(face_probe[1]), (0, 0, 255), font=font)

                if face_probe[0] > 0.7:
                    draw.text((x + 25, y - 75), 'axe:{:.2%}'.format(face_probe[0]), (0, 0, 255), font=font)
                elif face_probe[1] > 0.7:
                    draw.text((x + 25, y - 75), 'ef:{:.2%}'.format(face_probe[1]), (0, 255, 0), font=font)
                else:
                    draw.text((x + 25, y - 75), 'stranger', (255, 0, 0), font=font)

                frame = cv2.cvtColor(np.array(pilimg), cv2.COLOR_RGB2BGR)

        cv2.imshow("ShowTime", frame)

        # wait 10 ms for a key press
        k = cv2.waitKey(10)
        # quit the loop if q was pressed
        if k & 0xFF == ord('q'):
            break

    # release the camera and destroy all windows
    cap.release()
    cv2.destroyAllWindows()
```
A note on `if __name__ == '__main__':` — the guarded block runs when the file is executed as a script, but not when the file is imported as a module by another script.
This article runs all the code in Jupyter Notebook.
For the capture step, collect faces of at least two people, otherwise the model overfits easily. Create one data folder per person under the face folder.
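A minimal illustration of the `__main__` guard (demo.py is a hypothetical file name):

```python
# demo.py
def main():
    print("running as a script")

if __name__ == '__main__':
    main()  # runs on `python demo.py`, but not on `import demo`
```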