Foreword:
“Talk is cheap, show me the code.”
This project is a practice exercise from my first year of learning PyTorch. The code draws heavily on existing references; if there are mistakes or omissions, corrections are very welcome.
Environment:
python 3.9.9
opencv-python 4.5.4.60
torch 1.10.0+cu113
numpy 1.22.3
torchvision 0.11.1+cu113
The dataset is collected exactly as in the face-data acquisition method from the blog post below: OpenCV's haarcascade_frontalface_alt2.xml cascade detects faces in the video stream, and each detected face is cropped out and saved to disk.
利用python、tensorflow、opencv实现人脸识别(包会)!_就是这个七昂的博客-CSDN博客_tensorflow 人脸识别
import cv2
import sys

def CatchPICFromVideo(window_name, camera_idx, catch_pic_num, path_name):
    cv2.namedWindow(window_name)

    # Video source: either a saved video file or a USB camera index
    cap = cv2.VideoCapture(camera_idx)

    # Tell OpenCV which face-detection cascade to use
    classfier = cv2.CascadeClassifier("D:\\opencv\\build\\etc\\haarcascades\\haarcascade_frontalface_alt2.xml")

    # Color of the rectangle drawn around detected faces (BGR)
    color = (0, 255, 0)

    num = 0
    while cap.isOpened():
        ok, frame = cap.read()  # read one frame
        if not ok:
            break

        grey = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)  # convert the current frame to grayscale

        # Face detection: scaleFactor=1.2 is the image pyramid scale, minNeighbors=3 the required number of neighboring detections
        faceRects = classfier.detectMultiScale(grey, scaleFactor = 1.2, minNeighbors = 3, minSize = (32, 32))
        if len(faceRects) > 0:            # at least one face was detected
            for faceRect in faceRects:    # handle each detected face separately
                x, y, w, h = faceRect

                # save the face crop from the current frame as an image
                img_name = '%s/%d.jpg'%(path_name, num)
                image = frame[y - 10: y + h + 10, x - 10: x + w + 10]
                cv2.imwrite(img_name, image)

                num += 1
                if num > (catch_pic_num):  # stop once the requested number of images has been saved
                    break

                # draw the bounding rectangle
                cv2.rectangle(frame, (x - 10, y - 10), (x + w + 10, y + h + 10), color, 2)

                # show how many face images have been captured so far
                font = cv2.FONT_HERSHEY_SIMPLEX
                cv2.putText(frame,'num:%d' % (num),(x + 30, y + 30), font, 1, (255,0,255),4)

        # exit once the requested number of images has been saved
        if num > (catch_pic_num): break

        # display the frame
        cv2.imshow(window_name, frame)
        c = cv2.waitKey(10)
        if c & 0xFF == ord('q'):
            break

    # release the camera and destroy all windows
    cap.release()
    cv2.destroyAllWindows()

if __name__ == '__main__':
    if len(sys.argv) != 1:
        print("Usage:%s camera_id face_num_max path_name\r\n" % (sys.argv[0]))
    else:
        CatchPICFromVideo("capture faces", 0, 1000, 'C:\\Users\\73559\\Desktop\\ml\\pic3')
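Usage note (illustrative): the labeling code later assigns label 0 to the folder whose name ends in 'pic' (my face) and label 1 to the other folder (my classmate), so the capture script has to be run once per person with a different output folder. The folder names below are placeholders, not the project's actual paths.

# run once for each person, saving into separate folders under the data directory
CatchPICFromVideo("capture faces", 0, 1000, 'C:\\Users\\73559\\Desktop\\ml\\data\\pic')    # my face   -> label 0
CatchPICFromVideo("capture faces", 0, 1000, 'C:\\Users\\73559\\Desktop\\ml\\data\\other')  # classmate -> label 1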

Capture results:
(I am still learning how to use PyTorch's built-in DataLoader and will update this section once I do; for now the dataset is processed the traditional way. A DataLoader sketch follows the code below.)
After reading the images from the folders, OpenCV resizes them with area interpolation, which avoids the detail loss a naive resize would cause.
import os
import numpy as np
import cv2
import random
# from torch.utils.data import DataLoader  # still learning how to use this...

TRAIN_RATE = 0.8
VALID_RATE = 0.1
SAMPLE_QUANTITY = 2000

IMAGE_SIZE = 227
MYPATH = "C:\\Users\\73559\\Desktop\\ml\\data"

# read the training data
images = []
labels = []
def read_path(path_name):
    for dir_item in os.listdir(path_name):
        # build an absolute path starting from the initial path
        full_path = os.path.abspath(os.path.join(path_name, dir_item))
        if os.path.isdir(full_path):    # directory: recurse into it
            read_path(full_path)
        else:                           # file
            if dir_item.endswith('.jpg'):
                image = cv2.imread(full_path)
                image = cv2.resize(image, (IMAGE_SIZE, IMAGE_SIZE), interpolation=cv2.INTER_AREA)  # resize to 227 * 227
                images.append(image)
                labels.append(path_name)
    return images,labels


# load the training data from the given path
def load_dataset(path_name):
    images,labels = read_path(path_name)

    # two people, 2000 images in total; with IMAGE_SIZE = 227 the data has shape 2000 * 227 * 227 * 3
    # (each image is 227 * 227 pixels with 3 channels (RGB) per pixel)
    # label the data: everything under the 'pic' folder is my face (label 0); the other folder holds my classmate's face (label 1)
    labels = np.array([0 if label.endswith('pic') else 1 for label in labels])
    # simple hold-out split
    images = list(images)
    for i in range(len(images)):
        images[i] = [images[i],labels[i]]
    random.shuffle(images)
    train_data = []
    test_data = []
    valid_data = []
    # training set
    for i in range(int(SAMPLE_QUANTITY * TRAIN_RATE)):
        train_data.append(images[i])
    # validation set
    for i in range(int(SAMPLE_QUANTITY * TRAIN_RATE),int(SAMPLE_QUANTITY * (TRAIN_RATE + VALID_RATE))):
        valid_data.append(images[i])
    # test set
    for i in range(int(SAMPLE_QUANTITY * (TRAIN_RATE + VALID_RATE)), SAMPLE_QUANTITY):
        test_data.append(images[i])

    return train_data, test_data , valid_data

# if __name__ == "__main__":
#     train_loader = load_dataset(MYPATH)
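For reference, here is a minimal sketch of how the same folder layout could instead be wrapped in torch.utils.data.Dataset and DataLoader; the class name FaceDataset and the batch size are illustrative choices, not part of the original code.

import os
import cv2
import torch
from torch.utils.data import Dataset, DataLoader

class FaceDataset(Dataset):
    """Walks the data folder; folders ending in 'pic' get label 0, everything else label 1."""
    def __init__(self, root, image_size=227):
        self.samples = []
        self.image_size = image_size
        for dirpath, _, filenames in os.walk(root):
            for name in filenames:
                if name.endswith('.jpg'):
                    label = 0 if dirpath.endswith('pic') else 1
                    self.samples.append((os.path.join(dirpath, name), label))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        path, label = self.samples[idx]
        img = cv2.imread(path)
        img = cv2.resize(img, (self.image_size, self.image_size), interpolation=cv2.INTER_AREA)
        # HWC uint8 image -> CHW float tensor
        img = torch.from_numpy(img).permute(2, 0, 1).float()
        return img, label

# usage: yields batches of shape [batch_size, 3, 227, 227]
# dataset = FaceDataset(MYPATH)
# loader = DataLoader(dataset, batch_size=32, shuffle=True)

DataLoader then handles shuffling and batching, which would replace the manual split loops above.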

Network structure: AlexNet
The last fully connected layer is changed to 2 output units (one per person).
import torch
# define the network structure
class AlexNet(torch.nn.Module):
    def __init__(self):
        super(AlexNet,self).__init__()
        self.conv = torch.nn.Sequential(
            torch.nn.Conv2d(in_channels=3,
                            out_channels=96,
                            kernel_size=11,
                            stride=4),
            torch.nn.BatchNorm2d(96),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(3,2),
            torch.nn.Conv2d(96,256,5,padding=2),
            torch.nn.BatchNorm2d(256),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(3,2),
            torch.nn.Conv2d(256,384,3,padding=1),
            torch.nn.ReLU(),
            torch.nn.Conv2d(384,384,3,padding=1),
            torch.nn.ReLU(),
            torch.nn.Conv2d(384,256,3,padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(3,2),
        )
        self.fc = torch.nn.Sequential(
            torch.nn.Linear(256*6*6,4096),
            torch.nn.ReLU(),
            torch.nn.Linear(4096, 4096),
            torch.nn.ReLU(),
            torch.nn.Linear(4096, 2),
            #torch.nn.Softmax(dim=1)
            # dim=1 applies softmax row-wise, squashing each row into (0,1) so it can be read as probabilities.
            # Softmax is omitted here because the CrossEntropyLoss used for training already includes it.
        )
    def forward(self, x):
        x = self.conv(x)
        #print(x.size())
        x = x.contiguous().view(-1,256*6*6)  # .contiguous() guards against non-contiguous tensors, e.g. when training on multiple GPUs
        x = self.fc(x)
        return x
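As a quick sanity check (illustrative, not part of the original code), feeding a random 1 x 3 x 227 x 227 tensor through the network should give a 1 x 2 output, which confirms the 256*6*6 flatten size used in forward:

import torch
from alexnet import AlexNet  # the model defined above

model = AlexNet()
dummy = torch.randn(1, 3, 227, 227)  # one fake RGB image, 227 x 227
out = model(dummy)
print(out.shape)                     # torch.Size([1, 2])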

Note: PyTorch's Conv2d layer takes its parameters in the order [in_channels, out_channels, kernel_size, stride, padding].
The padding argument also accepts the strings "same" and "valid". With "same" you cannot set a stride other than 1, so for strided layers you instead solve the output-size formula from the documentation (a linear equation in padding) for the padding you need.
The convolution layer is documented in detail in the official docs:
Conv2d — PyTorch 1.11.0 documentation
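As a concrete illustration (my own addition, not from the original post): with dilation 1, the output side length of a square Conv2d or MaxPool2d layer is floor((H_in + 2*padding - kernel_size) / stride + 1). The small helper below traces a 227 x 227 input through the convolutional stack defined above and ends at 6 x 6, which is where the 256*6*6 input of the first Linear layer comes from.

def out_size(h, kernel, stride, padding=0):
    # output side length of a square Conv2d/MaxPool2d with dilation 1
    return (h + 2 * padding - kernel) // stride + 1

h = 227
h = out_size(h, 11, 4)     # Conv2d(3, 96, 11, stride=4)    -> 55
h = out_size(h, 3, 2)      # MaxPool2d(3, 2)                -> 27
h = out_size(h, 5, 1, 2)   # Conv2d(96, 256, 5, padding=2)  -> 27
h = out_size(h, 3, 2)      # MaxPool2d(3, 2)                -> 13
h = out_size(h, 3, 1, 1)   # three 3x3 convs with padding=1 -> 13
h = out_size(h, 3, 1, 1)
h = out_size(h, 3, 1, 1)
h = out_size(h, 3, 2)      # MaxPool2d(3, 2)                -> 6
print(h)                   # 6, so the flattened size is 256 * 6 * 6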
Related resources:
Online convolution/pooling calculator:
How BatchNorm2d works:
BatchNorm2d原理、作用及其pytorch中BatchNorm2d函数的参数讲解_LS_learner的博客-CSDN博客_batchnorm2d
A quick, visual introduction to how CNNs work:
Softmax:
torch.nn.functional中softmax的作用及其参数说明 - 慢行厚积 - 博客园
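To make the softmax note concrete, a small illustrative snippet (my own addition): CrossEntropyLoss applies log-softmax internally, so the network outputs raw logits during training, and an explicit softmax is only needed when probabilities are wanted at inference time.

import torch
import torch.nn.functional as F

logits = torch.tensor([[2.0, -1.0]])   # raw network output for one sample
target = torch.tensor([0])

# CrossEntropyLoss on logits == NLLLoss on log_softmax(logits)
loss_a = torch.nn.CrossEntropyLoss()(logits, target)
loss_b = F.nll_loss(F.log_softmax(logits, dim=1), target)
print(torch.isclose(loss_a, loss_b))   # tensor(True)

# probabilities for inference
print(F.softmax(logits, dim=1))        # tensor([[0.9526, 0.0474]])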
The comments should be fairly detailed (I hope).
test_data is not used here; call it as needed for your own setup (a sketch of a test-set evaluation follows the training code below).
import torch
import numpy as np
from torch.autograd import Variable
from prepare import load_dataset, MYPATH  # the data-preparation file above
from alexnet import AlexNet               # the model defined above
from torch import nn, optim

# learning rate
learning_rate = 0.001
# train on the GPU if one is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AlexNet().to(device)

# cross-entropy loss and SGD (stochastic gradient descent)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate)
# load the data
train_data, test_data , valid_data = load_dataset(MYPATH)
epoch = 0  # really a per-sample step counter: the loop below feeds one image at a time

for data in train_data:
    img, label = data

    img = torch.LongTensor(img)
    # add a batch dimension: PyTorch expects at least a 4-D tensor as input to a conv layer
    img = Variable(torch.unsqueeze(img, dim=0).float(), requires_grad=False)
    # reorder the dimensions: conv layers expect [batch_size, channels, height, width],
    # i.e. 1 * 3 * 227 * 227 here, while the image is stored as height * width * channels
    img = img.permute(0, 3, 1, 2)

    # label is a plain numpy integer, so build the tensor with torch.tensor() and cast it to long
    label = torch.tensor(label)
    label = label.long()
    # flatten to a 1-D tensor so it can be compared with the model output
    label = torch.flatten(label)
    label = Variable(label)

    img = img.to(device)
    label = label.to(device)

    out = model(img)

    loss = criterion(out, label)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    epoch+=1
    if epoch%50 == 0:
        print('epoch: {}, loss: {:.4}'.format(epoch, loss.data.item()))

model.eval()
eval_loss = 0
eval_acc = 0
with torch.no_grad():  # no gradients are needed for validation
    for data in valid_data:
        img, label = data
        img = torch.LongTensor(img)
        img = Variable(torch.unsqueeze(img, dim=0).float(), requires_grad=False)
        img = img.permute(0, 3, 1, 2)

        label = torch.tensor(label)
        label = label.long()
        label = torch.flatten(label)
        label = Variable(label)

        img = img.to(device)
        label = label.to(device)

        out = model(img)

        loss = criterion(out, label)
        eval_loss += loss.data.item()*label.size(0)
        _, pred = torch.max(out, 1)
        num_correct = (pred == label).sum()
        eval_acc += num_correct.item()

print('Valid Loss: {:.6f}, Acc: {:.6f}'.format(
    eval_loss / (len(valid_data)),
    eval_acc / (len(valid_data))
))
# save the trained model
torch.save(model, 'net.pkl')
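Since test_data is otherwise unused, here is a minimal sketch of evaluating the saved model on it, assuming the snippet is appended to the training script above so that device and test_data are still in scope (illustrative only):

model = torch.load('net.pkl').to(device)
model.eval()
test_acc = 0
with torch.no_grad():
    for img, label in test_data:
        # same preprocessing as the validation loop above
        img = torch.unsqueeze(torch.LongTensor(img).float(), dim=0)
        img = img.permute(0, 3, 1, 2).to(device)
        label = torch.flatten(torch.tensor(label).long()).to(device)

        out = model(img)
        _, pred = torch.max(out, 1)
        test_acc += (pred == label).sum().item()

print('Test Acc: {:.6f}'.format(test_acc / len(test_data)))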

The recognition script also follows the same blog post, with the model invocation and the image preprocessing changed; the rest needs no further explanation. 利用python、tensorflow、opencv实现人脸识别(包会)!_就是这个七昂的博客-CSDN博客_tensorflow 人脸识别
import numpy as np
import cv2
import sys
import torch
from torch.autograd import Variable

if __name__ == '__main__':
    if len(sys.argv) != 1:
        print("Usage:%s camera_id\r\n" % (sys.argv[0]))
        sys.exit(0)

    # load the trained model
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = torch.load('net.pkl').to(device)
    model.eval()

    # color of the rectangle drawn around detected faces
    color = (0, 255, 0)

    # capture the live video stream from the given camera
    cap = cv2.VideoCapture(0)

    # local path of the face-detection cascade
    cascade_path = "D:\\opencv\\build\\etc\\haarcascades\\haarcascade_frontalface_alt2.xml"
    # load the cascade once, outside the loop
    cascade = cv2.CascadeClassifier(cascade_path)

    # detect and recognize faces in a loop
    while True:
        ret, frame = cap.read()  # read one frame

        if ret is True:

            # convert to grayscale to reduce the detection cost
            frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        else:
            continue

        # let the cascade find the face regions
        faceRects = cascade.detectMultiScale(frame_gray, scaleFactor = 1.2, minNeighbors = 3, minSize = (32, 32))
        if len(faceRects) > 0:
            for faceRect in faceRects:
                x, y, w, h = faceRect

                # crop the face region and pass it to the model to decide who it is
                img = frame[y - 10: y + h + 10, x - 10: x + w + 10]
                img = cv2.resize(img, (227, 227), interpolation=cv2.INTER_AREA)
                img = torch.from_numpy(img)
                img = Variable(torch.unsqueeze(img, dim=0).float(), requires_grad=False)
                img = img.permute(0, 3, 1, 2)  # HWC -> CHW, with a leading batch dimension
                img = img.to(device)
                with torch.no_grad():
                    out = model(img)
                # the network outputs raw logits; softmax turns them into probabilities
                faceID = torch.softmax(out, dim=1)
                prob = str(faceID[0][0].item())
                cv2.putText(frame,"prob:" + prob[:6],
                            (x - 40, y - 40),             # position
                            cv2.FONT_HERSHEY_SIMPLEX,     # font
                            1,                            # font scale
                            (255,0,255),                  # color
                            2)                            # line width

                # if it is "me"
                if faceID[0][0] > faceID[0][1] and faceID[0][0] > 0.9:
                    cv2.rectangle(frame, (x - 10, y - 10), (x + w + 10, y + h + 10), color, thickness = 2)

                    # label who it is
                    cv2.putText(frame,'It\'s me!',
                                (x + 30, y + 30),           # position
                                cv2.FONT_HERSHEY_SIMPLEX,   # font
                                1,                          # font scale
                                (255,0,255),                # color
                                2)                          # line width
                else:
                    pass

        cv2.imshow("me", frame)

        # wait 10 ms for a key press
        k = cv2.waitKey(10)
        # quit on 'q'
        if k & 0xFF == ord('q'):
            break

    # release the camera and destroy all windows
    cap.release()
    cv2.destroyAllWindows()

Result screenshot:
Note: if you have more than one GPU, replace "cuda" with "cuda:0" (the first GPU) to pin the model to a specific card.
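For example (illustrative only):

import torch

# pin the model to the first GPU when several are installed, otherwise fall back to the CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)  # 'model' is the network built or loaded earlier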