赞
踩
参考链接:ES 官方博客(作者 Yacine Younes),以及刘晓国大佬在 ES 百人大作战一期发布的 4.2.4 章节
前期准备: Python3 环境、CMake、dlib 库和 face_recognition 等,还需要了解 ES 的向量类型,可以参考本人另一篇文章ES 向量检索 dense_vector 类型
需要用到的脚本和数据很简单,如下图所示:
下面分别看一下两个 Python 脚本:
"""Index face encodings from ./images into the Elasticsearch 'faces' index.

Walks the ``images`` directory under the current working directory, encodes
every face found in every file, and indexes one document per face. Each
document holds the face name (the file name without its extension) and the
encoding vector.
"""
import os
import sys
from pathlib import Path

import face_recognition
import numpy as np
from elasticsearch import Elasticsearch

es = Elasticsearch([{'host': 'localhost', 'port': 9200}])

cwd = os.getcwd()
print("cwd: " + cwd)

# Get the images directory
rootdir = cwd + "/images"
print("rootdir: " + rootdir)

# NOTE(review): this script does not create the 'faces' index. It presumably
# must already exist with 'face_encoding' mapped as dense_vector (128 dims);
# confirm before running, otherwise dynamic mapping will pick another type.
for subdir, dirs, files in os.walk(rootdir):
    for filename in files:
        file_path = os.path.join(subdir, filename)
        print(file_path)

        # Load the picture.
        image = face_recognition.load_image_file(file_path)
        # Locate every face in the picture.
        face_locations = face_recognition.face_locations(image)
        # Encode each located face as a vector (128 dimensions per the
        # original author's note).
        face_encodings = face_recognition.face_encodings(image, face_locations)

        # The name only depends on the file, so compute it once per file.
        name = Path(file_path).stem

        # Store one document per face in ES.
        for face_encoding in face_encodings:
            vector = face_encoding.tolist()
            print("Face found ==> ", vector)
            print("name: " + name)

            document = {
                "face_name": name,
                "face_encoding": vector,
            }
            # doc_type is intentionally omitted: mapping types are deprecated
            # in ES 7 (the 7.x client defaults to '_doc') and the parameter is
            # rejected by 8.x clients.
            es.index(index='faces', body=document)
运行此脚本将数据导入到 ES 集群,其存储内容如下图所示:每个文档包含人名,以及由其人脸信息构成的 dense_vector 类型字段(即 128 维的浮点数组)。
"""Match faces in ./images_to_be_recognized against the ES 'faces' index.

For every face found in every file under ``images_to_be_recognized``, runs a
``script_score`` query that ranks indexed faces by cosine similarity and
reports the best hit, or "Unknown face" when the similarity is not above
``MATCH_THRESHOLD``.
"""
import os
import sys

import face_recognition
import numpy as np
from elasticsearch import Elasticsearch

# Cosine similarity above which the best hit is reported as a match.
MATCH_THRESHOLD = 0.92

# script_score rejects negative scores and cosine similarity ranges over
# [-1, 1], so the script shifts it by this offset; it is subtracted again
# on the client before reporting.
SCORE_OFFSET = 1.0

SIMILARITY_SCRIPT = (
    "cosineSimilarity(params.query_vector, 'face_encoding') + 1.0"
)

# NOTE(review): this must point at the cluster that holds the 'faces' index;
# confirm the host/port matches the one used when the faces were indexed.
es = Elasticsearch([{'host': '10.11.110.165', 'port': 9206}])

cwd = os.getcwd()

# Get the images directory
rootdir = cwd + "/images_to_be_recognized"

for subdir, dirs, files in os.walk(rootdir):
    for filename in files:
        file_path = os.path.join(subdir, filename)
        print(file_path)

        # Obtain the list of face encodings for this picture.
        image = face_recognition.load_image_file(file_path)
        face_locations = face_recognition.face_locations(image)
        face_encodings = face_recognition.face_encodings(image, face_locations)

        # Run one ES query per face found in the picture.
        for face_number, face_encoding in enumerate(face_encodings, start=1):
            print("Face", face_number)
            response = es.search(
                index="faces",
                body={
                    # Only return the single highest-scoring document.
                    "size": 1,
                    "_source": "face_name",
                    "query": {
                        "script_score": {
                            "query": {"match_all": {}},
                            "script": {
                                # Rank by cosine similarity (shifted to stay
                                # non-negative).
                                "source": SIMILARITY_SCRIPT,
                                "params": {
                                    "query_vector": face_encoding.tolist(),
                                },
                            },
                        },
                    },
                },
            )

            # Report the query result.
            for hit in response['hits']['hits']:
                # Undo the offset so the value is the plain cosine similarity.
                similarity = float(hit['_score']) - SCORE_OFFSET
                print("score: {}".format(similarity))
                if similarity > MATCH_THRESHOLD:
                    print("==> This face match with ", hit['_source']['face_name'], ",the score is", similarity)
                else:
                    print("==> Unknown face")
运行查询脚本,返回结果如下:算法识别出 4 个人脸,并对每个人脸都进行了查询,输出了每个人脸查询结果中得分最高的文档的人名和得分。
# Variant of the recognition query that returns the ten best-scoring faces
# instead of one. Relies on `es` (the Elasticsearch client) and
# `face_encoding` (the encoding of the face being looked up) from the
# surrounding script.
response = es.search(
    index="faces",
    body={
        "size": 10,
        "_source": "face_name",
        "query": {
            "script_score": {
                "query": {"match_all": {}},
                "script": {
                    # script_score rejects negative scores and cosine
                    # similarity ranges over [-1, 1], so shift it by 1.0.
                    "source": "cosineSimilarity(params.query_vector, 'face_encoding') + 1.0",
                    "params": {
                        "query_vector": face_encoding.tolist(),
                    },
                },
            },
        },
    },
)

for hit in response['hits']['hits']:
    # Subtract the 1.0 shift so the printed value is the cosine similarity.
    print("==> This face match with ", hit['_source']['face_name'], ",the score is", hit['_score'] - 1.0)
结果如图所示:可以看出完全相同的照片 ES 向量余弦计算结果为 1,其他人脸对比得分结果均为 0.86+,所以上述脚本配置的 >0.92 即算匹配较为合理,具体细节需要研究 face_recognition 的算法实现。
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。