赞
踩
Hi,大家好,我是半亩花海。在当今科技飞速发展的时代,我们身边充斥着各种智能设备,然而,如何更便捷地与这些设备进行交互却是一个不断被探索的课题。本文将主要介绍一个基于 OpenCV 的手势识别项目,通过手势来控制电脑屏幕亮度和音量大小,为用户提供了一种全新的交互方式。
目录
在开始介绍项目的实现细节之前,我们首先需要导入项目所需的必要库。这些库包括:
- # 导入必要库
- import math
- import sys
- import numpy as np
- import cv2
- import pygame
- import wmi
- import mediapipe as mp
- from ctypes import cast, POINTER
- from comtypes import CLSCTX_ALL
- from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume
- import warnings # 忽略警告
- warnings.filterwarnings("ignore")
首先创建一个 HandKeyPoint 类,用于初始化手部关键点检测器,并提供对图像进行处理的方法。
# Hand key point detector
class HandKeyPoint:
    """Wrap the MediaPipe ``Hands`` solution and return its output as numpy data."""

    def __init__(self,
                 static_image_mode=False,
                 max_num_hands=2,
                 model_complexity=1,
                 min_detection_confidence=0.5,
                 min_tracking_confidence=0.5):
        """
        Build the underlying detector.

        Every argument is forwarded unchanged, by keyword, to
        ``mp.solutions.hands.Hands``.
        """
        detector_options = dict(
            static_image_mode=static_image_mode,
            max_num_hands=max_num_hands,
            model_complexity=model_complexity,
            min_detection_confidence=min_detection_confidence,
            min_tracking_confidence=min_tracking_confidence,
        )
        # Hand recognition API module
        self.mp_hands = mp.solutions.hands
        # Detector instance used by process()
        self.hands = self.mp_hands.Hands(**detector_options)

    def process(self, image):
        """
        Detect hands in one image.

        :param image: image passed through a BGR -> RGB channel swap before detection
        :return: tuple ``(results, np_arr)``: the raw detector results and the
                 array produced from them by ``landmarks_to_numpy``
        """
        rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        results = self.hands.process(rgb_image)
        return results, landmarks_to_numpy(results)
将手部关键点的检测结果(landmarks 格式的数据)转换为 numpy 数组,以便后续的处理和分析。
# Convert landmark-format results into a numpy array
def landmarks_to_numpy(results):
    """
    Convert landmark-format detection results into a numpy array.

    The returned array has shape (2, 21, 3): slot 0 holds the hand labelled
    "Left", slot 1 the other hand, each as 21 rows of (x, y, z). A slot with
    no hand is all zeros.

    :param results: object exposing ``multi_hand_landmarks`` and ``multi_handedness``
    :return: numpy array of shape (2, 21, 3)
    """
    def hand_coords(hand):
        # 21 rows of (x, y, z) for one detected hand
        points = hand.landmark
        return np.array([[points[i].x, points[i].y, points[i].z] for i in range(21)])

    detected = results.multi_hand_landmarks
    if detected is None:
        # No hand detected
        return np.zeros((2, 21, 3))

    hand_count = len(detected)

    if hand_count == 1:
        # One hand: its handedness label decides which slot it fills
        label = results.multi_handedness[0].classification[0].label
        coords = hand_coords(detected[0])
        empty = np.zeros((21, 3))
        pair = [coords, empty] if label == "Left" else [empty, coords]
        return np.array(pair)

    if hand_count == 2:
        # Both indices start at 0; the last hand carrying each label wins
        left_idx = right_idx = 0
        for idx, handedness in enumerate(results.multi_handedness):
            label = handedness.classification[0].label
            if label == 'Left':
                left_idx = idx
            elif label == 'Right':
                right_idx = idx
        return np.array([hand_coords(detected[left_idx]), hand_coords(detected[right_idx])])

    # Any other number of hands is reported as "nothing detected"
    return np.zeros((2, 21, 3))
# Draw the detected hand key points
def draw_landmark(img, results):
    """
    Draw every detected hand onto ``img`` using MediaPipe's default styles.

    :param img: image handed to ``draw_landmarks`` for each detected hand
    :param results: detector results exposing ``multi_hand_landmarks``
    :return: the same ``img`` object that was passed in
    """
    detected_hands = results.multi_hand_landmarks
    if not detected_hands:
        return img

    drawing = mp.solutions.drawing_utils
    styles = mp.solutions.drawing_styles
    for one_hand in detected_hands:
        drawing.draw_landmarks(
            img,
            one_hand,
            mp.solutions.hands.HAND_CONNECTIONS,
            styles.get_default_hand_landmarks_style(),
            styles.get_default_hand_connections_style(),
        )
    return img
为了平滑处理手势状态的变化,我们实现了一个 Buffer 类,用于缓存手势状态的变化,并提供了添加正例和负例的方法。
# Buffer class
class Buffer:
    """
    Debounce a boolean gesture state.

    ``state`` only flips after enough positive (or negative) samples have been
    added; ``volume`` is the number of samples that must accumulate first.
    """

    def __init__(self, volume=20):
        self.state = False
        self.__volume = volume
        self.__positive = self.__negative = self.__count = 0

    def add_positive(self):
        """Record one positive sample; set ``state`` to True once enough were seen."""
        self.__count += 1
        if self.__positive < self.__volume:
            self.__positive += 1
        else:
            # The positive counter has reached the capacity: confirm True
            self.state = True
            self.__negative = 0
            self.__count = 0
        self.__reset_if_undecided()

    def add_negative(self):
        """Record one negative sample; set ``state`` to False once enough were seen."""
        self.__count += 1
        if self.__negative < self.__volume:
            self.__negative += 1
        else:
            # The negative counter has reached the capacity: confirm False
            self.state = False
            self.__positive = 0
        self.__reset_if_undecided()

    def __reset_if_undecided(self):
        # More than `volume` samples were added without the operation counter
        # being reset: drop the accumulated positives and restart the count.
        if self.__count > self.__volume:
            self.__positive = 0
            self.__count = 0

    def clear(self):
        """Reset the state and all counters."""
        self.state = False
        self.__positive = self.__negative = self.__count = 0
# Line drawing helper
def draw_line(frame, p1, p2, color=(255, 127, 0), thickness=3):
    """
    Draw a straight line between two points.

    The first two components of ``p1`` and ``p2`` are multiplied by CAM_W and
    CAM_H respectively and truncated to integers before drawing.

    :param frame: image passed to ``cv2.line``
    :param p1: start point
    :param p2: end point
    :param color: line colour passed to ``cv2.line``
    :param thickness: line thickness passed to ``cv2.line``
    :return: whatever ``cv2.line`` returns
    """
    start = (int(p1[0] * CAM_W), int(p1[1] * CAM_H))
    end = (int(p2[0] * CAM_W), int(p2[1] * CAM_H))
    return cv2.line(frame, start, end, color, thickness)
# Set the screen brightness
def screen_change(percent):
    """
    Set the brightness of the first monitor exposed through WMI.

    :param percent: target brightness as a percentage. Callers pass floats
                    (the main loop passes the result of ``np.interp``), while
                    ``WmiSetBrightness`` takes an 8-bit integer percentage, so
                    the value is truncated to int and clamped to 0-100.
    :raises IndexError: if WMI reports no monitor with brightness methods
    """
    level = max(0, min(100, int(percent)))
    monitor = wmi.WMI(namespace='root/WMI').WmiMonitorBrightnessMethods()[0]
    monitor.WmiSetBrightness(Brightness=level, Timeout=500)
-
# Initialize volume control
def init_voice():
    """
    Obtain the speaker endpoint-volume interface and unmute it.

    :return: tuple ``((min_volume, max_volume), volume)``: the first two
             entries of ``GetVolumeRange()`` and the volume interface itself
    """
    speakers = AudioUtilities.GetSpeakers()
    interface = speakers.Activate(IAudioEndpointVolume._iid_, CLSCTX_ALL, None)
    volume = cast(interface, POINTER(IAudioEndpointVolume))
    # Make sure the output is not muted
    volume.SetMute(0, None)
    level_range = volume.GetVolumeRange()
    return (level_range[0], level_range[1]), volume
在项目的初始化阶段,我们需要加载摄像头实例和手部关键点识别实例,以便后续对手势进行识别和处理。
# Open the default camera
cap = cv2.VideoCapture(0)
CAM_W = 640
CAM_H = 480
CAM_SCALE = CAM_W / CAM_H
# draw_line() scales landmark coordinates by CAM_W / CAM_H, so ask the driver
# for that resolution instead of assuming it. The driver may still choose the
# nearest mode it supports.
cap.set(cv2.CAP_PROP_FRAME_WIDTH, CAM_W)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, CAM_H)

# Create the hand key point detector
hand = HandKeyPoint()
为了展示手势控制效果,并提供交互界面,我们使用了 Pygame 库。在初始化阶段,我们创建了一个窗口,并设置了标题。同时,我们实现了事件监听功能,以便在需要时退出程序。
具体来说,我们使用 Pygame 创建了一个窗口,并将摄像头捕获的图像显示在窗口中。同时,我们利用 Pygame 的事件监听功能,监听用户的键盘事件,例如按下"q"键时退出程序。这样,用户就可以通过手势控制屏幕亮度和音量大小,同时在 Pygame 窗口中观察手势识别效果。
# Initialize pygame
pygame.init()
# Create an 800x600 window (not full screen) and give it a title
screen = pygame.display.set_mode((800, 600))
pygame.display.set_caption("virtual_control_screen")
# Current window size as a mutable [width, height] list
window_size = [*screen.get_size()]
-
# Main loop
while True:
    # ······ (per-frame capture, recognition and rendering; see the complete listing)
    # Event handling: quit when "q" is pressed or the window is closed
    for event in pygame.event.get():
        quit_key = event.type == pygame.KEYDOWN and event.key == pygame.K_q
        if event.type == pygame.QUIT or quit_key:
            sys.exit(0)
程序根据左手手腕(关键点 0)到中指指尖(关键点 12)的连线与水平方向的夹角 angle 来判断当前模式:
如果 20 < angle < 90,则进入“light ready”状态,即通过手势控制亮度。
如果 -50 < angle < -20,则进入“voice ready”状态,即通过手势控制音量。
除上述两种情况外,处于“menu”状态,即显示菜单。
通过演示可以发现,右手食指与大拇指在屏幕中的距离越远,亮度越高(音量越大),反之则亮度越低(音量越小),从而实现了通过手势对亮度和音量的控制。
- #!/usr/bin/env python
- # -*- coding:utf-8 -*-
- """
- @Project : virtual
- @File : virtual_control.py
- @IDE : PyCharm
- @Author : 半亩花海
- @Date : 2024:02:06 18:01
- """
- # 导入模块
- import math
- import sys
- import numpy as np
- import cv2
- import pygame
- import wmi
- import mediapipe as mp
- from ctypes import cast, POINTER
- from comtypes import CLSCTX_ALL
- from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume
- import warnings # 忽略警告
- warnings.filterwarnings("ignore")
-
-
# Hand key point detector
class HandKeyPoint:
    """Wrap the MediaPipe ``Hands`` solution and return its output as numpy data."""

    def __init__(self,
                 static_image_mode=False,
                 max_num_hands=2,
                 model_complexity=1,
                 min_detection_confidence=0.5,
                 min_tracking_confidence=0.5):
        """
        Build the underlying detector.

        Every argument is forwarded unchanged, by keyword, to
        ``mp.solutions.hands.Hands``.
        """
        detector_options = dict(
            static_image_mode=static_image_mode,
            max_num_hands=max_num_hands,
            model_complexity=model_complexity,
            min_detection_confidence=min_detection_confidence,
            min_tracking_confidence=min_tracking_confidence,
        )
        # Hand recognition API module
        self.mp_hands = mp.solutions.hands
        # Detector instance used by process()
        self.hands = self.mp_hands.Hands(**detector_options)

    def process(self, image):
        """
        Detect hands in one image.

        :param image: image passed through a BGR -> RGB channel swap before detection
        :return: tuple ``(results, np_arr)``: the raw detector results and the
                 array produced from them by ``landmarks_to_numpy``
        """
        rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        results = self.hands.process(rgb_image)
        return results, landmarks_to_numpy(results)
-
-
# Convert landmark-format results into a numpy array
def landmarks_to_numpy(results):
    """
    Convert landmark-format detection results into a numpy array.

    The returned array has shape (2, 21, 3): slot 0 holds the hand labelled
    "Left", slot 1 the other hand, each as 21 rows of (x, y, z). A slot with
    no hand is all zeros.

    :param results: object exposing ``multi_hand_landmarks`` and ``multi_handedness``
    :return: numpy array of shape (2, 21, 3)
    """
    def hand_coords(hand):
        # 21 rows of (x, y, z) for one detected hand
        points = hand.landmark
        return np.array([[points[i].x, points[i].y, points[i].z] for i in range(21)])

    detected = results.multi_hand_landmarks
    if detected is None:
        # No hand detected
        return np.zeros((2, 21, 3))

    hand_count = len(detected)

    if hand_count == 1:
        # One hand: its handedness label decides which slot it fills
        label = results.multi_handedness[0].classification[0].label
        coords = hand_coords(detected[0])
        empty = np.zeros((21, 3))
        pair = [coords, empty] if label == "Left" else [empty, coords]
        return np.array(pair)

    if hand_count == 2:
        # Both indices start at 0; the last hand carrying each label wins
        left_idx = right_idx = 0
        for idx, handedness in enumerate(results.multi_handedness):
            label = handedness.classification[0].label
            if label == 'Left':
                left_idx = idx
            elif label == 'Right':
                right_idx = idx
        return np.array([hand_coords(detected[left_idx]), hand_coords(detected[right_idx])])

    # Any other number of hands is reported as "nothing detected"
    return np.zeros((2, 21, 3))
-
-
# Draw the detected hand key points
def draw_landmark(img, results):
    """
    Draw every detected hand onto ``img`` using MediaPipe's default styles.

    :param img: image handed to ``draw_landmarks`` for each detected hand
    :param results: detector results exposing ``multi_hand_landmarks``
    :return: the same ``img`` object that was passed in
    """
    detected_hands = results.multi_hand_landmarks
    if not detected_hands:
        return img

    drawing = mp.solutions.drawing_utils
    styles = mp.solutions.drawing_styles
    for one_hand in detected_hands:
        drawing.draw_landmarks(
            img,
            one_hand,
            mp.solutions.hands.HAND_CONNECTIONS,
            styles.get_default_hand_landmarks_style(),
            styles.get_default_hand_connections_style(),
        )
    return img
-
-
# Buffer class
class Buffer:
    """
    Debounce a boolean gesture state.

    ``state`` only flips after enough positive (or negative) samples have been
    added; ``volume`` is the number of samples that must accumulate first.
    """

    def __init__(self, volume=20):
        self.state = False
        self.__volume = volume
        self.__positive = self.__negative = self.__count = 0

    def add_positive(self):
        """Record one positive sample; set ``state`` to True once enough were seen."""
        self.__count += 1
        if self.__positive < self.__volume:
            self.__positive += 1
        else:
            # The positive counter has reached the capacity: confirm True
            self.state = True
            self.__negative = 0
            self.__count = 0
        self.__reset_if_undecided()

    def add_negative(self):
        """Record one negative sample; set ``state`` to False once enough were seen."""
        self.__count += 1
        if self.__negative < self.__volume:
            self.__negative += 1
        else:
            # The negative counter has reached the capacity: confirm False
            self.state = False
            self.__positive = 0
        self.__reset_if_undecided()

    def __reset_if_undecided(self):
        # More than `volume` samples were added without the operation counter
        # being reset: drop the accumulated positives and restart the count.
        if self.__count > self.__volume:
            self.__positive = 0
            self.__count = 0

    def clear(self):
        """Reset the state and all counters."""
        self.state = False
        self.__positive = self.__negative = self.__count = 0
-
-
# Line drawing helper
def draw_line(frame, p1, p2, color=(255, 127, 0), thickness=3):
    """
    Draw a straight line between two points.

    The first two components of ``p1`` and ``p2`` are multiplied by CAM_W and
    CAM_H respectively and truncated to integers before drawing.

    :param frame: image passed to ``cv2.line``
    :param p1: start point
    :param p2: end point
    :param color: line colour passed to ``cv2.line``
    :param thickness: line thickness passed to ``cv2.line``
    :return: whatever ``cv2.line`` returns
    """
    start = (int(p1[0] * CAM_W), int(p1[1] * CAM_H))
    end = (int(p2[0] * CAM_W), int(p2[1] * CAM_H))
    return cv2.line(frame, start, end, color, thickness)
-
-
# Set the screen brightness
def screen_change(percent):
    """
    Set the brightness of the first monitor exposed through WMI.

    :param percent: target brightness as a percentage. Callers pass floats
                    (the main loop passes the result of ``np.interp``), while
                    ``WmiSetBrightness`` takes an 8-bit integer percentage, so
                    the value is truncated to int and clamped to 0-100.
    :raises IndexError: if WMI reports no monitor with brightness methods
    """
    level = max(0, min(100, int(percent)))
    monitor = wmi.WMI(namespace='root/WMI').WmiMonitorBrightnessMethods()[0]
    monitor.WmiSetBrightness(Brightness=level, Timeout=500)
-
-
# Initialize volume control
def init_voice():
    """
    Obtain the speaker endpoint-volume interface and unmute it.

    :return: tuple ``((min_volume, max_volume), volume)``: the first two
             entries of ``GetVolumeRange()`` and the volume interface itself
    """
    speakers = AudioUtilities.GetSpeakers()
    interface = speakers.Activate(IAudioEndpointVolume._iid_, CLSCTX_ALL, None)
    volume = cast(interface, POINTER(IAudioEndpointVolume))
    # Make sure the output is not muted
    volume.SetMute(0, None)
    level_range = volume.GetVolumeRange()
    return (level_range[0], level_range[1]), volume
-
-
# Open the default camera
cap = cv2.VideoCapture(0)
CAM_W = 640
CAM_H = 480
CAM_SCALE = CAM_W / CAM_H
# draw_line() scales landmark coordinates by CAM_W / CAM_H, so ask the driver
# for that resolution instead of assuming it. The driver may still choose the
# nearest mode it supports.
cap.set(cv2.CAP_PROP_FRAME_WIDTH, CAM_W)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, CAM_H)

# Create the hand key point detector
hand = HandKeyPoint()
-
# Initialize pygame
pygame.init()
# Create an 800x600 window (not full screen) and give it a title
screen = pygame.display.set_mode((800, 600))
pygame.display.set_caption("virtual_control_screen")
# Current window size as a mutable [width, height] list
window_size = [*screen.get_size()]

# One debouncing buffer per controllable quantity
buffer_light, buffer_voice = Buffer(10), Buffer(10)

last_y, last_2_y, last_2_x = 0, 1, 0

# Initialize volume control
voice_range, volume = init_voice()

# Brightness bar geometry
bright_bar_length, bright_bar_height = 300, 20
bright_bar_x, bright_bar_y = 50, 100

# Volume bar geometry
vol_bar_length, vol_bar_height = 300, 20
vol_bar_x, vol_bar_y = 50, 50
-
# Main loop: each iteration processes one camera frame.
# Resources that do not change between frames are created once here instead
# of being rebuilt on every iteration.
menu_images = {}  # image path -> loaded pygame surface
hud_font = pygame.font.SysFont(None, 24)
wmi_monitor = None  # WMI connection, opened the first time brightness is shown

try:
    while True:
        # Grab one frame; stop cleanly if the camera delivers nothing
        success, frame = cap.read()
        if not success:
            sys.exit("Failed to read a frame from the camera")

        # Mirror the image
        frame = cv2.flip(frame, 1)

        # hand.process() performs its own BGR -> RGB conversion, so it must be
        # given the BGR frame; converting beforehand would swap the channels twice.
        res, arr = hand.process(frame)
        # pygame displays RGB data, so convert only after detection
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame = draw_landmark(frame, res)

        # Length of one left-hand index-finger segment, used to scale the menu image
        scale = math.hypot((arr[0, 7, 0] - arr[0, 8, 0]),
                           (arr[0, 7, 1] - arr[0, 8, 1]),
                           (arr[0, 7, 2] - arr[0, 8, 2]))

        # Slope of the line from left-hand landmark 0 to landmark 12. With no
        # left hand this is 0/0 (nan) and with a vertical line x/0 (inf); both
        # fall through to the "menu" branch below, so the numpy floating-point
        # warnings are silenced on purpose.
        with np.errstate(divide='ignore', invalid='ignore'):
            tan = (arr[0, 0, 1] - arr[0, 12, 1]) / (arr[0, 0, 0] - arr[0, 12, 0])
        # Angle in degrees
        angle = np.arctan(tan) * 180 / np.pi

        if 20 < angle < 90:
            path = 'resources/menu/light.png'
            buffer_light.add_positive()
            buffer_voice.add_negative()
            # Show the brightness bar and value
            show_brightness = True
            show_volume = False
        elif -20 > angle > -50:
            path = 'resources/menu/voice.png'
            buffer_voice.add_positive()
            buffer_light.add_negative()
            # Show the volume bar and value
            show_brightness = False
            show_volume = True
        else:
            path = 'resources/menu/menu.png'
            buffer_light.add_negative()
            buffer_voice.add_negative()
            # Show neither bar
            show_brightness = False
            show_volume = False

        # Pixel distance between the right-hand thumb tip and index fingertip
        dis = math.hypot(int((arr[1, 4, 0] - arr[1, 8, 0]) * CAM_W), int((arr[1, 4, 1] - arr[1, 8, 1]) * CAM_H))
        # Scale factor used when mapping the right-hand distance
        s = math.hypot((arr[1, 5, 0] - arr[1, 9, 0]), (arr[1, 5, 1] - arr[1, 9, 1]), (arr[1, 5, 2] - arr[1, 9, 2]))

        # Adjust brightness
        if buffer_light.state:
            frame = cv2.putText(frame, 'light ready', (10, 35), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 127, 0))
            frame = draw_line(frame, arr[1, 4], arr[1, 8], thickness=5, color=(255, 188, 66))
            if dis != 0:
                # Linear interpolation: map the distance interval onto 0-100
                light = np.interp(dis, [int(500 * s), int(3000 * s)], (0, 100))
                # np.interp returns a numpy float; the brightness is an integer percentage
                screen_change(int(light))
        # Adjust volume
        elif buffer_voice.state:
            frame = cv2.putText(frame, 'voice ready', (10, 35), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 127, 0))
            frame = draw_line(frame, arr[1, 4], arr[1, 8], thickness=5, color=(132, 134, 248))
            if dis != 0:
                vol = np.interp(dis, [int(500 * s), int(3000 * s)], voice_range)
                volume.SetMasterVolumeLevel(vol, None)

        # Resize the frame to the window height, keeping the camera aspect ratio
        frame = cv2.resize(frame, (int(window_size[1] * CAM_SCALE), window_size[1]))
        frame = cv2.transpose(frame)
        # Render the frame
        frame = pygame.surfarray.make_surface(frame)
        screen.blit(frame, (int(0.5 * (CAM_W - CAM_H * CAM_SCALE)), 0))

        # Load each menu image from disk only once
        if path not in menu_images:
            menu_images[path] = pygame.image.load(path).convert_alpha()
        img_menu = menu_images[path]
        img_w, img_h = img_menu.get_size()
        img_menu = pygame.transform.scale(img_menu, (int(img_w * scale * 5), int(img_h * scale * 5)))
        x = (arr[0][9][0] + arr[0][13][0] + arr[0][0][0]) / 3
        y = (arr[0][9][1] + arr[0][13][1] + arr[0][0][1]) / 3
        x = int(x * window_size[0] - window_size[0] * scale * 3.5)
        y = int(y * window_size[1] - window_size[1] * scale * 12)
        screen.blit(img_menu, (x, y))

        # Draw the active bar: outline first, then the filled part and its label
        if show_volume:
            pygame.draw.rect(screen, (255, 255, 255), (vol_bar_x, vol_bar_y, vol_bar_length, vol_bar_height), 3)
            vol = volume.GetMasterVolumeLevel()
            vol_range = voice_range[1] - voice_range[0]
            vol_bar_fill_length = int((vol - voice_range[0]) / vol_range * vol_bar_length)
            pygame.draw.rect(screen, (0, 255, 0), (vol_bar_x, vol_bar_y, vol_bar_fill_length, vol_bar_height))
            vol_text = f"Volume: {int((vol - voice_range[0]) / vol_range * 100)}%"
            vol_text_surface = hud_font.render(vol_text, True, (255, 255, 255))
            screen.blit(vol_text_surface, (vol_bar_x + vol_bar_length + 10, vol_bar_y))
        elif show_brightness:
            pygame.draw.rect(screen, (255, 255, 255),
                             (bright_bar_x, bright_bar_y, bright_bar_length, bright_bar_height), 3)
            if wmi_monitor is None:
                wmi_monitor = wmi.WMI(namespace='root/WMI')
            brightness = wmi_monitor.WmiMonitorBrightness()[0].CurrentBrightness
            bright_bar_fill_length = int(brightness / 100 * bright_bar_length)
            pygame.draw.rect(screen, (255, 255, 0),
                             (bright_bar_x, bright_bar_y, bright_bar_fill_length, bright_bar_height))
            bright_text = f"Brightness: {brightness}%"
            bright_text_surface = hud_font.render(bright_text, True, (255, 255, 255))
            screen.blit(bright_text_surface, (bright_bar_x + bright_bar_length + 10, bright_bar_y))

        pygame.display.flip()

        # Event handling: quit when "q" is pressed or the window is closed
        for event in pygame.event.get():
            quit_key = event.type == pygame.KEYDOWN and event.key == pygame.K_q
            if event.type == pygame.QUIT or quit_key:
                sys.exit(0)
finally:
    # Release the camera and shut pygame down on every exit path
    cap.release()
    pygame.quit()
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。