
python-OpenCV: Adjusting the Computer Volume with Hand Gestures Recognized from Video

Preface

For adjusting the volume from Python, the usual approach is to use the pycaw library directly. The problem is that pycaw does not accept the familiar 0-100 volume number: if I want my computer volume at 40, I cannot simply pass in 40. Since there is no clean internal relationship between the two scales, the only practical option is a one-to-one lookup mapping. For details, see:

python - Setting the computer volume with pycaw (including the conversion) - 独憩的博客 - CSDN博客
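For reference, here is a minimal sketch of why a lookup table is needed at all. It only uses pycaw calls that also appear later in this post: GetVolumeRange and SetMasterVolumeLevel work in decibels, not in the 0-100 scale of the Windows slider, and the exact dB range varies by device (the -65.25 and -13.62 values below come from the table used later in this post):

from ctypes import cast, POINTER
from comtypes import CLSCTX_ALL
from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume

devices = AudioUtilities.GetSpeakers()
interface = devices.Activate(IAudioEndpointVolume._iid_, CLSCTX_ALL, None)
volume = cast(interface, POINTER(IAudioEndpointVolume))

# (min_dB, max_dB, step_dB); on my machine roughly (-65.25, 0.0, ...)
print(volume.GetVolumeRange())

# SetMasterVolumeLevel takes a dB value from that range; for example, -13.62 dB
# corresponds to about 40 on the Windows slider -- hence the lookup table below.
volume.SetMasterVolumeLevel(-13.62, None)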

Hand detection is done with the mediapipe library:

python-OpenCV hand tracking in video with the mediapipe library - 独憩的博客 - CSDN博客

There are plenty of tutorials online for controlling the computer volume with gestures recognized from video. Most of them measure the distance between two fingertips and map it directly to the volume. The problem with that approach is that as soon as I move closer to or farther from the camera, the control becomes hard to use. This post improves on it by adding a calibration step: first capture the maximum distance between the two fingertips at the current position, then map subsequent distances relative to that maximum (len_max in the code).

Imports:

import cv2
import math
import datetime
import mediapipe as mp
from ctypes import cast, POINTER
from comtypes import CLSCTX_ALL
from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume

Setting up MediaPipe and the computer's audio output device:

mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands
devices = AudioUtilities.GetSpeakers()
interface = devices.Activate(
    IAudioEndpointVolume._iid_, CLSCTX_ALL, None)
volume = cast(interface, POINTER(IAudioEndpointVolume))
volRange = volume.GetVolumeRange()

Defining some helper functions:

This function maps the detected hand landmarks onto image (pixel) coordinates, which the later point drawing and distance computation rely on.

def Normalize_landmarks(image, hand_landmarks):
    # Convert MediaPipe's normalized (0-1) landmarks to pixel coordinates.
    new_landmarks = []
    for i in range(0, len(hand_landmarks.landmark)):
        float_x = hand_landmarks.landmark[i].x
        float_y = hand_landmarks.landmark[i].y
        width = image.shape[1]
        height = image.shape[0]
        # Returns None if the landmark falls outside the frame.
        pt = mp_drawing._normalized_to_pixel_coordinates(float_x, float_y, width, height)
        new_landmarks.append(pt)
    return new_landmarks
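Note that _normalized_to_pixel_coordinates is a private MediaPipe helper and returns None for landmarks outside the frame, which is why the main loop later wraps the drawing call in try/except. If you would rather not rely on the private helper, a self-contained alternative could look like the following sketch (normalize_landmarks_manual is a hypothetical name, not part of the original code):

def normalize_landmarks_manual(image, hand_landmarks):
    # Convert MediaPipe's normalized (0-1) landmark coordinates to pixel coordinates.
    height, width = image.shape[:2]
    points = []
    for lm in hand_landmarks.landmark:
        x = min(max(int(lm.x * width), 0), width - 1)    # clamp to the image borders
        y = min(max(int(lm.y * height), 0), height - 1)
        points.append((x, y))
    return points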

This is the drawing function: it marks the thumb tip and the index fingertip separately, draws a line between them, and returns their distance. For why the indices are landmarks[4] and landmarks[8], see the mediapipe hand-tracking post linked above.

def Draw_hand_points(image, normalized_hand_landmarks):
    # landmark 4 = thumb tip, landmark 8 = index fingertip
    cv2.circle(image, normalized_hand_landmarks[4], 12, (255, 0, 255), -1, cv2.LINE_AA)
    cv2.circle(image, normalized_hand_landmarks[8], 12, (255, 0, 255), -1, cv2.LINE_AA)
    cv2.line(image, normalized_hand_landmarks[4], normalized_hand_landmarks[8], (255, 0, 255), 3)
    x1, y1 = normalized_hand_landmarks[4][0], normalized_hand_landmarks[4][1]
    x2, y2 = normalized_hand_landmarks[8][0], normalized_hand_landmarks[8][1]
    mid_x, mid_y = (x1 + x2) // 2, (y1 + y2) // 2
    length = math.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)  # thumb-to-index distance in pixels
    if length < 100:
        cv2.circle(image, (mid_x, mid_y), 12, (0, 255, 0), cv2.FILLED)
    else:
        cv2.circle(image, (mid_x, mid_y), 12, (255, 0, 255), cv2.FILLED)
    return image, length
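For reference, MediaPipe also exposes these indices by name through the HandLandmark enum, so the magic numbers can be spelled out (a small sketch using the mp_hands alias defined above; THUMB_TIP and INDEX_TIP are names chosen here for illustration):

THUMB_TIP = mp_hands.HandLandmark.THUMB_TIP          # index 4
INDEX_TIP = mp_hands.HandLandmark.INDEX_FINGER_TIP   # index 8
# e.g. normalized_hand_landmarks[THUMB_TIP] instead of normalized_hand_landmarks[4]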

These two functions, backed by one shared lookup table, map the computer's volume number (0-100) to the dB value pycaw uses and back. It is crude but effective; since the reverse direction cannot be matched exactly one-to-one, the reverse function simply returns the table entry with the smallest error:

# Lookup table: Windows volume number (0-100) -> dB value used by pycaw.
vol_dict = {0: -65.25, 1: -56.99, 2: -51.67, 3: -47.74, 4: -44.62, 5: -42.03, 6: -39.82, 7: -37.89, 8: -36.17, 9: -34.63,
            10: -33.24, 11: -31.96, 12: -30.78, 13: -29.68, 14: -28.66, 15: -27.7, 16: -26.8, 17: -25.95, 18: -25.15, 19: -24.38,
            20: -23.65, 21: -22.96, 22: -22.3, 23: -21.66, 24: -21.05, 25: -20.46, 26: -19.9, 27: -19.35, 28: -18.82, 29: -18.32,
            30: -17.82, 31: -17.35, 32: -16.88, 33: -16.44, 34: -16.0, 35: -15.58, 36: -15.16, 37: -14.76, 38: -14.37, 39: -13.99,
            40: -13.62, 41: -13.26, 42: -12.9, 43: -12.56, 44: -12.22, 45: -11.89, 46: -11.56, 47: -11.24, 48: -10.93, 49: -10.63,
            50: -10.33, 51: -10.04, 52: -9.75, 53: -9.47, 54: -9.19, 55: -8.92, 56: -8.65, 57: -8.39, 58: -8.13, 59: -7.88,
            60: -7.63, 61: -7.38, 62: -7.14, 63: -6.9, 64: -6.67, 65: -6.44, 66: -6.21, 67: -5.99, 68: -5.76, 69: -5.55,
            70: -5.33, 71: -5.12, 72: -4.91, 73: -4.71, 74: -4.5, 75: -4.3, 76: -4.11, 77: -3.91, 78: -3.72, 79: -3.53,
            80: -3.34, 81: -3.15, 82: -2.97, 83: -2.79, 84: -2.61, 85: -2.43, 86: -2.26, 87: -2.09, 88: -1.91, 89: -1.75,
            90: -1.58, 91: -1.41, 92: -1.25, 93: -1.09, 94: -0.93, 95: -0.77, 96: -0.61, 97: -0.46, 98: -0.3, 99: -0.15, 100: 0.0}


def vol_tansfer(x):
    # Volume number (0-100) -> dB value for SetMasterVolumeLevel.
    return vol_dict[x]


def vol_tansfer_reverse(x):
    # dB value from GetMasterVolumeLevel -> closest volume number (0-100).
    error = []
    for i in range(101):
        error.append(abs(vol_dict[i] - x))
    return error.index(min(error))
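As an aside, the hard-coded table can in principle be regenerated for your own audio device instead of copied. pycaw also exposes the scalar interface (SetMasterVolumeLevelScalar / GetMasterVolumeLevelScalar, 0.0-1.0), which on a typical Windows endpoint corresponds to the 0-100 slider. A sketch under that assumption (build_volume_table is a name chosen here, not part of the original code):

def build_volume_table(volume):
    # Build a 0-100 -> dB table for the local endpoint by sweeping the scalar volume.
    table = {}
    original = volume.GetMasterVolumeLevelScalar()          # remember the current setting
    for v in range(101):
        volume.SetMasterVolumeLevelScalar(v / 100.0, None)  # 0.0 .. 1.0
        table[v] = round(volume.GetMasterVolumeLevel(), 2)  # read back the dB value
    volume.SetMasterVolumeLevelScalar(original, None)       # restore the volume
    return table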

Main loop:

The logic is one outer loop containing two phases. The first is a calibration loop that runs for 5 seconds and records len_max, the largest thumb-to-index distance seen in that window. After calibration, each measured distance is mapped to a computer volume in the 0-100 range by:

vol = int(length / len_max * 100)
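For example, if calibration measured len_max = 250 pixels and the current thumb-to-index distance is 100 pixels, the target volume is int(100 / 250 * 100) = 40. A small helper capturing this mapping could look like the sketch below (length_to_volume is a name used here for illustration; the original loop inlines the same computation after clamping length to len_max):

def length_to_volume(length, len_max):
    # Map the current fingertip distance to 0-100, clamped at the calibrated maximum.
    if len_max <= 0:
        return 0
    return min(int(length / len_max * 100), 100)

The main loop itself: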
hands = mp_hands.Hands(
    min_detection_confidence=0.5, min_tracking_confidence=0.5)
cap = cv2.VideoCapture(0)
len_max = 0
len_min = 0
num = 0
while cap.isOpened():
    stop = datetime.datetime.now() + datetime.timedelta(seconds=5)
    if num == 0:
        # Calibration phase: record the largest thumb-to-index distance for 5 seconds.
        while datetime.datetime.now() < stop:
            success, image = cap.read()
            if not success:
                print("camera frame is empty!")
                continue
            image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
            image.flags.writeable = False
            results = hands.process(image)
            image.flags.writeable = True
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            if results.multi_hand_landmarks:
                for hand_landmarks in results.multi_hand_landmarks:
                    mp_drawing.draw_landmarks(
                        image, hand_landmarks, mp_hands.HAND_CONNECTIONS)
                    normalized_landmarks = Normalize_landmarks(image, hand_landmarks)
                    image, length = Draw_hand_points(image, normalized_landmarks)
                    if length > len_max:
                        len_max = length
            strRate = 'Start calibration'
            cv2.putText(image, strRate, (10, 410), cv2.FONT_HERSHEY_COMPLEX, 1.2, (255, 0, 0), 2)
            strRate1 = 'max length = %d' % len_max
            cv2.putText(image, strRate1, (10, 110), cv2.FONT_HERSHEY_COMPLEX, 1.2, (255, 0, 0), 2)
            cv2.imshow('result', image)
            if cv2.waitKey(5) & 0xFF == 27:
                break
        num = 1
    # Control phase: map the current distance to a volume level.
    success, image = cap.read()
    if not success:
        print("camera frame is empty!")
        continue
    image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
    image.flags.writeable = False
    results = hands.process(image)
    image.flags.writeable = True
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            mp_drawing.draw_landmarks(
                image, hand_landmarks, mp_hands.HAND_CONNECTIONS)
            normalized_landmarks = Normalize_landmarks(image, hand_landmarks)
            try:
                image, length = Draw_hand_points(image, normalized_landmarks)
                # print(length)  # roughly 20~300 pixels
                cv2.rectangle(image, (50, 150), (85, 350), (255, 0, 0), 1)
                if length > len_max:
                    length = len_max
                vol = int(length / len_max * 100)
                volume.SetMasterVolumeLevel(vol_tansfer(vol), None)
                cv2.rectangle(image, (50, 150 + 200 - 2 * vol), (85, 350), (255, 0, 0), cv2.FILLED)
                percent = int(length / len_max * 100)
                # print(percent)
                strRate = str(percent) + '%'
                cv2.putText(image, strRate, (40, 410), cv2.FONT_HERSHEY_COMPLEX, 1.2, (255, 0, 0), 2)
                vol_now = vol_tansfer_reverse(volume.GetMasterVolumeLevel())
                strvol = 'Current volume is ' + str(vol_now)
                cv2.putText(image, strvol, (10, 470), cv2.FONT_HERSHEY_COMPLEX, 1.2, (255, 0, 0), 2)
            except Exception:
                # A landmark outside the frame (None coordinates) or len_max == 0 ends up here.
                pass
    cv2.imshow('result', image)
    if cv2.waitKey(10) & 0xFF == ord('q'):
        len_max = 0   # 'q' resets the calibration
        num = 0
    if cv2.waitKey(10) & 0xFF == 27:
        break         # Esc quits
cv2.destroyAllWindows()
hands.close()
cap.release()

Finally, press "q" to recalibrate and press "Esc" to quit.

Result:

[Screenshot of the running demo from the original post]

Full code:

import cv2
import math
import datetime
import mediapipe as mp
from ctypes import cast, POINTER
from comtypes import CLSCTX_ALL
from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume

mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands

# Audio endpoint (Windows Core Audio via pycaw).
devices = AudioUtilities.GetSpeakers()
interface = devices.Activate(
    IAudioEndpointVolume._iid_, CLSCTX_ALL, None)
volume = cast(interface, POINTER(IAudioEndpointVolume))
volRange = volume.GetVolumeRange()
minVol = volRange[0]
maxVol = volRange[1]
print(minVol, maxVol)


def Normalize_landmarks(image, hand_landmarks):
    # Convert MediaPipe's normalized (0-1) landmarks to pixel coordinates.
    new_landmarks = []
    for i in range(0, len(hand_landmarks.landmark)):
        float_x = hand_landmarks.landmark[i].x
        float_y = hand_landmarks.landmark[i].y
        width = image.shape[1]
        height = image.shape[0]
        # Returns None if the landmark falls outside the frame.
        pt = mp_drawing._normalized_to_pixel_coordinates(float_x, float_y, width, height)
        new_landmarks.append(pt)
    return new_landmarks


def Draw_hand_points(image, normalized_hand_landmarks):
    # landmark 4 = thumb tip, landmark 8 = index fingertip
    cv2.circle(image, normalized_hand_landmarks[4], 12, (255, 0, 255), -1, cv2.LINE_AA)
    cv2.circle(image, normalized_hand_landmarks[8], 12, (255, 0, 255), -1, cv2.LINE_AA)
    cv2.line(image, normalized_hand_landmarks[4], normalized_hand_landmarks[8], (255, 0, 255), 3)
    x1, y1 = normalized_hand_landmarks[4][0], normalized_hand_landmarks[4][1]
    x2, y2 = normalized_hand_landmarks[8][0], normalized_hand_landmarks[8][1]
    mid_x, mid_y = (x1 + x2) // 2, (y1 + y2) // 2
    length = math.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)  # thumb-to-index distance in pixels
    if length < 100:
        cv2.circle(image, (mid_x, mid_y), 12, (0, 255, 0), cv2.FILLED)
    else:
        cv2.circle(image, (mid_x, mid_y), 12, (255, 0, 255), cv2.FILLED)
    return image, length


# Lookup table: Windows volume number (0-100) -> dB value used by pycaw.
vol_dict = {0: -65.25, 1: -56.99, 2: -51.67, 3: -47.74, 4: -44.62, 5: -42.03, 6: -39.82, 7: -37.89, 8: -36.17, 9: -34.63,
            10: -33.24, 11: -31.96, 12: -30.78, 13: -29.68, 14: -28.66, 15: -27.7, 16: -26.8, 17: -25.95, 18: -25.15, 19: -24.38,
            20: -23.65, 21: -22.96, 22: -22.3, 23: -21.66, 24: -21.05, 25: -20.46, 26: -19.9, 27: -19.35, 28: -18.82, 29: -18.32,
            30: -17.82, 31: -17.35, 32: -16.88, 33: -16.44, 34: -16.0, 35: -15.58, 36: -15.16, 37: -14.76, 38: -14.37, 39: -13.99,
            40: -13.62, 41: -13.26, 42: -12.9, 43: -12.56, 44: -12.22, 45: -11.89, 46: -11.56, 47: -11.24, 48: -10.93, 49: -10.63,
            50: -10.33, 51: -10.04, 52: -9.75, 53: -9.47, 54: -9.19, 55: -8.92, 56: -8.65, 57: -8.39, 58: -8.13, 59: -7.88,
            60: -7.63, 61: -7.38, 62: -7.14, 63: -6.9, 64: -6.67, 65: -6.44, 66: -6.21, 67: -5.99, 68: -5.76, 69: -5.55,
            70: -5.33, 71: -5.12, 72: -4.91, 73: -4.71, 74: -4.5, 75: -4.3, 76: -4.11, 77: -3.91, 78: -3.72, 79: -3.53,
            80: -3.34, 81: -3.15, 82: -2.97, 83: -2.79, 84: -2.61, 85: -2.43, 86: -2.26, 87: -2.09, 88: -1.91, 89: -1.75,
            90: -1.58, 91: -1.41, 92: -1.25, 93: -1.09, 94: -0.93, 95: -0.77, 96: -0.61, 97: -0.46, 98: -0.3, 99: -0.15, 100: 0.0}


def vol_tansfer(x):
    # Volume number (0-100) -> dB value for SetMasterVolumeLevel.
    return vol_dict[x]


def vol_tansfer_reverse(x):
    # dB value from GetMasterVolumeLevel -> closest volume number (0-100).
    error = []
    for i in range(101):
        error.append(abs(vol_dict[i] - x))
    return error.index(min(error))


hands = mp_hands.Hands(
    min_detection_confidence=0.5, min_tracking_confidence=0.5)
cap = cv2.VideoCapture(0)
len_max = 0
len_min = 0
num = 0
while cap.isOpened():
    stop = datetime.datetime.now() + datetime.timedelta(seconds=5)
    if num == 0:
        # Calibration phase: record the largest thumb-to-index distance for 5 seconds.
        while datetime.datetime.now() < stop:
            success, image = cap.read()
            if not success:
                print("camera frame is empty!")
                continue
            image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
            image.flags.writeable = False
            results = hands.process(image)
            image.flags.writeable = True
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            if results.multi_hand_landmarks:
                for hand_landmarks in results.multi_hand_landmarks:
                    mp_drawing.draw_landmarks(
                        image, hand_landmarks, mp_hands.HAND_CONNECTIONS)
                    normalized_landmarks = Normalize_landmarks(image, hand_landmarks)
                    image, length = Draw_hand_points(image, normalized_landmarks)
                    if length > len_max:
                        len_max = length
            strRate = 'Start calibration'
            cv2.putText(image, strRate, (10, 410), cv2.FONT_HERSHEY_COMPLEX, 1.2, (255, 0, 0), 2)
            strRate1 = 'max length = %d' % len_max
            cv2.putText(image, strRate1, (10, 110), cv2.FONT_HERSHEY_COMPLEX, 1.2, (255, 0, 0), 2)
            cv2.imshow('result', image)
            if cv2.waitKey(5) & 0xFF == 27:
                break
        num = 1
    # Control phase: map the current distance to a volume level.
    success, image = cap.read()
    if not success:
        print("camera frame is empty!")
        continue
    image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
    image.flags.writeable = False
    results = hands.process(image)
    image.flags.writeable = True
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            mp_drawing.draw_landmarks(
                image, hand_landmarks, mp_hands.HAND_CONNECTIONS)
            normalized_landmarks = Normalize_landmarks(image, hand_landmarks)
            try:
                image, length = Draw_hand_points(image, normalized_landmarks)
                # print(length)  # roughly 20~300 pixels
                cv2.rectangle(image, (50, 150), (85, 350), (255, 0, 0), 1)
                if length > len_max:
                    length = len_max
                vol = int(length / len_max * 100)
                volume.SetMasterVolumeLevel(vol_tansfer(vol), None)
                cv2.rectangle(image, (50, 150 + 200 - 2 * vol), (85, 350), (255, 0, 0), cv2.FILLED)
                percent = int(length / len_max * 100)
                # print(percent)
                strRate = str(percent) + '%'
                cv2.putText(image, strRate, (40, 410), cv2.FONT_HERSHEY_COMPLEX, 1.2, (255, 0, 0), 2)
                vol_now = vol_tansfer_reverse(volume.GetMasterVolumeLevel())
                strvol = 'Current volume is ' + str(vol_now)
                cv2.putText(image, strvol, (10, 470), cv2.FONT_HERSHEY_COMPLEX, 1.2, (255, 0, 0), 2)
            except Exception:
                # A landmark outside the frame (None coordinates) or len_max == 0 ends up here.
                pass
    cv2.imshow('result', image)
    if cv2.waitKey(10) & 0xFF == ord('q'):
        len_max = 0   # 'q' resets the calibration
        num = 0
    if cv2.waitKey(10) & 0xFF == 27:
        break         # Esc quits
cv2.destroyAllWindows()
hands.close()
cap.release()
