当前位置:   article > 正文

MAC使用python下载字幕_ai去字幕python下载

ai去字幕python下载

安装软件

brew install tesseract tesseract-lang

(脚本中使用了 lang='chi_sim',简体中文语言包需要通过 tesseract-lang 安装。)

安装pytesseract和pillow

  1. pip install pytesseract
  2. pip install pillow
  3. pip install SpeechRecognition
  4. pip3 install soundfile
  5. pip3 install torch 
  6. pip3 install openai-whisper

脚本参考:

  1. import os
  2. import sys
  3. import cv2
  4. import pytesseract
  5. import speech_recognition as sr
  6. from moviepy.video.io.VideoFileClip import VideoFileClip
  7. from pydub import AudioSegment
  def extract_subtitles(video_path, output_dir):
      """Transcribe the audio of an MP4 file with Whisper and print the text.

      The audio is decoded with pydub, exported to ``audio.wav`` in the
      current working directory, loaded through ``speech_recognition`` and
      passed to its Whisper recognizer.

      Args:
          video_path: Path of the video file; it is decoded as MP4.
          output_dir: Not used by this function.
      """
      # Dump the audio to a WAV file that sr.AudioFile can open.
      AudioSegment.from_file(video_path, format='mp4').export("audio.wav", format="wav")

      recognizer = sr.Recognizer()
      with sr.AudioFile('audio.wav') as wav_source:
          recorded = recognizer.record(wav_source)

      print(recognizer.recognize_whisper(recorded))
  def extract_subtitles_v2(video_path, output_dir):
      """Transcribe a video's audio track with Whisper and print the text.

      The audio track is written to ``audio.wav`` in the current working
      directory with moviepy, then read back through ``speech_recognition``
      and passed to its Whisper recognizer.

      Args:
          video_path: Path of the video file to transcribe.
          output_dir: Not used by this function.
      """
      wav_path = 'audio.wav'
      r = sr.Recognizer()

      clip = VideoFileClip(video_path)
      try:
          clip.audio.write_audiofile(wav_path)
      finally:
          # Always release the clip, even if writing the audio fails.
          clip.close()

      def transcribe_audio(sound):
          """Return the Whisper transcript of the WAV file at path *sound*.

          An empty string is returned when the recognizer raises
          ``sr.UnknownValueError``; the error is printed.
          """
          transcript = ''
          # sr.AudioFile needs a filename or file-like object, so it is given
          # the WAV path rather than a pydub AudioSegment.
          with sr.AudioFile(sound) as source:
              audio_text = r.record(source)
          try:
              transcript = r.recognize_whisper(audio_text)
          except sr.UnknownValueError as e:
              print(e)
          return transcript

      transcription = transcribe_audio(wav_path)
      print(transcription)
  def extract_subtitles_v1(video_path, output_dir):
      """OCR every frame of a video with Tesseract and print each result.

      Frames are OCR'd one at a time as they are decoded, so memory use does
      not grow with the length of the video. If the first read fails, nothing
      is printed.

      Args:
          video_path: Path of the video file to read frames from.
          output_dir: Not used by this function.
      """
      vidcap = cv2.VideoCapture(video_path)
      try:
          success, image = vidcap.read()
          # Only OCR frames that were actually decoded; a failed read returns
          # no image.
          while success:
              # 'chi_sim' is Simplified Chinese; pass lang='eng' for English.
              text = pytesseract.image_to_string(image, lang='chi_sim')
              print(text)
              success, image = vidcap.read()
      finally:
          vidcap.release()
  def extract_subtitles_v3(video_path, output_dir):
      """Stream a video frame by frame, OCR each frame and print the text.

      Every successfully decoded frame is passed to Tesseract, starting with
      the first one. The loop stops at the first failed read, so no empty
      frame is ever handed to the OCR engine.

      Args:
          video_path: Path of the video file to read frames from.
          output_dir: Not used by this function.
      """
      vidcap = cv2.VideoCapture(video_path)
      try:
          while True:
              success, image = vidcap.read()
              if not success:
                  # End of stream or unreadable file: there is no frame to OCR.
                  break
              # 'chi_sim' selects Tesseract's Simplified Chinese language data.
              text = pytesseract.image_to_string(image, lang='chi_sim')
              print(text)
      finally:
          vidcap.release()
  if __name__ == '__main__':
      # Script entry point.
      # Usage: python <script> <video_path> <output_dir>
      current_dir = os.getcwd()
      print("current_dir:", current_dir)

      args = sys.argv
      print('args:', args)
      if len(args) < 3:
          # Exit with a usage message instead of an IndexError below.
          sys.exit(f"usage: {args[0]} <video_path> <output_dir>")

      # Despite its name, input_dir is the path of the video file; it is
      # passed as the extractor's video_path argument.
      input_dir = args[1]
      output_dir = args[2]

      # extract_subtitles and extract_subtitles_v1 take the same arguments and
      # can be called here instead.
      extract_subtitles_v3(input_dir, output_dir)

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/菜鸟追梦旅行/article/detail/686862
推荐阅读
相关标签
  

闽ICP备14008679号