赞
踩
目录
音频转特征向量
GitHub - librosa/librosa: Python library for audio and music analysis
2024.04.27 在 Win11 系统上测试通过
pip install librosa transformers torch numpy
- import os
-
- import numpy as np
- from transformers import Wav2Vec2Processor, Wav2Vec2Model
- import torch
-
- import librosa
-
-
def load_example_input(audio_path, processor=None):
    """Load an audio file and return its Wav2Vec2 input values as a tensor.

    Args:
        audio_path: Location of the audio file, handed to ``librosa.load``.
        processor: Callable whose result exposes ``input_values``. When
            ``None``, a ``Wav2Vec2Processor`` is loaded from the
            ``facebook/wav2vec2-base-960h`` checkpoint.

    Returns:
        A float32 ``torch.Tensor`` with a single row, shape
        ``(1, num_values)``.
    """
    if processor is None:
        processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")

    # librosa is asked for 16 kHz audio, and the rate it reports back is
    # forwarded to the processor. The path is passed through unchanged:
    # a one-argument os.path.join added nothing.
    speech_array, sampling_rate = librosa.load(audio_path, sr=16000)

    input_values = processor(speech_array, sampling_rate=sampling_rate).input_values

    # Flatten to one row explicitly. The earlier squeeze-then-reshape raised
    # IndexError when squeeze collapsed a one-sample clip to a 0-d array.
    audio_feature = np.asarray(input_values).reshape(1, -1)

    return torch.as_tensor(audio_feature, dtype=torch.float32)
-
-
# Sample clip used for the demonstration call below.
audio_path = r'demo/wav/man.wav'

# The returned tensor is not stored or printed here.
load_example_input(audio_path)
pip install SpeechRecognition
pip install pyaudio
- import librosa
- import speech_recognition as sr
-
# Record audio from the microphone.
r = sr.Recognizer()
with sr.Microphone() as mic:
    print("请开始说话...")
    audio = r.listen(mic)

# Convert the recorded audio to text; only the recognizer call sits in the
# try body, and the transcript is printed on the success path.
try:
    text = r.recognize_google(audio)
except sr.UnknownValueError:
    print("无法识别音频")
except sr.RequestError as e:
    print(f"请求出错:{e}")
else:
    print("识别结果:", text)
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。