赞
踩
废话不多说,直接上代码,先安装环境
需要安装的包:
json(Python 标准库自带,无需单独安装)
pyaudio
numpy
vosk
新建一个py文件写入:
import json

import numpy as np
import pyaudio
from vosk import KaldiRecognizer, Model, SetLogLevel

# Capture settings shared by the recording and recognition steps.
FORMAT = pyaudio.paInt16  # sample format of the audio stream
RATE = 44100  # sampling rate in Hz
CHUNK = 4000  # number of frames read from the stream per iteration
THRESHOLDNUM = 30  # quiet-chunk score at which recording stops
THRESHOLD = 100  # mean absolute amplitude below which a chunk counts as quiet


def _record_until_silence():
    """Record single-channel audio until the quiet-chunk score reaches THRESHOLDNUM.

    The score goes up by one for every chunk whose mean absolute amplitude is
    below THRESHOLD and goes down by one (never below zero) for every louder
    chunk, so short pauses inside a sentence do not end the recording.

    Returns:
        The list of raw byte chunks read from the stream, in capture order.
    """
    audio = pyaudio.PyAudio()
    try:
        stream = audio.open(
            format=FORMAT,
            channels=1,
            rate=RATE,
            input=True,
            frames_per_buffer=CHUNK,
        )
        try:
            frames = []
            print("开始录音...")
            quiet_score = 0
            while quiet_score < THRESHOLDNUM:
                data = stream.read(CHUNK, exception_on_overflow=False)
                samples = np.frombuffer(data, dtype=np.int16)
                frame_energy = np.mean(np.abs(samples))
                if frame_energy < THRESHOLD:
                    quiet_score += 1
                elif quiet_score > 0:
                    quiet_score -= 1
                frames.append(data)
            print("停止录音!")
            return frames
        finally:
            # Release the stream even when read() raises or the user hits Ctrl-C.
            stream.stop_stream()
            stream.close()
    finally:
        audio.terminate()


def _transcribe(model, frames):
    """Feed the recorded chunks to a Vosk recognizer and return the joined text.

    Args:
        model: A loaded vosk ``Model``.
        frames: Raw audio chunks as returned by ``_record_until_silence``.

    Returns:
        The recognized text with all whitespace removed.
    """
    rec = KaldiRecognizer(model, RATE)
    # Kept from the original script; only the "text" field is consumed below.
    rec.SetWords(True)

    pieces = []
    for data in frames:
        # AcceptWaveform() returning true means a finished segment can be read.
        if rec.AcceptWaveform(data):
            pieces.append(json.loads(rec.Result()).get("text", ""))
    # Collect whatever is still buffered after the last chunk.
    pieces.append(json.loads(rec.FinalResult()).get("text", ""))

    # Strip every whitespace character from the combined result.
    return "".join("".join(pieces).split())


def SaveWave(model):
    """Record one utterance from the audio input and return its transcription.

    Recording starts immediately and stops once the input has been quiet for
    long enough (see ``_record_until_silence``). The captured audio is then
    decoded with the given Vosk model.

    Args:
        model: A loaded vosk ``Model``.

    Returns:
        The recognized text with whitespace removed; an empty string when
        nothing was recognized.
    """
    frames = _record_until_silence()
    return _transcribe(model, frames)


if __name__ == "__main__":
    # Set the log level before loading the model so that it also applies to
    # the messages emitted while the model is being loaded.
    SetLogLevel(-1)
    model = Model("vosk-model-small-cn-0.22")
    try:
        while True:
            res = SaveWave(model)
            if res:
                print(res)
    except KeyboardInterrupt:
        # Ctrl-C is the only way out of the loop; exit without a traceback.
        pass
下载
https://alphacephei.com/vosk/models/vosk-model-small-cn-0.22.zip
并解压到和上述文件同目录下并开始运行即可!
运行效果:
开始录音...
停止录音!
我说一句话
方便救急,比配其他环境快得多,5分钟就能跑起来!!!
给出一个比较大的模型:
https://alphacephei.com/vosk/models/vosk-model-cn-0.15.zip
替换路径即可
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。