赞
踩
参考文章https://blog.xinpapa.com/2017/10/30/silk-wav/
电脑需安装gcc(https://www.jianshu.com/p/ff24a81f3637)和ffmpeg(http://download.csdn.net/download/keketrtr/10206787)
项目需求,需要使用讯飞的语音识别接口,将微信小程序上传的录音文件识别成文字返回
首先去讯飞开放平台中申请开通语音识别功能
在这里面下载SDK并解压。注意appid与SDK是一一关联的,初始化接口时需要用到该appid
我觉得需要注意的点:
1、下载讯飞语音SDK,把lib下的json-jena-1.0.jar和Msc.jar拷贝到工程的lib目录下。
/** * 获取音频文件时长(用到了jave-2.0.jar) * @param filePath 文件路径 * @return */ private long getDuration(String filePath) { long duration = 0;//音频长度,秒 File source = new File(filePath); Encoder encoder = new Encoder(); MultimediaInfo m = null; try { m = encoder.getInfo(source); } catch (EncoderException e) { e.printStackTrace(); } long ls = m.getDuration(); duration = ls/1000; return duration; }
我把我的java代码实现的微信小程序语音识别分享出来(已经跑通)
import net.sf.json.JSONObject; import java.io.File; import java.io.IOException; import java.util.List; import java.util.Map; /** * Created by ZhaoYuJie on 2018/1/15. */ public class VoiceUtil { /** * * @param silkPath silk文件的路径 * @return 音频转换出的文字内容 */ public static String convertSilkToText(String silkPath){ String pcmPath = silkPath.substring(0,silkPath.lastIndexOf("."))+".pcm"; String wavPath = silkPath.substring(0,silkPath.lastIndexOf("."))+".wav"; //获取当前操作系统 String os = System.getProperty("os.name").toLowerCase(); if(os.indexOf("linux")>=0){ wavPath = convertSilkToWav(silkPath); }else if(os.indexOf("windows")>=0){ convertSilkToPcm(silkPath,pcmPath); convertPcmToWav(pcmPath,wavPath); }else{ return null; } return wavToWords(wavPath); } /** * 将silk文件转换为pcm文件(用于windows系统) * @param silkPath 输入的silk格式的音频文件路径,例如D:/silk.silk * @param pcmPath 输出的pcm格式的音频文件路径,例如D:/result.pcm */ private static void convertSilkToPcm(String silkPath,String pcmPath){ File silk = new File(silkPath);//silk文件 File pcm = new File(pcmPath);//转码后的pcm文件 try { //silk转pcm,“-Fs_API 16000”设置输入音频的采样率为16000 String cmd = "cmd.exe /c " + ConfigUtil.getValue("windows_silk_convert_path") +" " + silk.getAbsolutePath() + " " + pcm.getAbsolutePath() + " -Fs_API 16000"; Runtime.getRuntime().exec(cmd); } catch (Exception e) { e.printStackTrace(); } } /** * 将pcm文件转换为wav文件(用于windows系统) * @param pcmPath 需要转码的pcm文件路径,例如D:/result.pcm * @param wavPath 转码后的wav文件路径,例如D:/result.wav */ private static void convertPcmToWav(String pcmPath,String wavPath){ File pcm = new File(pcmPath); File wav= new File(wavPath); Process exec = null; try { //pcm转wav或其它格式 String cmd = "cmd /c ffmpeg.exe -loglevel quiet -y -f s16le -ar 16000 -ac 1 -i " + pcm.getAbsolutePath() + " " + wav.getAbsolutePath(); exec = Runtime.getRuntime().exec(cmd); exec.waitFor(); } catch (Exception e) { e.printStackTrace(); } } /** * silk文件转wav文件(用于linux)<br/> * 需要修改converter.sh脚本的第70行,将ffmpeg -y -f s16le -ar 24000 -ac 1 -i "$1.pcm" "${1%.*}.$2" > /dev/null 
2>&1里面的24000改成16000(采样率) * @param silkPath silk文件路径 * @return 返回wav文件路径 */ private static String convertSilkToWav(String silkPath) { File silk = new File(silkPath);//silk文件 //执行converter.sh,silk转wav,这里执行后,会在tmp/silk/目录下生成wav音频文件 Process exec = null; try { exec = Runtime.getRuntime().exec("sh " + ConfigUtil.getValue("linux_silk_convert_path") + " " + silk.getAbsolutePath() + " wav"); exec.waitFor(); } catch (IOException e) { e.printStackTrace(); } catch (InterruptedException e) { e.printStackTrace(); } String wavPath = silkPath.substring(0,silkPath.lastIndexOf("."))+".wav"; return wavPath; } /** * 将wav文件转换为文字内容 * @param wavPath * @return */ private static String wavToWords(String wavPath){ //讯飞语音识别接口识别wav音频文件,转成文字返回 SRTool sr = new SRTool(); String words = null; try { words = sr.voice2words(wavPath); } catch (InterruptedException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } System.out.println("讯飞语音返回的json串:"+words); String result = sr2words(words); System.out.println("讯飞识别的语音结果:"+result); return result; } private static String sr2words(String jsonString){ StringBuffer sb = new StringBuffer(); String[] split = jsonString.split("}]}]}"); for (int i = 0; i < split.length; i++) { String s = split[i] + "}]}]}"; System.out.println(s); Map parse = JSONObject.fromObject(s); List<Map> ws = (List<Map>) parse.get("ws"); for (int i1 = 0; i1 < ws.size(); i1++) { List<Map> cw = (List<Map>)ws.get(i1).get("cw"); String w = cw.get(0).get("w").toString(); sb.append(w); } } return sb.toString(); } /** * 过滤掉字符串中除了中英文字符之外的字符 * @param str * @return 返回过滤后的字符串 */ public static String delRedundantCharacters(String str){ return str.replaceAll("(?i)[^a-zA-Z0-9\u4E00-\u9FA5]", ""); } public static void main(String[] args) { String content = convertSilkToText("D:\\upload\\voiceDir\\4\\oHosW0Yzg79GVpkMVl18yvZtvDzA\\f0178efc0fc84befb60c7196722b3345.silk"); System.out.println("音频文件解析结果:"+content); // System.out.println(delRedundantCharacters("你 好。")); } }
import com.iflytek.cloud.speech.*; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.util.ArrayList; /** * 讯飞语音工具 */ public class SRTool { private int perWaitTime = 100; private StringBuffer mResult = new StringBuffer(); static { SpeechUtility.createUtility(SpeechConstant.APPID+"="+ConfigUtil.getValue("XunFeiAppID"));//申请的appid } public String voice2words(String fileName) throws InterruptedException, IOException { return to(fileName); } public String to(String fileName) throws InterruptedException, IOException { File file = new File(fileName); if(!file.exists()){ throw new RuntimeException("要读取的文件不存在"); } FileInputStream fis = new FileInputStream(file); int len = 0; byte[] buf = new byte[fis.available()]; fis.read(buf); fis.close(); //1.创建SpeechRecognizer对象 SpeechRecognizer mIat = SpeechRecognizer.createRecognizer(); //2.设置听写参数,详见《MSC Reference Manual》SpeechConstant类 mIat.setParameter(SpeechConstant.DOMAIN, "iat"); mIat.setParameter(SpeechConstant.LANGUAGE, "zh_cn"); mIat.setParameter(SpeechConstant.ACCENT, "mandarin "); mIat.setParameter(SpeechConstant.AUDIO_SOURCE, "-1"); //3.开始听写 mIat.startListening(mRecoListener); //voiceBuffer为音频数据流,splitBuffer为自定义分割接口,将其以4.8k字节分割成数组 ArrayList<byte[]> buffers = splitBuffer(buf, buf.length, 4800); for (int i = 0; i < buffers.size(); i++) { // 每次写入msc数据4.8K,相当150ms录音数据 mIat.writeAudio(buffers.get(i), 0, buffers.get(i).length); } mIat.stopListening(); while (mIat.isListening()) { Thread.sleep(perWaitTime); } return mResult+""; } /** * 将字节缓冲区按照固定大小进行分割成数组 * * @param buffer 缓冲区 * @param length 缓冲区大小 * @param spsize 切割块大小 * @return */ private ArrayList<byte[]> splitBuffer(byte[] buffer, int length, int spsize) { ArrayList<byte[]> array = new ArrayList<byte[]>(); if (spsize <= 0 || length <= 0 || buffer == null || buffer.length < length) return array; int size = 0; while (size < length) { int left = length - size; if (spsize < left) { byte[] sdata = new byte[spsize]; System.arraycopy(buffer, 
size, sdata, 0, spsize); array.add(sdata); size += spsize; } else { byte[] sdata = new byte[left]; System.arraycopy(buffer, size, sdata, 0, left); array.add(sdata); size += left; } } return array; } //听写监听器 private RecognizerListener mRecoListener = new RecognizerListener() { public void onResult(RecognizerResult results, boolean isLast) { System.out.println("Result:" + results.getResultString()); mResult.append(results.getResultString()); } //会话发生错误回调接口 public void onError(SpeechError error) { System.out.println(error.getErrorCode()+"=========="+error.getErrorDesc()); System.out.println(error); } //开始录音 public void onBeginOfSpeech() { } //音量值0~30 public void onVolumeChange(int volume) { } @Override public void onVolumeChanged(int i) { } @Override public void onEndOfSpeech() { } @Override public void onEvent(int i, int i1, int i2, String s) { } }; }
import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;

/**
 * Read-only access to the values of {@code config.properties}, which is loaded
 * once from the classpath when this class is initialized.
 */
public class ConfigUtil {

    private static final Properties prop = new Properties();

    static {
        // A missing file leaves the properties empty instead of failing class
        // initialization: getResourceAsStream returns null in that case, and
        // loading from a null stream would throw inside this initializer.
        try (InputStream is = ConfigUtil.class.getClassLoader().getResourceAsStream("config.properties")) {
            if (is == null) {
                System.err.println("config.properties not found on the classpath; no configuration loaded");
            } else {
                prop.load(is);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Looks up a configuration value.
     *
     * @param key the property key
     * @return the value, or {@code null} when the key is not defined
     */
    public static String getValue(String key) {
        return prop.getProperty(key);
    }

    /**
     * Looks up a configuration value with a fallback.
     *
     * @param key          the property key
     * @param defaultValue value returned when the key is not defined
     * @return the configured value, or {@code defaultValue}
     */
    public static String getValue(String key, String defaultValue) {
        return prop.getProperty(key, defaultValue);
    }

    /**
     * Looks up a configuration value and parses it as an integer.
     *
     * @param key the property key
     * @return the parsed value
     * @throws NumberFormatException when the key is not defined or its value is not an integer
     */
    public static Integer getInteger(String key) {
        return Integer.valueOf(prop.getProperty(key));
    }
}
config.properties
#linux系统下silk转wav的命令路径
linux_silk_convert_path=/usr/silk-v3-decoder-master/converter.sh
#windows系统下silk转pcm的命令路径
windows_silk_convert_path=D:/silk-v3-decoder-master/windows/silk_v3_decoder.exe
#讯飞开放平台申请的appid(SRTool初始化时读取)
XunFeiAppID=你的appid
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。