当前位置:   article > 正文

Windows系统实现唤醒+合成+命令词智能语音交互_离线唤醒+离线合成+离线命令词实现智能交互

离线唤醒+离线合成+离线命令词实现智能交互

1、之前写过离线能力的调用,今天来个终极版:实现智能语音交互(也可以进一步结合大模型)的示例,下面进入正题。效果演示见B站视频《离线唤醒+离线合成+离线命令词实现智能交互》(哔哩哔哩 bilibili)。

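2、到讯飞开放平台下载"唤醒+合成+命令词"的离线组合包,找到其中64位的msc动态库(msc_x64.dll),复制三份并分别重命名为 ivw_msc_x64.dll、tts_msc_x64.dll、asr_msc_x64.dll,放到 src/main/resources 目录下(与下文常量类中的 IVW_DLL_PATH、TTS_DLL_PATH、ASR_DLL_PATH 保持一致)。一定要注意路径位置,不然会出现错误。下载包里的 msc 目录原封不动地拷贝到 src/main/resources 下即可。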

3、常量类的定义,各位直接复制粘贴即可,注意换自己的APPID,不然报错的

  1. package com.day.config;
  2. import com.sun.jna.ptr.IntByReference;
  3. import javax.sound.sampled.*;
  4. import java.io.ByteArrayOutputStream;
  5. import java.io.FileInputStream;
  6. public class Constants {
  7. // 构造16K 16BIT 单声道音频
  8. public static final String APPID = "5e11538f"; // APPID
  9. public static final String WORK_DIR = "src/main/resources";
  10. // 1、唤醒相关 ssb_param,一定注意IVW_SSB_PARAMS的fo|xxx资源的路径,xxx取值是指WORK_DIR目录下/msc/xxx xxx是以后的路径开始拼接的!!!!!!!!!!!
  11. public static final AudioFormat IVW_ASR_AUDIO_FORMAT = new AudioFormat(16000F, 16, 1, true, false);
  12. public static final String IVW_DLL_PATH = "src/main/resources/ivw_msc_x64.dll"; // windows动态库路径
  13. public static final String IVW_LOGIN_PARAMS = "appid = " + APPID + ", work_dir = " + WORK_DIR;
  14. public static final String IVW_SSB_PARAMS = "ivw_threshold=0:1450,sst=wakeup,ivw_shot_word=1,ivw_res_path =fo|res/ivw/wakeupresource.jet";
  15. public static IntByReference IVW_ERROR_CODE = new IntByReference(-100);
  16. public static final Integer IVW_FRAME_SIZE = 6400; // 一定要每200ms写10帧,否则会出现唤醒一段时间后无法唤醒的问题,一帧的大小为640B,其他大小可能导致无法唤醒。
  17. public static Integer IVW_AUDIO_STATUS = 1;
  18. public static DataLine.Info IVW_ASR_DATA_LINE_INFO = new DataLine.Info(TargetDataLine.class, IVW_ASR_AUDIO_FORMAT);
  19. public static TargetDataLine IVW_ASR_TARGET_DATA_LINE; // 录音
  20. static {
  21. try {
  22. IVW_ASR_TARGET_DATA_LINE = (TargetDataLine) AudioSystem.getLine(IVW_ASR_DATA_LINE_INFO);
  23. } catch (LineUnavailableException e) {
  24. e.printStackTrace();
  25. }
  26. }
  27. // 2、合成相关
  28. public static final AudioFormat TTS_AUDIO_FORMAT = new AudioFormat(16000F, 16, 1, true, false);
  29. public static final String TTS_DLL_PATH = "src/main/resources/tts_msc_x64.dll"; // windows动态库路径
  30. public static final String TTS_LOGIN_PARAMS = "appid = " + APPID + ", work_dir = " + WORK_DIR;
  31. public static final String TTS_SESSION_BEGIN_PARAMS = "engine_type = local, voice_name = xiaoyan, text_encoding = UTF8," +
  32. " tts_res_path = fo|res/tts/xiaoyan.jet;fo|res/tts/common.jet, sample_rate = 16000, speed = 50, volume = 50, pitch = 50, rdn = 2";
  33. public static IntByReference TTS_ERROR_CODE = new IntByReference(-100);
  34. public static IntByReference TTS_AUDIO_LEN = new IntByReference(-100);
  35. public static IntByReference TTS_SYNTH_STATUS = new IntByReference(-100);
  36. public static String TTS_TEXT; // 合成文本
  37. public static Integer TTS_TOTAL_AUDIO_LENGTH; // 合成音频长度
  38. public static ByteArrayOutputStream TTS_BYTE_ARRAY_OUTPUT_STREAM; // 合成音频流
  39. public static DataLine.Info TTS_DATA_LINE_INFO = new DataLine.Info(SourceDataLine.class, TTS_AUDIO_FORMAT, AudioSystem.NOT_SPECIFIED);
  40. public static SourceDataLine TTS_SOURCE_DATA_LINE; // 播放
  41. static {
  42. try {
  43. TTS_SOURCE_DATA_LINE = (SourceDataLine) AudioSystem.getLine(Constants.TTS_DATA_LINE_INFO);
  44. } catch (LineUnavailableException e) {
  45. e.printStackTrace();
  46. }
  47. }
  48. // 3、离线命令词相关
  49. public static final String ASR_DLL_PATH = "src/main/resources/asr_msc_x64.dll"; // windows动态库路径
  50. public static final String ASR_LOGIN_PARAMS = "appid = " + APPID + ", work_dir = " + WORK_DIR;
  51. public static final String ASR_CALL_BNF_PATH = "src/main/resources/msc/res/asr/call.bnf";
  52. public static final String ASR_BUILD_PARAMS = "engine_type = local,asr_res_path = fo|res/asr/common.jet," +
  53. "sample_rate = 16000,grm_build_path = res/asr/GrmBuilld_x64";
  54. public static final String ASR_LEX_PARAMS = "engine_type=local,asr_res_path = fo|res/asr/common.jet, " +
  55. "sample_rate = 16000,grm_build_path =res/asr/GrmBuilld_x64, grammar_list =call";
  56. public static IntByReference ASR_ERROR_CODE = new IntByReference(-100);
  57. public static final String ASR_SESSION_PARAMS = "vad_bos =3000 ,vad_eos = 10000,engine_type = local,asr_res_path = fo|res/asr/common.jet, " +
  58. "sample_rate = 16000,grm_build_path = res/asr/GrmBuilld_x64, local_grammar = call,result_type = json, result_encoding = UTF8";
  59. public static IntByReference ASR_EP_STATUS = new IntByReference(-100);
  60. public static IntByReference ASR_RECOG_STATUS = new IntByReference(-100);
  61. public static Integer ASR_AUDIO_STATUS = 1;
  62. public static Integer ASR_FRAME_SIZE = 640; // 16k采样率的16位音频,一帧的大小为640Byte(来自Windows SDK的说明)
  63. public static FileInputStream ASR_FILE_INPUT_STREAM;
  64. public static String ASR_GRAMMAR_CONTENT;
  65. public static IntByReference ASR_RESULT_STATUS = new IntByReference(-100);
  66. }

4、唤醒方法重写(唤醒成功执行回调函数,往下看)

  1. package com.day.service;
  2. import com.day.config.Constants;
  3. import com.day.service.imp.IvwCallback;
  4. import com.sun.jna.Library;
  5. import com.sun.jna.Native;
  6. import com.sun.jna.ptr.IntByReference;
  7. public interface IvwService extends Library {
  8. /**
  9. * 重点:
  10. * 1.char * 对应 String
  11. * 2.int * 对应 IntByReference
  12. * 3.void * 对应 Pointer或byte[]
  13. * 4.int 对应 int
  14. * 5.无参 对应 无参
  15. * 6.回调函数 对应 根据文档自定义回调函数,实现接口Callback
  16. */
  17. //加载dll动态库并实例化,从而使用其内部的方法
  18. IvwService INSTANCE = Native.loadLibrary(Constants.IVW_DLL_PATH, IvwService.class);
  19. //定义登录方法 MSPLogin(const char *usr, const char *pwd, const char *params)
  20. public Integer MSPLogin(String usr, String pwd, String params);
  21. //定义开始方法 QIVWSessionbegin(const char *grammarList, const char *params, int *errorCode)
  22. public String QIVWSessionBegin(String grammarList, String params, IntByReference errorCode);
  23. //定义写音频方法 QIVWAudioWrite(const char *sessionID, const void *audioData, unsigned int audioLen, int audioStatus)
  24. public Integer QIVWAudioWrite(String sessionID, byte[] audioData, int audioLen, int audioStatus);
  25. //定义结束方法 QIVWSessionEnd(const char *sessionID, const char *hints)
  26. public Integer QIVWSessionEnd(String sessionID, String hints);
  27. //定义获取结果方法 QIVWRegisterNotify(const char *sessionID, ivw_ntf_handler msgProcCb, void *userData)
  28. public Integer QIVWRegisterNotify(String sessionID, IvwCallback ivwCallback, byte[] userData);
  29. //定义退出方法 唤醒一般不用退出
  30. public Integer MSPLogout();
  31. }

 5、合成方法重写

  1. package com.day.service;
  2. import com.day.config.Constants;
  3. import com.sun.jna.Library;
  4. import com.sun.jna.Native;
  5. import com.sun.jna.Pointer;
  6. import com.sun.jna.ptr.IntByReference;
  7. public interface TtsService extends Library {
  8. /**
  9. * 重点:
  10. * 1.char * 对应 String
  11. * 2.int * 对应 IntByReference
  12. * 3.void * 对应 byte[]/Pointer,回调函数里此类型需用String来对应。
  13. * 4.int 对应 int
  14. * 5.无参 对应 void
  15. * 6.回调函数 对应 根据文档自定义回调函数,实现接口Callback,离线语音合成无回调
  16. */
  17. //加载dll动态库并实例化,从而使用其内部的方法
  18. TtsService INSTANCE = Native.loadLibrary(Constants.TTS_DLL_PATH, TtsService.class);
  19. //定义登录方法
  20. public Integer MSPLogin(String usr, String pwd, String params);
  21. //开始一次普通离线语音合成
  22. public String QTTSSessionBegin(String params, IntByReference errorCode);
  23. //写入需要合成的文本
  24. public Integer QTTSTextPut(String sessionID, String textString, int textLen, String params);
  25. //获取离线合成的音频
  26. public Pointer QTTSAudioGet(String sessionID, IntByReference audioLen, IntByReference synthStatus, IntByReference errorCode);
  27. //结束本次普通离线语音合成
  28. public Integer QTTSSessionEnd(String sessionID, String hints);
  29. //定义退出方法
  30. public Integer MSPLogout();
  31. }

6、离线命令词方法重写

  1. package com.day.service;
  2. import com.day.config.Constants;
  3. import com.day.service.imp.AsrGrammarCallback;
  4. import com.day.service.imp.AsrLexiconCallback;
  5. import com.sun.jna.Library;
  6. import com.sun.jna.Native;
  7. import com.sun.jna.ptr.IntByReference;
  8. public interface AsrService extends Library {
  9. /**
  10. * 重点:
  11. * 1.char * 对应 String
  12. * 2.int * 对应 IntByReference
  13. * 3.void * 对应 byte[],回调函数里此类型需用String来对应。
  14. * 4.int 对应 int
  15. * 5.无参 对应 void
  16. * 6.回调函数 对应 根据文档自定义回调函数,实现接口Callback
  17. */
  18. //加载dll动态库并实例化,从而使用其内部的方法
  19. AsrService INSTANCE = Native.loadLibrary(Constants.ASR_DLL_PATH, AsrService.class);
  20. //定义登录方法
  21. public Integer MSPLogin(String usr, String pwd, String params);
  22. //开始一次语音识别。
  23. public String QISRSessionBegin(String grammarList, String params, IntByReference errorCode);
  24. //写入本次识别的音频
  25. public Integer QISRAudioWrite(String sessionID, byte[] byteArrayAudioData, int waveLen, int audioStatus, IntByReference epStatus, IntByReference recogStatus);
  26. //获取识别结果。
  27. public String QISRGetResult(String sessionID, IntByReference rsltStatus, int waitTime, IntByReference errorCode);
  28. //结束本次语音识别。
  29. public Integer QISRSessionEnd(String sessionID, String hints);
  30. //获取当次语音识别信息,如上行流量、下行流量等
  31. public Integer QISRGetParam(String sessionID, String paramName, String paramValue, IntByReference valueLen);
  32. //构建语法,生成语法ID。有回调
  33. public Integer QISRBuildGrammar(String grammarType, String grammarContent, int grammarLength, String params, AsrGrammarCallback asrGrammarCallback, byte[] userData);
  34. //更新本地语法词典。有回调
  35. public Integer QISRUpdateLexicon(String lexiconName, String lexiconContent, int lexiconLength, String params, AsrLexiconCallback asrLexiconCallback, byte[] userData);
  36. //定义退出方法
  37. public Integer MSPLogout();
  38. }

7、回调函数的定义(1个唤醒的,2个离线命令词的)

  1. package com.day.service.imp;
  2. import com.day.AIMain;
  3. import com.sun.jna.Callback;
  4. public class IvwCallback implements Callback {
  5. public int cb_ivw_msg_proc(String sessionID, int msg, int param1, int param2,
  6. String info, String userData) {
  7. System.out.println("回调函数返回的唤醒结果...:" + info);
  8. AIMain.startTts("在的,请说指令");
  9. AIMain.startAsr(); // 答复完毕调用命令词
  10. return 0;
  11. }
  12. }
  1. package com.day.service.imp;
  2. import com.sun.jna.Callback;
  3. public class AsrGrammarCallback implements Callback {
  4. public int build_grm_cb(int errorCode, String info, String userData) {
  5. System.out.println("构建语法返回的ID信息...:" + info + ",错误码...:" + errorCode);
  6. return 0;
  7. }
  8. }
  1. package com.day.service.imp;
  2. import com.sun.jna.Callback;
  3. public class AsrLexiconCallback implements Callback {
  4. public int LexiconCallBack(int errorCode, String info, String userData) {
  5. System.out.println("更新词典返回的信息...:" + info + ",错误码...:" + errorCode);
  6. return 0;
  7. }
  8. }

8、为了方便各位看官,上POM文件

  1. <?xml version="1.0" encoding="UTF-8"?>
  2. <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  3. xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
  4. <modelVersion>4.0.0</modelVersion>
  5. <parent>
  6. <!-- Parent project coordinates ########################################################################-->
  7. <groupId>org.springframework.boot</groupId>
  8. <artifactId>spring-boot-starter-parent</artifactId>
  9. <version>2.1.6.RELEASE</version>
  10. <relativePath/> <!-- lookup parent from repository -->
  11. </parent>
  12. <!-- Coordinates under which this project is referenced by others #####################################-->
  13. <groupId>com.example</groupId>
  14. <artifactId>day</artifactId>
  15. <version>0.0.1-SNAPSHOT</version>
  16. <name>day</name>
  17. <description>day</description>
  18. <!-- JDK version ########################################################################################-->
  19. <properties>
  20. <java.version>1.8</java.version>
  21. </properties>
  22. <!-- Dependency JARs ####################################################################################-->
  23. <dependencies>
  24. <!-- https://mvnrepository.com/artifact/com.google.code.gson/gson -->
  25. <dependency>
  26. <groupId>com.google.code.gson</groupId>
  27. <artifactId>gson</artifactId>
  28. <version>2.10.1</version>
  29. </dependency>
  30. <!-- https://mvnrepository.com/artifact/net.java.dev.jna/jna -->
  31. <dependency>
  32. <groupId>net.java.dev.jna</groupId>
  33. <artifactId>jna</artifactId>
  34. <version>5.5.0</version>
  35. </dependency>
  36. </dependencies>
  37. <build>
  38. <plugins>
  39. <plugin>
  40. <groupId>org.springframework.boot</groupId>
  41. <artifactId>spring-boot-maven-plugin</artifactId>
  42. <version>2.1.6.RELEASE</version>
  43. </plugin>
  44. </plugins>
  45. </build>
  46. <!-- Download artifacts through the Aliyun repository mirror -->
  47. <repositories>
  48. <repository>
  49. <id>nexus-aliyun</id>
  50. <name>nexus-aliyun</name>
  51. <url>https://maven.aliyun.com/nexus/content/groups/public/</url>
  52. <releases>
  53. <enabled>true</enabled>
  54. </releases>
  55. <snapshots>
  56. <enabled>false</enabled>
  57. </snapshots>
  58. </repository>
  59. </repositories>
  60. <pluginRepositories>
  61. <pluginRepository>
  62. <id>public</id>
  63. <name>nexus-aliyun</name>
  64. <url>https://maven.aliyun.com/nexus/content/groups/public/</url>
  65. <releases>
  66. <enabled>true</enabled>
  67. </releases>
  68. <snapshots>
  69. <enabled>false</enabled>
  70. </snapshots>
  71. </pluginRepository>
  72. </pluginRepositories>
  73. </project>

9、命令词语法也给一份示例 call.bnf(老生常谈,注意放置位置:src/main/resources/msc/res/asr/call.bnf,即常量 ASR_CALL_BNF_PATH 指向的路径)

  #BNF+IAT 1.0;
  !grammar call;
  !slot <enter>;
  !slot <scanSolicitation>;
  !slot <scanDelivery>;
  !slot <exit>;
  !start <callStart>;
  <callStart>:[<enter>][<scanSolicitation>][<scanDelivery>][<exit>];
  <enter>:立刻|马上|一分钟后|十分钟后|半小时后;
  <scanSolicitation>:打开|关闭|调亮|调暗|调高|调低;
  <scanDelivery>:主卧|次卧|书房|客厅;
  <exit>:空调|电灯|窗户|窗帘|衣柜;

10、实现完美的智能语音交互,感兴趣的可以结合下大模型做智能问答场景。

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/AllinToyou/article/detail/124100
推荐阅读
相关标签
  

闽ICP备14008679号