当前位置:   article > 正文

讯飞离线语音命令词+TTS离线发音,实现命令词交互(windows dll for unity插件)_asr_offline_record_sample

asr_offline_record_sample

步骤一

登录讯飞平台注册并创建自己的应用

https://www.xfyun.cn 

 

步骤二

下载离线命令词识别SDK windows MSC ,注意下载的SDK会根据你的appid捆绑在代码段里,应该是讯飞的版权策略

步骤三

下载离线语音合成(普通版)SDK ,

 

步骤四

根据官方提示需要使用 vs2010 版本 不想折腾版本兼容问题,就乖乖安装。

链接:网盘下载地址    提取码:9541 

步骤五

1.解压  离线命令词识别SDK压缩包 和 离线语音合成(普通版)压缩包。

2.使用vs2010打开项目/解决方案  打开解压后samples文件夹里的工程文件。

3.配置开发环境,请参考  离线命令词识别 Windows SDK 文档 和 离线语音合成 Windows SDK 文档

4.确定samples工程可以正常运行。

步骤六 

将离线语音合成打包成DLL库

1.当前运行的工程为win32控制台模式,修改成dll ,右键项目解决方案 -> 属性 ->常规->配置类型修改成(动态库.dll)

2.替换下面改造dll代码,tts_offline_sample.c,注意替换成自己的appid

/*
 * Text To Speech (TTS): automatically converts arbitrary text into
 * continuous natural speech in real time. (Translated from the
 * original Chinese header.)
 */
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <intrin.h>
#include <string.h>
#include <Windows.h>
#include <locale.h>
#include "qtts.h"
#include "msp_cmn.h"
#include "msp_errors.h"
#ifdef _WIN64
#pragma comment(lib,"../../libs/msc_x64.lib")//x64
#else
#pragma comment(lib,"../../libs/msc.lib")//x86
#endif
/* WAV (RIFF/PCM) file header layout, written verbatim to disk. */
typedef struct _wave_pcm_hdr
{
    char            riff[4];                // = "RIFF"
    int             size_8;                 // = FileSize - 8
    char            wave[4];                // = "WAVE"
    char            fmt[4];                 // = "fmt "
    int             fmt_size;               // = size of the following chunk: 16
    short int       format_tag;             // = PCM : 1
    short int       channels;               // = channel count : 1
    int             samples_per_sec;        // = sample rate : 8000 | 6000 | 11025 | 16000
    int             avg_bytes_per_sec;      // = bytes per second : samples_per_sec * bits_per_sample / 8
    short int       block_align;            // = bytes per sample frame : wBitsPerSample / 8
    short int       bits_per_sample;        // = bit depth : 8 | 16
    char            data[4];                // = "data"
    int             data_size;              // = payload length : FileSize - 44
} wave_pcm_hdr;
/* Default header: 16 kHz, mono, 16-bit PCM. size_8/data_size start at 0
 * and are patched in text_to_speech() once the audio length is known. */
wave_pcm_hdr default_wav_hdr =
{
    { 'R', 'I', 'F', 'F' },
    0,
    {'W', 'A', 'V', 'E'},
    {'f', 'm', 't', ' '},
    16,
    1,
    1,
    16000,
    32000,
    2,
    16,
    {'d', 'a', 't', 'a'},
    0
};
  55. char *wchar_t_to_char(const wchar_t *str){
  56.     char *result = NULL;
  57.     int textlen = 0;
  58.     textlen = WideCharToMultiByte(CP_ACP,0,str,-1,NULL,0,NULL,NULL);
  59.     result = (char*)malloc((textlen+1)*sizeof(char));
  60.     memset(result,0,sizeof(char)*(textlen+1));
  61.     WideCharToMultiByte(CP_ACP,0,str,-1,result,textlen,NULL,NULL);
  62.     return result;
  63. }
  64. /* 文本合成 */
  65. int text_to_speech(const char* src_text, const char* des_path, const char* params)
  66. {
  67.     int          ret          = -1;
  68.     FILE*        fp           = NULL;
  69.     const char*  sessionID    = NULL;
  70.     unsigned int audio_len    = 0;
  71.     wave_pcm_hdr wav_hdr      = default_wav_hdr;
  72.     int          synth_status = MSP_TTS_FLAG_STILL_HAVE_DATA;
  73.     if (NULL == src_text || NULL == des_path)
  74.     {
  75.         printf("params is error!\n");
  76.         return 0;
  77.     }
  78.     fp = fopen(des_path, "wb");
  79.     if (NULL == fp)
  80.     {
  81.         printf("open %s error.\n", des_path);
  82.         return 1;
  83.     }
  84.     /* 开始合成 */
  85.     sessionID = QTTSSessionBegin(params, &ret);
  86.     if (MSP_SUCCESS != ret)
  87.     {
  88.         printf("QTTSSessionBegin failed, error code: %d.\n", ret);
  89.         fclose(fp);
  90.         return 2;
  91.     }
  92.     ret = QTTSTextPut(sessionID, src_text, (unsigned int)strlen(src_text), NULL);
  93.     if (MSP_SUCCESS != ret)
  94.     {
  95.         printf("QTTSTextPut failed, error code: %d.\n",ret);
  96.         QTTSSessionEnd(sessionID, "TextPutError");
  97.         fclose(fp);
  98.         return 3;
  99.     }
  100.     printf("正在合成 ...\n");
  101.     fwrite(&wav_hdr, sizeof(wav_hdr) ,1, fp); //添加wav音频头,使用采样率为16000
  102.     while (1
  103.     {
  104.         /* 获取合成音频 */
  105.         const void* data = QTTSAudioGet(sessionID, &audio_len, &synth_status, &ret);
  106.         if (MSP_SUCCESS != ret)
  107.             break;
  108.         if (NULL != data)
  109.         {
  110.             fwrite(data, audio_len, 1, fp);
  111.             wav_hdr.data_size += audio_len; //计算data_size大小
  112.         }
  113.         if (MSP_TTS_FLAG_DATA_END == synth_status)
  114.             break;
  115.     }
  116.     printf("\n");
  117.     if (MSP_SUCCESS != ret)
  118.     {
  119.         printf("QTTSAudioGet failed, error code: %d.\n",ret);
  120.         QTTSSessionEnd(sessionID, "AudioGetError");
  121.         fclose(fp);
  122.         return 4;
  123.     }
  124.     /* 修正wav文件头数据的大小 */
  125.     wav_hdr.size_8 += wav_hdr.data_size + (sizeof(wav_hdr) - 8);
  126.     /* 将修正过的数据写回文件头部,音频文件为wav格式 */
  127.     fseek(fp, 4, 0);
  128.     fwrite(&wav_hdr.size_8,sizeof(wav_hdr.size_8), 1, fp); //写入size_8的值
  129.     fseek(fp, 40, 0); //将文件指针偏移到存储data_size值的位置
  130.     fwrite(&wav_hdr.data_size,sizeof(wav_hdr.data_size), 1, fp); //写入data_size的值
  131.     fclose(fp);
  132.     fp = NULL;
  133.     /* 合成完毕 */
  134.     ret = QTTSSessionEnd(sessionID, "Normal");
  135.     if (MSP_SUCCESS != ret)
  136.     {
  137.         printf("QTTSSessionEnd failed, error code: %d.\n",ret);
  138.     }
  139.     return 5;
  140. }
/* Exported: log in to the MSC engine for TTS.
 * Returns 1 on success, 0 on failure (logs out first on failure).
 * The appid in login_params is bound to the downloaded msc library —
 * replace with your own appid but do not otherwise change the string. */
__declspec(dllexport)  int loading_tts(){
    int         ret                  = MSP_SUCCESS;
    const char* login_params         = "appid = 替换自己的appid, work_dir = .";
    /* Username/password are NULL for this SDK; credentials come from
     * the appid registered at http://www.xfyun.cn. */
    ret = MSPLogin(NULL, NULL, login_params);
    if (MSP_SUCCESS != ret)
    {
        MSPLogout(); 
        return 0;
    }
    return 1;
}
  153. /* 开始转换 */
  154. __declspec(dllexport)  int to_speech(wchar_t * wav_path, wchar_t * tts_text){
  155.     int         ret                  = MSP_SUCCESS;
  156.     const char* session_begin_params = "engine_type = local, voice_name = xiaoyan, text_encoding = GB2312, tts_res_path = fo|C:\\xunfei_speech\\xiaoyan.jet;fo|C:\\xunfei_speech\\common.jet, sample_rate = 16000, speed = 50, volume = 50, pitch = 50, rdn = 2";
  157.     //const char* filename             = "C:\\xunfei_speech\\tts_sample.wav"; //合成的语音文件名称s
  158.     //const char* text                 = "亲爱的用户,您好,这是一个语音合成示例,感谢您对小螺丝语音技术的支持!小螺丝是亚太地区最大的上市公司,股票代码:002233"; //合成文本
  159.     const char* filename             =wchar_t_to_char(wav_path);
  160.     const char* text                 =wchar_t_to_char(tts_text);
  161.     ret = text_to_speech(text, filename, session_begin_params);
  162.     return ret;
  163. }
/* Exported: log out of the MSC engine, releasing TTS resources.
 * Always returns 1. */
__declspec(dllexport)  int free_tts(){
   MSPLogout(); 
   return 1;
}

3.重新生成DLL

 

步骤七

将离线语音命令词打包成DLL库

1.当前运行的工程为win32控制台模式,修改成dll ,右键项目解决方案 -> 属性 ->常规->配置类型修改成(动态库.dll)

2.替换下面改造dll代码,asr_record_sample.c,注意替换成自己的appid

/*
 * iFly Auto Transform (IAT): converts speech to text in real time.
 * (Translated from the original Chinese header.)
 */
#include <stdlib.h>
#include <stdio.h>
//#include <string.h>
#include <windows.h>
#include <conio.h>
#include <errno.h>
#include <process.h>
#include "../../include/qisr.h"
#include "../../include/msp_cmn.h"
#include "../../include/msp_errors.h"
#include "./include/speech_recognizer.h"
#define FRAME_LEN 640
#define BUFFER_SIZE 4096
/* Console-control event indices (left over from the console sample;
 * unused by the DLL exports below). */
enum{
EVT_START = 0,
EVT_STOP,
EVT_QUIT,
EVT_TOTAL
};
static HANDLE events[EVT_TOTAL] = {NULL,NULL,NULL};
static COORD begin_pos = {0, 0}; /* console cursor bookkeeping for show_result() */
static COORD last_pos = {0, 0};
#define SAMPLE_RATE_16K (16000)
#define SAMPLE_RATE_8K (8000)
#define MAX_GRAMMARID_LEN (32)
#define MAX_PARAMS_LEN (1024)
/* Offline grammar-recognition resource path ("fo|" = file-offset syntax). */
const char * ASR_RES_PATH = "fo|C:\\xunfei_speech\\command_speech\\common.jet";
#ifdef _WIN64
const char * GRM_BUILD_PATH = "C:\\xunfei_speech\\command_speech\\GrmBuilld_x64"; /* output dir for the built grammar network */
#else
const char * GRM_BUILD_PATH = "C:\\xunfei_speech\\command_speech\\GrmBuilld"; /* output dir for the built grammar network */
#endif
const char * GRM_FILE = "C:\\xunfei_speech\\command_speech\\call.bnf"; /* BNF grammar used to build the network */
const char * LEX_NAME = "contact"; /* lexicon slot updated in call.bnf */
/* State shared with the asynchronous MSC build/update callbacks. */
typedef struct _UserData {
int build_fini;  /* set to 1 when the grammar build callback fires */
int update_fini; /* set to 1 when the lexicon update callback fires */
int errcode;     /* error code recorded by the last callback */
char grammar_id[MAX_GRAMMARID_LEN]; /* grammar ID returned by the build */
}UserData;
const char *get_audio_file(void); /* choose an audio file for offline recognition (unused here) */
int build_grammar(UserData *udata);  /* build the offline recognition grammar network */
int update_lexicon(UserData *udata); /* update the offline grammar lexicon */
int run_asr(UserData *udata);        /* start offline grammar recognition */
  48. int build_grm_cb(int ecode, const char *info, void *udata)
  49. {
  50. UserData *grm_data = (UserData *)udata;
  51. if (NULL != grm_data) {
  52. grm_data->build_fini = 1;
  53. grm_data->errcode = ecode;
  54. }
  55. if (MSP_SUCCESS == ecode && NULL != info) {
  56. printf("构建语法成功! 语法ID:%s\n", info);
  57. if (NULL != grm_data)
  58. _snprintf(grm_data->grammar_id, MAX_GRAMMARID_LEN - 1, info);
  59. }
  60. else
  61. printf("构建语法失败!%d\n", ecode);
  62. return 0;
  63. }
  64. int build_grammar(UserData *udata)
  65. {
  66. FILE *grm_file = NULL;
  67. char *grm_content = NULL;
  68. unsigned int grm_cnt_len = 0;
  69. char grm_build_params[MAX_PARAMS_LEN] = {NULL};
  70. int ret = 0;
  71. grm_file = fopen(GRM_FILE, "rb");
  72. if(NULL == grm_file) {
  73. printf("打开\"%s\"文件失败![%s]\n", GRM_FILE, strerror(errno));
  74. return -1;
  75. }
  76. fseek(grm_file, 0, SEEK_END);
  77. grm_cnt_len = ftell(grm_file);
  78. fseek(grm_file, 0, SEEK_SET);
  79. grm_content = (char *)malloc(grm_cnt_len + 1);
  80. if (NULL == grm_content)
  81. {
  82. printf("内存分配失败!\n");
  83. fclose(grm_file);
  84. grm_file = NULL;
  85. return -1;
  86. }
  87. fread((void*)grm_content, 1, grm_cnt_len, grm_file);
  88. grm_content[grm_cnt_len] = '\0';
  89. fclose(grm_file);
  90. grm_file = NULL;
  91. _snprintf(grm_build_params, MAX_PARAMS_LEN - 1,
  92. "engine_type = local, \
  93. asr_res_path = %s, sample_rate = %d, \
  94. grm_build_path = %s, ",
  95. ASR_RES_PATH,
  96. SAMPLE_RATE_16K,
  97. GRM_BUILD_PATH
  98. );
  99. ret = QISRBuildGrammar("bnf", grm_content, grm_cnt_len, grm_build_params, build_grm_cb, udata);
  100. free(grm_content);
  101. grm_content = NULL;
  102. return ret;
  103. }
  104. int update_lex_cb(int ecode, const char *info, void *udata)
  105. {
  106. UserData *lex_data = (UserData *)udata;
  107. if (NULL != lex_data) {
  108. lex_data->update_fini = 1;
  109. lex_data->errcode = ecode;
  110. }
  111. if (MSP_SUCCESS == ecode)
  112. printf("更新词典成功!\n");
  113. else
  114. printf("更新词典失败!%d\n", ecode);
  115. return 0;
  116. }
  117. int update_lexicon(UserData *udata)
  118. {
  119. const char *lex_content = "丁伟\n黄辣椒";
  120. unsigned int lex_cnt_len = strlen(lex_content);
  121. char update_lex_params[MAX_PARAMS_LEN] = {NULL};
  122. _snprintf(update_lex_params, MAX_PARAMS_LEN - 1,
  123. "engine_type = local, text_encoding = GB2312, \
  124. asr_res_path = %s, sample_rate = %d, \
  125. grm_build_path = %s, grammar_list = %s, ",
  126. ASR_RES_PATH,
  127. SAMPLE_RATE_16K,
  128. GRM_BUILD_PATH,
  129. udata->grammar_id);
  130. return QISRUpdateLexicon(LEX_NAME, lex_content, lex_cnt_len, update_lex_params, update_lex_cb, udata);
  131. }
/* Print a recognition result to the console, rewriting the previously
 * printed line in place; final results (is_over != 0) are shown green.
 * Relies on the file-scope begin_pos/last_pos cursor bookkeeping.
 * NOTE(review): `orig` is declared but never used — left as-is. */
static void show_result(char *string, char is_over)
{
COORD orig, current;
CONSOLE_SCREEN_BUFFER_INFO info;
HANDLE w = GetStdHandle(STD_OUTPUT_HANDLE);
GetConsoleScreenBufferInfo(w, &info);
current = info.dwCursorPosition;
if(current.X == last_pos.X && current.Y == last_pos.Y ) {
/* cursor unmoved since last call: overwrite our previous output */
SetConsoleCursorPosition(w, begin_pos);
} else {
/* changed by other routines, use the new pos as start */
begin_pos = current;
}
if(is_over)
SetConsoleTextAttribute(w, FOREGROUND_GREEN);
printf("Result: [ %s ]\n", string);
if(is_over)
SetConsoleTextAttribute(w, info.wAttributes);
GetConsoleScreenBufferInfo(w, &info);
last_pos = info.dwCursorPosition;
}
static char *g_result = NULL; /* growing heap buffer accumulating the recognition result */
static char bk_result[1024]="0"; /* snapshot handed to start_listening(); "0" means "no result" */
static unsigned int g_buffersize = BUFFER_SIZE; /* current capacity of g_result */
static UserData g_asr_data; /* shared grammar-build / lexicon-update state */
struct speech_rec g_asr; /* recognizer session handle (speech_recognizer.h) */
  158. void on_result(const char *result, char is_last)
  159. {
  160. if (result) {
  161. size_t left = g_buffersize - 1 - strlen(g_result);
  162. size_t size = strlen(result);
  163. if (left < size) {
  164. g_result = (char*)realloc(g_result, g_buffersize + BUFFER_SIZE);
  165. if (g_result)
  166. g_buffersize += BUFFER_SIZE;
  167. else {
  168. printf("mem alloc failed\n");
  169. return;
  170. }
  171. }
  172. strncat(g_result, result, size);
  173. printf(g_result);
  174. strcpy(bk_result,g_result);
  175. //show_result(g_result, is_last);
  176. }
  177. }
  178. void on_speech_begin()
  179. {
  180. if (g_result)
  181. {
  182. free(g_result);
  183. }
  184. g_result = (char*)malloc(BUFFER_SIZE);
  185. g_buffersize = BUFFER_SIZE;
  186. memset(g_result, 0, g_buffersize);
  187. printf("Start Listening...\n");
  188. }
/* MSC callback: the recognition session ended. reason distinguishes a
 * normal VAD-detected end of speech from a recognizer error. */
void on_speech_end(int reason)
{
if (reason == END_REASON_VAD_DETECT){
printf("\nSpeaking done \n");
// g_result = (char*)"Speaking done";
//strcpy(bk_result,"Speaking done");
}else{
printf("\nRecognizer error %d\n", reason);
//g_result = (char*)"Recognizer error";
//strcpy(bk_result,"Recognizer error");
}
}
  201. /* demo recognize the audio from microphone */
  202. static void recognize_mic(const char* session_begin_params)
  203. {
  204. int errcode;
  205. int i = 0;
  206. HANDLE helper_thread = NULL;
  207. DWORD waitres;
  208. char isquit = 0;
  209. struct speech_rec_notifier recnotifier = {
  210. on_result,
  211. on_speech_begin,
  212. on_speech_end
  213. };
  214. errcode = sr_init(&g_asr, session_begin_params, SR_MIC, DEFAULT_INPUT_DEVID, &recnotifier);
  215. if (errcode) {
  216. printf("speech recognizer init failed\n");
  217. return;
  218. }
  219. }
  220. int run_asr(UserData *udata)
  221. {
  222. char asr_params[MAX_PARAMS_LEN] = {NULL};
  223. const char *rec_rslt = NULL;
  224. const char *session_id = NULL;
  225. const char *asr_audiof = NULL;
  226. FILE *f_pcm = NULL;
  227. char *pcm_data = NULL;
  228. long pcm_count = 0;
  229. long pcm_size = 0;
  230. int last_audio = 0;
  231. int aud_stat = MSP_AUDIO_SAMPLE_CONTINUE;
  232. int ep_status = MSP_EP_LOOKING_FOR_SPEECH;
  233. int rec_status = MSP_REC_STATUS_INCOMPLETE;
  234. int rss_status = MSP_REC_STATUS_INCOMPLETE;
  235. int errcode = -1;
  236. int aud_src = 1;
  237. //离线语法识别参数设置
  238. _snprintf(asr_params, MAX_PARAMS_LEN - 1,
  239. "engine_type = local, \
  240. asr_res_path = %s, sample_rate = %d, \
  241. grm_build_path = %s, local_grammar = %s, \
  242. result_type = xml, result_encoding = GB2312, ",
  243. ASR_RES_PATH,
  244. SAMPLE_RATE_16K,
  245. GRM_BUILD_PATH,
  246. udata->grammar_id
  247. );
  248. recognize_mic(asr_params);
  249. return 0;
  250. }
  251. int main(int argc, char* argv[]){
  252. const char *login_config = "appid = 5fa0d519"; //登录参数
  253. int ret = 0 ;
  254. ret = MSPLogin(NULL, NULL, login_config); //第一个参数为用户名,第二个参数为密码,传NULL即可,第三个参数是登录参数
  255. if (MSP_SUCCESS != ret) {
  256. printf("登录失败:%d\n", ret);
  257. //goto exit;
  258. }
  259. memset(&g_asr_data, 0, sizeof(UserData));
  260. printf("构建离线识别语法网络...\n");
  261. ret = build_grammar(&g_asr_data); //第一次使用某语法进行识别,需要先构建语法网络,获取语法ID,之后使用此语法进行识别,无需再次构建
  262. if (MSP_SUCCESS != ret) {
  263. printf("构建语法调用失败!\n");
  264. }
  265. while (1 != g_asr_data.build_fini)
  266. _sleep(300);
  267. if (MSP_SUCCESS != g_asr_data.errcode)
  268. printf("离线识别语法网络构建完成,开始识别...\n");
  269. ret = run_asr(&g_asr_data);
  270. if (MSP_SUCCESS != ret) {
  271. printf("离线语法识别出错: %d \n", ret);
  272. }
  273. while(1){
  274. sr_start_listening(&g_asr);
  275. _sleep(1000);
  276. }
  277. return 1;
  278. }
/* Exported: log in to MSC and reset the shared ASR state.
 * Returns 1 on success, 0 on failure. The appid is bound to the
 * downloaded msc library — replace with your own. */
__declspec(dllexport) int loading_msp(){
const char *login_config = "appid = 替换自己的appid"; /* login params */
int ret = 0 ;
ret = MSPLogin(NULL, NULL, login_config); /* user/password are NULL for this SDK */
if (MSP_SUCCESS != ret) {
MSPLogout();
return 0;
}
memset(&g_asr_data, 0, sizeof(UserData));
return 1;
}
/* Exported: build the offline command-word grammar network.
 * Blocks (polling every 300 ms) until the async build callback fires.
 * Returns 1 on success, -1 if the build call itself failed, 0 if the
 * callback reported an error. Call once after loading_msp(). */
__declspec(dllexport) int build_net_msp(){
int ret = 0 ;
printf("构建离线识别语法网络...\n");
/* First use builds the network and yields a grammar ID; later runs reuse it. */
ret = build_grammar(&g_asr_data);
if (MSP_SUCCESS != ret) {
printf("构建语法调用失败!\n");
return -1;
}
while (1 != g_asr_data.build_fini)
_sleep(300); /* poll for the async callback */
if (MSP_SUCCESS != g_asr_data.errcode)
return 0;
return 1;
}
/* Exported: update the "contact" lexicon slot (QISRUpdateLexicon).
 * Blocks (polling every 300 ms) until the async update callback fires.
 * Returns 1 on success, 0 if the call or the callback reported failure.
 * Requires a successful build_net_msp() first (needs grammar_id). */
__declspec(dllexport) int update_lexicon_msp(){
int ret = 0 ;
printf("更新离线语法词典...\n");
ret = update_lexicon(&g_asr_data);
if (MSP_SUCCESS != ret) {
printf("更新词典调用失败!\n");
return 0;
}
while (1 != g_asr_data.update_fini)
_sleep(300); /* poll for the async callback */
if (MSP_SUCCESS != g_asr_data.errcode)
return 0;
return 1;
}
/* Exported: initialize ("warm up") microphone recognition via run_asr.
 * Returns 1 on success, 0 on failure.
 * NOTE(review): run_asr() as written always returns 0 (== MSP_SUCCESS),
 * so the failure branch is effectively unreachable. */
__declspec(dllexport) int init_run_msp(){
int ret = 0 ;
ret = run_asr(&g_asr_data);
if (MSP_SUCCESS != ret) {
printf("离线语法识别出错: %d \n", ret);
return 0;
}
return 1;
}
  331. static wchar_t * char2wchar(char *str){
  332. int length = strlen(str)+1;
  333. wchar_t *t = (wchar_t*)malloc(sizeof(wchar_t)*length);
  334. memset(t,0,length*sizeof(wchar_t));
  335. MultiByteToWideChar(CP_ACP,0,str,strlen(str),t,length);
  336. return t;
  337. }
  338. /* 识别命令词 */
  339. __declspec(dllexport) wchar_t* start_listening(){
  340. wchar_t* re;
  341. if (sr_start_listening(&g_asr)!=NULL){
  342. // char * str1 = "小螺丝科技有限公司hahah1523";//g_result
  343. }
  344. re = char2wchar(bk_result);
  345. strcpy(bk_result,"0");
  346. return re;
  347. }
/* Exported: log out of MSC, releasing recognizer resources.
 * Always returns 1. */
__declspec(dllexport) int free_msp(){
MSPLogout();
return 1;
}

3.编译,重新生成DLL

4.提示:打包dll后,确保dll在unity里能够正常读取讯飞语音包引擎文件。这里使用了绝对路径,需创建文件夹 c:\xunfei_speech 并将SDK里的资源文件复制进去,确保能够正确引用。

步骤八  Unity 调用 dll 实现业务交互调用

1.创建unity工程 speech_demo

2.Unity项目目录 Assets/Scenes 目录下创建目录 全路径为 Assets/Scenes/Plugins/Windows/X86_64(64位平台) Assets/Scenes/Plugins/Windows/X86(32位平台)创建Unity可引用的dll目录,将 asr_record_sample.dll、tts_offline_sample.dll,msc_x64.dll(两个引擎可共用一个),x86平台为msc_x.dll,复制到目录下。

3.创建脚本ToSpeech.cs,脚本代码如下:

  1. using System.Collections;
  2. using System.Collections.Generic;
  3. using UnityEngine;
  4. using System;
  5. using System.Text;
  6. using System.Linq;
  7. using System.IO;
  8. using System.Xml;
  9. using System.Runtime.InteropServices;
  10. public class ToSpeech : MonoBehaviour
  11. {
  12. //初始化TTS离线朗读语音引擎
  13. [DllImport("tts_offline_sample", EntryPoint = "loading_tts", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.Cdecl)]
  14. extern static int loading_tts();
  15. //合成语音文件
  16. [DllImport("tts_offline_sample", EntryPoint = "to_speech", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.Cdecl)]
  17. extern static int to_speech([MarshalAs(UnmanagedType.LPWStr)]string wav_path, [MarshalAs(UnmanagedType.LPWStr)]string tts_text);
  18. //释放TTS朗读语音引擎
  19. [DllImport("tts_offline_sample", EntryPoint = "free_tts", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.Cdecl)]
  20. extern static int free_tts();
  21. //初始化命令词识别离线引擎
  22. [DllImport("asr_offline_record_sample", EntryPoint = "loading_msp", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.Cdecl)]
  23. public static extern int loading_msp();
  24. //初始化预热识别
  25. [DllImport("asr_offline_record_sample", EntryPoint = "init_run_msp", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.Cdecl)]
  26. static extern int init_run_msp();
  27. //构建识别网络
  28. [DllImport("asr_offline_record_sample", EntryPoint = "build_net_msp", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.Cdecl)]
  29. static extern int build_net_msp();
  30. //更新词典槽
  31. [DllImport("asr_offline_record_sample", EntryPoint = "update_lexicon_msp", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.Cdecl)]
  32. static extern int update_lexicon_msp();
  33. //开启监听——micphone
  34. [DllImport("asr_offline_record_sample", EntryPoint = "start_listening", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.Cdecl)]
  35. static extern IntPtr start_listening();
  36. //释放命令词识别离线引擎
  37. [DllImport("asr_offline_record_sample", EntryPoint = "free_msp", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.Cdecl)]
  38. static extern int free_msp();
  39. //test
  40. //[DllImport("asr_offline_record_sample", EntryPoint = "test", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.Cdecl)]
  41. //public static extern int test();
  42. public bool listening_off = false; //监听器循环开启关闭开关
  43. public int confidenct = 25; //置信度调节
  44. private int tts_off = 0;
  45. private float lastTime;
  46. private float curTime;
  47. void Start()
  48. {
  49. init_listing();
  50. }
  51. void Update()
  52. {
  53. update_listing();
  54. }
  55. //讲述人///
  56. public AudioSource aud;//需要节点关联
  57. IEnumerator LoadAudio(string recordPath)
  58. {
  59. WWW www = new WWW(recordPath);
  60. yield return www;
  61. var clipTemp = www.GetAudioClip();
  62. aud.clip = clipTemp;
  63. aud.Play();
  64. }
  65. private void speech_tts(string tts_text)
  66. {
  67. string wav_path = @"C:\\xunfei_speech\\" + tts_text + ".wav";
  68. if (File.Exists(wav_path))
  69. {
  70. StartCoroutine(LoadAudio(wav_path));
  71. return;
  72. }
  73. //如果文件已经生成就不用在合成了,直接读取播放
  74. tts_off = loading_tts();
  75. if (tts_off == 1)
  76. {
  77. print("初始化朗读成功:" + tts_off);
  78. int ret2 = to_speech(wav_path, tts_text);
  79. StartCoroutine(LoadAudio(wav_path));
  80. }
  81. else
  82. {
  83. print("初始化失败:" + tts_off);
  84. }
  85. free_tts();
  86. }
  87. /聆听人//
  88. //功能:初始化监听器
  89. void init_listing()
  90. {
  91. if (1 == loading_msp())
  92. {
  93. print("载入初始化成功");
  94. //实际应用中只执行一次///
  95. if (1 == build_net_msp())
  96. {
  97. print("下载构建网络命令词成功");
  98. if (1 == update_lexicon_msp())
  99. {
  100. print("更新词典槽成功");
  101. }
  102. }
  103. if (1 == init_run_msp())
  104. {
  105. print("预热命令词成功");
  106. listening_off = true;
  107. }
  108. }
  109. lastTime = Time.time;
  110. }
  111. //功能:循环发送mic监听器开启,可以重复发送,sdk 做了同步处理
  112. //返回:xml格式数据
  113. string update_listing()
  114. {
  115. string restr = "0";
  116. curTime = Time.time;
  117. if (curTime - lastTime >= 1)
  118. {
  119. if (listening_off)
  120. {
  121. IntPtr ip = start_listening();
  122. restr = Marshal.PtrToStringUni(ip);
  123. print("打开麦克风监听中..." + restr);
  124. get_result_confidenct(restr);//解析xml
  125. }
  126. // print(curTime - lastTime);
  127. lastTime = curTime;
  128. }
  129. return restr;
  130. }
  131. //功能:释放监听器
  132. void OnDestroy(){
  133. free_msp();
  134. print("退出了");
  135. }
  136. //功能:语音识别结果返回xml解析
  137. //传入:xml字符串;
  138. //返回:置信度结果,是一个0~1的浮点类型数字;
  139. double get_result_confidenct(string strxml)
  140. {
  141. // string strxml = @"<?xml version='1.0' encoding='gb2312' standalone='yes' ?><nlp> <version>1.1</version> <rawtext>您好</rawtext> <confidence>27</confidence> <engine>local</engine> <result> <focus>您好</focus> <confidence>18</confidence> <object> <您好 id='65535'>您好</您好> </object> </result></nlp>";
  142. if (Equals(strxml,"0")) { return 0; } //输入不是xml过滤掉 直接 返回结果;
  143. XmlDocument xml = new XmlDocument();
  144. XmlReaderSettings set = new XmlReaderSettings();
  145. set.IgnoreComments = true;
  146. xml.LoadXml(strxml);
  147. int s_confidenct = 0;
  148. string s_rawtext = "";
  149. int d_confidenct = 0;
  150. string d_focus = "";
  151. double res = 0;
  152. XmlNodeList sour_xmlNodeList = xml.SelectSingleNode("nlp").ChildNodes;
  153. foreach (XmlElement node in sour_xmlNodeList)
  154. {
  155. if (node.Name == "confidence")
  156. {
  157. s_confidenct = int.Parse(node.InnerText.Trim());
  158. print(node.Name + ":" + s_confidenct);
  159. }
  160. if (node.Name == "rawtext")
  161. {
  162. s_rawtext = node.InnerText.Trim();
  163. print(node.Name + ":" + s_rawtext);
  164. }
  165. }
  166. XmlNodeList back_dst_xmlNodeList = xml.SelectSingleNode("nlp").SelectSingleNode("result").ChildNodes;
  167. foreach (XmlElement node in back_dst_xmlNodeList)
  168. {
  169. if (node.Name == "confidence")
  170. {
  171. d_confidenct = int.Parse(node.InnerText.Trim());
  172. print(node.Name + ":" + d_confidenct);
  173. }
  174. if (node.Name == "focus")
  175. {
  176. d_focus = node.InnerText.Trim();
  177. print(node.Name + ":" + d_focus);
  178. }
  179. }
  180. if (s_confidenct > confidenct)//识别置信度调节
  181. {
  182. send_speech_result(d_focus, s_confidenct);
  183. }
  184. return res; //返回匹配相似度;
  185. }
  186. //功能:向父脚本子脚本广播识别结果
  187. //传入:1.结果文本|2.置信度|3.计算的匹配率 0~1 之间的浮点数
  188. void send_speech_result(string focus, int percent)
  189. {
  190. listening_off = false;
  191. free_msp();//临时退出登录
  192. if (Equals(focus, "小微") || Equals(focus, "小微小微"))
  193. {
  194. speech_tts("在的");
  195. init_listing();
  196. return;
  197. }
  198. speech_tts(focus);
  199. init_listing();
  200. //以下是将语音识别结果广播给其他节点
  201. object[] message = new object[2];
  202. message[0] = focus;
  203. message[1] = percent;
  204. this.gameObject.SendMessage("speech_result", message, SendMessageOptions.DontRequireReceiver);
  205. this.gameObject.BroadcastMessage("speech_result", message, SendMessageOptions.DontRequireReceiver);
  206. }
  207. /
  208. }

4.Unity创建 节点1(2D Sprite)和 节点2(Audio Source),将节点2拖拽到节点1 Inspector面板的 Aud 字段,实现引用关联。

5.该应用使用命令词实现了类似本地唤醒功能,软件运行后一直update 麦克风,监听到命令响应在的 或做其他命令处理。

6.语音合成WAV文件,做了本地保存,反复调用直接读取本地文件,不用反复使用TTS作文字到语音的转换,以节省资源。

步骤九

巴科斯范式.bnf文件可以关注这里巴科斯范式详解,这里简化了命令词定义用于测试。

  1. #BNF+IAT 1.0;
  2. !grammar call;
  3. !start <callstart>;
  4. <callstart>:syj|小微小微|小微|向前走|下一步|hello|你好|您
  5. 好|小螺丝|星际迷航|大陆|今天太冷了;

步骤十

问题总结

1,两个离线引擎不能同时运行,所以上面实现的是交替运行模式  就是识别模块运行,朗读模块初始化会失败,反之亦然,这样导致灵活性不强,不确定问题在哪里,也可能是引用了一个msc_x64.dll文件,这里不太确定,unity目录引用原因未能做测试,期待解决方案。

2.离线命令词识别返回结果会有1-3秒的识别反应时间,才能返回结果 ,这个时间可以做一个语音回馈,但是没有返回结果就不能确定是否应该触发语音回馈,期待解决方案。

3.语音合成朗读速度很快,可以放心使用。

4.以上测试文件打包,有需要可以自行下载。

https://download.csdn.net/download/lijiefu123456/13098984

 

 

 

 

 

 

 

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/你好赵伟/article/detail/124098
推荐阅读
相关标签
  

闽ICP备14008679号