Register on the iFlytek open platform and create your own application.
Step 2
Download the offline command-word recognition SDK (Windows MSC). Note that the SDK you download is tied to your appid and baked into the sample code; this appears to be iFlytek's licensing policy.
Step 3
Download the offline speech synthesis (standard edition) SDK.
Step 4
The official documentation calls for Visual Studio 2010. If you don't want to wrestle with version-compatibility issues, just install that version.
Link: netdisk download address, extraction code: 9541
Step 5
1. Extract the offline command-word recognition SDK archive and the offline speech synthesis (standard edition) archive.
2. In VS2010, choose Open Project/Solution and open the project files in each package's samples folder.
3. Configure the development environment; refer to the offline command-word recognition Windows SDK documentation and the offline speech synthesis Windows SDK documentation.
4. Make sure the samples projects build and run correctly.
Step 6
Package the offline speech synthesis module as a DLL
1. The sample project is built as a Win32 console application; change it to a DLL: right-click the project in Solution Explorer -> Properties -> General -> Configuration Type -> Dynamic Library (.dll).
2. Replace tts_offline_sample.c with the modified DLL code below, and remember to substitute your own appid.
- /*
- * Text To Speech (TTS) automatically converts arbitrary text into continuous,
- * natural speech in real time, providing a convenient way to deliver spoken
- * information to anyone, anywhere, at any time.
- */
-
-
- #include <stdlib.h>
- #include <stdio.h>
- #include <errno.h>
- #include <intrin.h>
-
- #include <string.h>
- #include <Windows.h>
- #include <locale.h>
-
-
- #include "qtts.h"
- #include "msp_cmn.h"
- #include "msp_errors.h"
-
- #ifdef _WIN64
- #pragma comment(lib,"../../libs/msc_x64.lib")//x64
- #else
- #pragma comment(lib,"../../libs/msc.lib")//x86
- #endif
-
- /* WAV audio header format */
- typedef struct _wave_pcm_hdr
- {
- char riff[4]; // = "RIFF"
- int size_8; // = FileSize - 8
- char wave[4]; // = "WAVE"
- char fmt[4]; // = "fmt "
- int fmt_size; // = size of the next block : 16
-
- short int format_tag; // = PCM : 1
- short int channels; // = number of channels : 1
- int samples_per_sec; // = sample rate : 8000 | 6000 | 11025 | 16000
- int avg_bytes_per_sec; // = bytes per second : samples_per_sec * bits_per_sample / 8
- short int block_align; // = bytes per sample point : wBitsPerSample / 8
- short int bits_per_sample; // = bits per sample : 8 | 16
-
- char data[4]; // = "data";
- int data_size; // = raw data length : FileSize - 44
- } wave_pcm_hdr;
-
- /* default WAV header */
- wave_pcm_hdr default_wav_hdr =
- {
- { 'R', 'I', 'F', 'F' },
- 0,
- {'W', 'A', 'V', 'E'},
- {'f', 'm', 't', ' '},
- 16,
- 1,
- 1,
- 16000,
- 32000,
- 2,
- 16,
- {'d', 'a', 't', 'a'},
- 0
- };
-
-
- char *wchar_t_to_char(const wchar_t *str){
- char *result = NULL;
- int textlen = 0;
- textlen = WideCharToMultiByte(CP_ACP,0,str,-1,NULL,0,NULL,NULL);
- result = (char*)malloc((textlen+1)*sizeof(char));
- memset(result,0,sizeof(char)*(textlen+1));
- WideCharToMultiByte(CP_ACP,0,str,-1,result,textlen,NULL,NULL);
- return result;
- }
-
- /* text-to-speech synthesis */
- int text_to_speech(const char* src_text, const char* des_path, const char* params)
- {
- int ret = -1;
- FILE* fp = NULL;
- const char* sessionID = NULL;
- unsigned int audio_len = 0;
- wave_pcm_hdr wav_hdr = default_wav_hdr;
- int synth_status = MSP_TTS_FLAG_STILL_HAVE_DATA;
-
- if (NULL == src_text || NULL == des_path)
- {
- printf("params is error!\n");
- return 0;
- }
- fp = fopen(des_path, "wb");
- if (NULL == fp)
- {
- printf("open %s error.\n", des_path);
- return 1;
- }
- /* start synthesis */
- sessionID = QTTSSessionBegin(params, &ret);
- if (MSP_SUCCESS != ret)
- {
- printf("QTTSSessionBegin failed, error code: %d.\n", ret);
- fclose(fp);
- return 2;
- }
- ret = QTTSTextPut(sessionID, src_text, (unsigned int)strlen(src_text), NULL);
- if (MSP_SUCCESS != ret)
- {
- printf("QTTSTextPut failed, error code: %d.\n",ret);
- QTTSSessionEnd(sessionID, "TextPutError");
- fclose(fp);
- return 3;
- }
- printf("正在合成 ...\n");
- fwrite(&wav_hdr, sizeof(wav_hdr) ,1, fp); // write the WAV header (16000 Hz sample rate)
- while (1)
- {
- /* fetch synthesized audio */
- const void* data = QTTSAudioGet(sessionID, &audio_len, &synth_status, &ret);
- if (MSP_SUCCESS != ret)
- break;
- if (NULL != data)
- {
- fwrite(data, audio_len, 1, fp);
- wav_hdr.data_size += audio_len; // accumulate data_size
- }
- if (MSP_TTS_FLAG_DATA_END == synth_status)
- break;
- }
- printf("\n");
- if (MSP_SUCCESS != ret)
- {
- printf("QTTSAudioGet failed, error code: %d.\n",ret);
- QTTSSessionEnd(sessionID, "AudioGetError");
- fclose(fp);
- return 4;
- }
- /* fix up the size fields in the WAV header */
- wav_hdr.size_8 += wav_hdr.data_size + (sizeof(wav_hdr) - 8);
-
- /* write the corrected fields back into the header; the output file is WAV format */
- fseek(fp, 4, 0);
- fwrite(&wav_hdr.size_8,sizeof(wav_hdr.size_8), 1, fp); // write the size_8 value
- fseek(fp, 40, 0); // seek to where data_size is stored
- fwrite(&wav_hdr.data_size,sizeof(wav_hdr.data_size), 1, fp); // write the data_size value
- fclose(fp);
- fp = NULL;
- /* synthesis finished */
- ret = QTTSSessionEnd(sessionID, "Normal");
- if (MSP_SUCCESS != ret)
- {
- printf("QTTSSessionEnd failed, error code: %d.\n",ret);
- }
-
- return 5;
- }
-
- /* user login */
- __declspec(dllexport) int loading_tts(){
- int ret = MSP_SUCCESS;
- const char* login_params = "appid = <your_appid>, work_dir = .";// login parameters; the appid is bound to the msc library, do not change it arbitrarily
- ret = MSPLogin(NULL, NULL, login_params); // first param: username, second: password (both NULL); third: login parameters; register at http://www.xfyun.cn to obtain an appid
- if (MSP_SUCCESS != ret)
- {
- MSPLogout();
- return 0;
- }
- return 1;
- }
-
- /* start conversion */
- __declspec(dllexport) int to_speech(wchar_t * wav_path, wchar_t * tts_text){
- int ret = MSP_SUCCESS;
- const char* session_begin_params = "engine_type = local, voice_name = xiaoyan, text_encoding = GB2312, tts_res_path = fo|C:\\xunfei_speech\\xiaoyan.jet;fo|C:\\xunfei_speech\\common.jet, sample_rate = 16000, speed = 50, volume = 50, pitch = 50, rdn = 2";
- //const char* filename = "C:\\xunfei_speech\\tts_sample.wav"; //合成的语音文件名称s
- //const char* text = "亲爱的用户,您好,这是一个语音合成示例,感谢您对小螺丝语音技术的支持!小螺丝是亚太地区最大的上市公司,股票代码:002233"; //合成文本
- const char* filename =wchar_t_to_char(wav_path);
- const char* text =wchar_t_to_char(tts_text);
- ret = text_to_speech(text, filename, session_begin_params);
- free((void*)filename); // wchar_t_to_char allocates with malloc, release the buffers here
- free((void*)text);
- return ret;
- }
-
-
- /* release */
- __declspec(dllexport) int free_tts(){
- MSPLogout();
- return 1;
- }
3. Rebuild the project to produce tts_offline_sample.dll; a quick way to verify the exports outside Unity is sketched below.
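Before wiring the DLL into Unity, it can be worth smoke-testing the three exports from a small standalone C# console program. The sketch below is not part of the original article; it assumes tts_offline_sample.dll (and the matching msc DLL) sit next to the test executable, that the test project targets the same bitness as the DLL, and that the resource paths compiled into the DLL under C:\xunfei_speech already exist.

    using System;
    using System.Runtime.InteropServices;

    class TtsDllSmokeTest
    {
        // Exports produced by the DLL built in step 6 (cdecl, undecorated C names).
        [DllImport("tts_offline_sample", CallingConvention = CallingConvention.Cdecl)]
        static extern int loading_tts();

        [DllImport("tts_offline_sample", CallingConvention = CallingConvention.Cdecl)]
        static extern int to_speech([MarshalAs(UnmanagedType.LPWStr)] string wav_path,
                                    [MarshalAs(UnmanagedType.LPWStr)] string tts_text);

        [DllImport("tts_offline_sample", CallingConvention = CallingConvention.Cdecl)]
        static extern int free_tts();

        static void Main()
        {
            // loading_tts returns 1 when MSPLogin succeeds (see the wrapper above).
            if (loading_tts() != 1)
            {
                Console.WriteLine("MSPLogin failed - check the appid and the msc library pairing");
                return;
            }
            // Chinese text; the DLL converts it to the system ANSI code page before synthesis.
            int ret = to_speech(@"C:\xunfei_speech\smoke_test.wav", "你好");
            Console.WriteLine("to_speech returned " + ret);
            free_tts();
        }
    }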
Step 7
Package the offline command-word recognition module as a DLL
1. The sample project is built as a Win32 console application; change it to a DLL: right-click the project in Solution Explorer -> Properties -> General -> Configuration Type -> Dynamic Library (.dll).
2. Replace asr_offline_record_sample.c with the modified DLL code below, and remember to substitute your own appid (the file name here matches the DLL name used by the Unity script in step 8).
- /*
- * Speech dictation (iFly Auto Transform) converts speech into the corresponding text in real time.
- */
-
- #include <stdlib.h>
- #include <stdio.h>
- //#include <string.h>
-
- #include <windows.h>
- #include <conio.h>
- #include <errno.h>
- #include <process.h>
-
-
- #include "../../include/qisr.h"
- #include "../../include/msp_cmn.h"
- #include "../../include/msp_errors.h"
- #include "./include/speech_recognizer.h"
-
- #define FRAME_LEN 640
- #define BUFFER_SIZE 4096
-
- enum{
- EVT_START = 0,
- EVT_STOP,
- EVT_QUIT,
- EVT_TOTAL
- };
- static HANDLE events[EVT_TOTAL] = {NULL,NULL,NULL};
-
- static COORD begin_pos = {0, 0};
- static COORD last_pos = {0, 0};
- #define SAMPLE_RATE_16K (16000)
- #define SAMPLE_RATE_8K (8000)
- #define MAX_GRAMMARID_LEN (32)
- #define MAX_PARAMS_LEN (1024)
-
- const char * ASR_RES_PATH = "fo|C:\\xunfei_speech\\command_speech\\common.jet"; // offline grammar recognition resource path
- #ifdef _WIN64
- const char * GRM_BUILD_PATH = "C:\\xunfei_speech\\command_speech\\GrmBuilld_x64"; // output path for the built offline grammar network (x64)
- #else
- const char * GRM_BUILD_PATH = "C:\\xunfei_speech\\command_speech\\GrmBuilld"; // output path for the built offline grammar network (x86)
- #endif
- const char * GRM_FILE = "C:\\xunfei_speech\\command_speech\\call.bnf"; // grammar file used to build the offline grammar network
- const char * LEX_NAME = "contact"; // lexicon slot of the offline grammar to update ("contact" in the call.bnf used by this sample)
- typedef struct _UserData {
- int build_fini; // whether grammar building has finished
- int update_fini; // whether the lexicon update has finished
- int errcode; // error code reported by the build/update callback
- char grammar_id[MAX_GRAMMARID_LEN]; // grammar ID returned by the build
- }UserData;
-
- const char *get_audio_file(void); // choose the audio file for offline grammar recognition
- int build_grammar(UserData *udata); // build the offline grammar network
- int update_lexicon(UserData *udata); // update the offline grammar lexicon
- int run_asr(UserData *udata); // run offline grammar recognition
-
- int build_grm_cb(int ecode, const char *info, void *udata)
- {
- UserData *grm_data = (UserData *)udata;
-
- if (NULL != grm_data) {
- grm_data->build_fini = 1;
- grm_data->errcode = ecode;
- }
-
- if (MSP_SUCCESS == ecode && NULL != info) {
- printf("构建语法成功! 语法ID:%s\n", info);
- if (NULL != grm_data)
- _snprintf(grm_data->grammar_id, MAX_GRAMMARID_LEN - 1, "%s", info); // copy the grammar ID (use an explicit format string)
- }
- else
- printf("构建语法失败!%d\n", ecode);
-
- return 0;
- }
-
- int build_grammar(UserData *udata)
- {
- FILE *grm_file = NULL;
- char *grm_content = NULL;
- unsigned int grm_cnt_len = 0;
- char grm_build_params[MAX_PARAMS_LEN] = {NULL};
- int ret = 0;
-
- grm_file = fopen(GRM_FILE, "rb");
- if(NULL == grm_file) {
- printf("打开\"%s\"文件失败![%s]\n", GRM_FILE, strerror(errno));
- return -1;
- }
-
- fseek(grm_file, 0, SEEK_END);
- grm_cnt_len = ftell(grm_file);
- fseek(grm_file, 0, SEEK_SET);
-
- grm_content = (char *)malloc(grm_cnt_len + 1);
- if (NULL == grm_content)
- {
- printf("内存分配失败!\n");
- fclose(grm_file);
- grm_file = NULL;
- return -1;
- }
- fread((void*)grm_content, 1, grm_cnt_len, grm_file);
- grm_content[grm_cnt_len] = '\0';
- fclose(grm_file);
- grm_file = NULL;
-
- _snprintf(grm_build_params, MAX_PARAMS_LEN - 1,
- "engine_type = local, \
- asr_res_path = %s, sample_rate = %d, \
- grm_build_path = %s, ",
- ASR_RES_PATH,
- SAMPLE_RATE_16K,
- GRM_BUILD_PATH
- );
- ret = QISRBuildGrammar("bnf", grm_content, grm_cnt_len, grm_build_params, build_grm_cb, udata);
-
- free(grm_content);
- grm_content = NULL;
-
- return ret;
- }
-
- int update_lex_cb(int ecode, const char *info, void *udata)
- {
- UserData *lex_data = (UserData *)udata;
-
- if (NULL != lex_data) {
- lex_data->update_fini = 1;
- lex_data->errcode = ecode;
- }
-
- if (MSP_SUCCESS == ecode)
- printf("更新词典成功!\n");
- else
- printf("更新词典失败!%d\n", ecode);
-
- return 0;
- }
-
- int update_lexicon(UserData *udata)
- {
- const char *lex_content = "丁伟\n黄辣椒";
- unsigned int lex_cnt_len = strlen(lex_content);
- char update_lex_params[MAX_PARAMS_LEN] = {NULL};
-
- _snprintf(update_lex_params, MAX_PARAMS_LEN - 1,
- "engine_type = local, text_encoding = GB2312, \
- asr_res_path = %s, sample_rate = %d, \
- grm_build_path = %s, grammar_list = %s, ",
- ASR_RES_PATH,
- SAMPLE_RATE_16K,
- GRM_BUILD_PATH,
- udata->grammar_id);
- return QISRUpdateLexicon(LEX_NAME, lex_content, lex_cnt_len, update_lex_params, update_lex_cb, udata);
- }
-
-
- static void show_result(char *string, char is_over)
- {
- COORD orig, current;
- CONSOLE_SCREEN_BUFFER_INFO info;
- HANDLE w = GetStdHandle(STD_OUTPUT_HANDLE);
- GetConsoleScreenBufferInfo(w, &info);
- current = info.dwCursorPosition;
-
- if(current.X == last_pos.X && current.Y == last_pos.Y ) {
- SetConsoleCursorPosition(w, begin_pos);
- } else {
- /* changed by other routines, use the new pos as start */
- begin_pos = current;
- }
- if(is_over)
- SetConsoleTextAttribute(w, FOREGROUND_GREEN);
- printf("Result: [ %s ]\n", string);
- if(is_over)
- SetConsoleTextAttribute(w, info.wAttributes);
-
- GetConsoleScreenBufferInfo(w, &info);
- last_pos = info.dwCursorPosition;
- }
-
- static char *g_result = NULL; // accumulated recognition result of the current utterance
-
- static char bk_result[1024]="0"; // last result handed back to callers of start_listening()
-
-
- static unsigned int g_buffersize = BUFFER_SIZE;
- static UserData g_asr_data;
- struct speech_rec g_asr;
-
- void on_result(const char *result, char is_last)
- {
- if (result) {
- size_t left = g_buffersize - 1 - strlen(g_result);
- size_t size = strlen(result);
- if (left < size) {
- g_result = (char*)realloc(g_result, g_buffersize + BUFFER_SIZE);
- if (g_result)
- g_buffersize += BUFFER_SIZE;
- else {
- printf("mem alloc failed\n");
- return;
- }
- }
-
- strncat(g_result, result, size);
- printf("%s", g_result);
- _snprintf(bk_result, sizeof(bk_result) - 1, "%s", g_result); // bk_result is only 1024 bytes, avoid overflowing it
-
- //show_result(g_result, is_last);
- }
- }
- void on_speech_begin()
- {
- if (g_result)
- {
- free(g_result);
- }
-
-
- g_result = (char*)malloc(BUFFER_SIZE);
- g_buffersize = BUFFER_SIZE;
- memset(g_result, 0, g_buffersize);
- printf("Start Listening...\n");
-
-
- }
- void on_speech_end(int reason)
- {
- if (reason == END_REASON_VAD_DETECT){
- printf("\nSpeaking done \n");
- // g_result = (char*)"Speaking done";
- //strcpy(bk_result,"Speaking done");
- }else{
- printf("\nRecognizer error %d\n", reason);
- //g_result = (char*)"Recognizer error";
- //strcpy(bk_result,"Recognizer error");
- }
- }
-
- /* demo recognize the audio from microphone */
- static void recognize_mic(const char* session_begin_params)
- {
- int errcode;
- int i = 0;
- HANDLE helper_thread = NULL;
-
- DWORD waitres;
- char isquit = 0;
-
- struct speech_rec_notifier recnotifier = {
- on_result,
- on_speech_begin,
- on_speech_end
- };
-
- errcode = sr_init(&g_asr, session_begin_params, SR_MIC, DEFAULT_INPUT_DEVID, &recnotifier);
- if (errcode) {
- printf("speech recognizer init failed\n");
- return;
- }
- }
-
-
- int run_asr(UserData *udata)
- {
- char asr_params[MAX_PARAMS_LEN] = {NULL};
- const char *rec_rslt = NULL;
- const char *session_id = NULL;
- const char *asr_audiof = NULL;
- FILE *f_pcm = NULL;
- char *pcm_data = NULL;
- long pcm_count = 0;
- long pcm_size = 0;
- int last_audio = 0;
- int aud_stat = MSP_AUDIO_SAMPLE_CONTINUE;
- int ep_status = MSP_EP_LOOKING_FOR_SPEECH;
- int rec_status = MSP_REC_STATUS_INCOMPLETE;
- int rss_status = MSP_REC_STATUS_INCOMPLETE;
- int errcode = -1;
- int aud_src = 1;
-
- // offline grammar recognition parameters
- _snprintf(asr_params, MAX_PARAMS_LEN - 1,
- "engine_type = local, \
- asr_res_path = %s, sample_rate = %d, \
- grm_build_path = %s, local_grammar = %s, \
- result_type = xml, result_encoding = GB2312, ",
- ASR_RES_PATH,
- SAMPLE_RATE_16K,
- GRM_BUILD_PATH,
- udata->grammar_id
- );
-
- recognize_mic(asr_params);
-
- return 0;
- }
-
-
- int main(int argc, char* argv[]){ /* left over from the console sample; not used once the project is built as a DLL */
- const char *login_config = "appid = 5fa0d519"; // login parameters
- int ret = 0 ;
- ret = MSPLogin(NULL, NULL, login_config); // username and password are NULL; the third argument carries the login parameters
- if (MSP_SUCCESS != ret) {
- printf("登录失败:%d\n", ret);
- //goto exit;
- }
-
- memset(&g_asr_data, 0, sizeof(UserData));
- printf("构建离线识别语法网络...\n");
- ret = build_grammar(&g_asr_data); // the first time a grammar is used it must be built to obtain its grammar ID; later recognitions reuse it without rebuilding
- if (MSP_SUCCESS != ret) {
- printf("构建语法调用失败!\n");
- }
-
- while (1 != g_asr_data.build_fini)
- _sleep(300);
- if (MSP_SUCCESS == g_asr_data.errcode)
- printf("离线识别语法网络构建完成,开始识别...\n");
-
- ret = run_asr(&g_asr_data);
- if (MSP_SUCCESS != ret) {
- printf("离线语法识别出错: %d \n", ret);
- }
- while(1){
- sr_start_listening(&g_asr);
- _sleep(1000);
- }
- return 1;
- }
-
- // initialization and login
- __declspec(dllexport) int loading_msp(){
- const char *login_config = "appid = <your_appid>"; // login parameters
- int ret = 0 ;
- ret = MSPLogin(NULL, NULL, login_config); // username and password are NULL; the third argument carries the login parameters
- if (MSP_SUCCESS != ret) {
- MSPLogout();
- return 0;
- }
- memset(&g_asr_data, 0, sizeof(UserData));
- return 1;
- }
-
- /* build the command-word grammar network */
- __declspec(dllexport) int build_net_msp(){
- int ret = 0 ;
- printf("构建离线识别语法网络...\n");
- ret = build_grammar(&g_asr_data); // the first time a grammar is used it must be built to obtain its grammar ID; later recognitions reuse it without rebuilding
- if (MSP_SUCCESS != ret) {
- printf("构建语法调用失败!\n");
- return -1;
- }
- while (1 != g_asr_data.build_fini)
- _sleep(300);
- if (MSP_SUCCESS != g_asr_data.errcode)
- return 0;
- return 1;
- }
-
- /* update the lexicon slot */
- __declspec(dllexport) int update_lexicon_msp(){
- int ret = 0 ;
- printf("更新离线语法词典...\n");
- ret = update_lexicon(&g_asr_data); // call QISRUpdateLexicon when the entries of a grammar slot need to be updated
- if (MSP_SUCCESS != ret) {
- printf("更新词典调用失败!\n");
- return 0;
- }
- while (1 != g_asr_data.update_fini)
- _sleep(300);
- if (MSP_SUCCESS != g_asr_data.errcode)
- return 0;
- return 1;
- }
-
- /* start command-word recognition (initialize the recognizer) */
- __declspec(dllexport) int init_run_msp(){
- int ret = 0 ;
- ret = run_asr(&g_asr_data);
- if (MSP_SUCCESS != ret) {
- printf("离线语法识别出错: %d \n", ret);
- return 0;
- }
- return 1;
- }
-
- static wchar_t * char2wchar(char *str){
- int length = strlen(str)+1;
- wchar_t *t = (wchar_t*)malloc(sizeof(wchar_t)*length);
- memset(t,0,length*sizeof(wchar_t));
- MultiByteToWideChar(CP_ACP,0,str,strlen(str),t,length);
- return t;
- }
-
- /* trigger listening and fetch the latest recognition result */
- __declspec(dllexport) wchar_t* start_listening(){
-
- wchar_t* re;
- sr_start_listening(&g_asr); // safe to call repeatedly; the speech_recognizer wrapper handles an already-started session
- re = char2wchar(bk_result);
- strcpy(bk_result,"0");
- return re;
- }
-
-
- /* release */
- __declspec(dllexport) int free_msp(){
- MSPLogout();
- return 1;
- }
3. Compile and rebuild to produce asr_offline_record_sample.dll.
4. Tip: after building the DLLs, make sure they can load the iFlytek engine resource files when running inside Unity. Absolute paths are used here: create the folder c:\xunfei_speech and copy the resource files from the SDKs into it so they can be referenced correctly; the expected layout is sketched below.
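For reference, the hard-coded paths in the two samples above expect roughly the following layout (file names are taken directly from the code; which SDK sub-folder each resource comes from may differ between SDK versions, so treat the mapping as an assumption):

    C:\xunfei_speech\
        common.jet           offline TTS resource (tts_res_path in to_speech)
        xiaoyan.jet          offline TTS voice "xiaoyan" (tts_res_path in to_speech)
        command_speech\
            common.jet       offline grammar recognition resource (ASR_RES_PATH)
            call.bnf         grammar file (GRM_FILE)
            GrmBuilld\       grammar build output, x86 (GRM_BUILD_PATH)
            GrmBuilld_x64\   grammar build output, x64 (GRM_BUILD_PATH)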
Step 8: Calling the DLLs from Unity
1. Create a Unity project named speech_demo.
2. In the Unity project, create plugin directories under Assets/Scenes: Assets/Scenes/Plugins/Windows/X86_64 for the 64-bit platform and Assets/Scenes/Plugins/Windows/X86 for the 32-bit platform. Copy asr_offline_record_sample.dll, tts_offline_sample.dll and msc_x64.dll (the two engines can share one copy; on x86 use msc.dll) into the matching directory. The DLL file names must match the names used in the DllImport attributes of the script below.
3. Create a script named ToSpeech.cs with the following code:
- using System.Collections;
- using System.Collections.Generic;
- using UnityEngine;
-
- using System;
- using System.Text;
- using System.Linq;
- using System.IO;
- using System.Xml;
-
- using System.Runtime.InteropServices;
-
-
-
- public class ToSpeech : MonoBehaviour
- {
-
- // initialize the offline TTS engine
- [DllImport("tts_offline_sample", EntryPoint = "loading_tts", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.Cdecl)]
- extern static int loading_tts();
- // synthesize a speech file
- [DllImport("tts_offline_sample", EntryPoint = "to_speech", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.Cdecl)]
- extern static int to_speech([MarshalAs(UnmanagedType.LPWStr)]string wav_path, [MarshalAs(UnmanagedType.LPWStr)]string tts_text);
- // release the offline TTS engine
- [DllImport("tts_offline_sample", EntryPoint = "free_tts", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.Cdecl)]
- extern static int free_tts();
-
- // initialize the offline command-word recognition engine
- [DllImport("asr_offline_record_sample", EntryPoint = "loading_msp", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.Cdecl)]
- public static extern int loading_msp();
- // warm up recognition (initialize the recognizer)
- [DllImport("asr_offline_record_sample", EntryPoint = "init_run_msp", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.Cdecl)]
- static extern int init_run_msp();
- // build the recognition grammar network
- [DllImport("asr_offline_record_sample", EntryPoint = "build_net_msp", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.Cdecl)]
- static extern int build_net_msp();
- // update the lexicon slot
- [DllImport("asr_offline_record_sample", EntryPoint = "update_lexicon_msp", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.Cdecl)]
- static extern int update_lexicon_msp();
- // start listening on the microphone
- [DllImport("asr_offline_record_sample", EntryPoint = "start_listening", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.Cdecl)]
- static extern IntPtr start_listening();
- // release the offline command-word recognition engine
- [DllImport("asr_offline_record_sample", EntryPoint = "free_msp", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.Cdecl)]
- static extern int free_msp();
-
- //test
- //[DllImport("asr_offline_record_sample", EntryPoint = "test", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.Cdecl)]
- //public static extern int test();
-
- public bool listening_off = false; // toggles the listening loop on/off
- public int confidenct = 25; // confidence threshold
- private int tts_off = 0;
- private float lastTime;
- private float curTime;
-
-
-
- void Start()
- {
- init_listing();
- }
-
- void Update()
- {
- update_listing();
- }
-
-
-
- ///////////////////////// narrator (TTS) /////////////////////////
-
- public AudioSource aud; // must be wired up to the audio node in the Inspector
-
- IEnumerator LoadAudio(string recordPath)
- {
- // note: depending on the Unity version, WWW may require a "file://" prefix for local paths
- WWW www = new WWW(recordPath);
- yield return www;
- var clipTemp = www.GetAudioClip();
- aud.clip = clipTemp;
- aud.Play();
- }
-
- private void speech_tts(string tts_text)
- {
- // if the WAV has already been synthesized, just load and play it instead of calling TTS again
- string wav_path = "C:\\xunfei_speech\\" + tts_text + ".wav";
- if (File.Exists(wav_path))
- {
- StartCoroutine(LoadAudio(wav_path));
- return;
- }
-
-
- tts_off = loading_tts();
- if (tts_off == 1)
- {
- print("初始化朗读成功:" + tts_off);
- int ret2 = to_speech(wav_path, tts_text);
- StartCoroutine(LoadAudio(wav_path));
- }
- else
- {
- print("初始化失败:" + tts_off);
- }
- free_tts();
- }
-
-
- ///////////////////////// listener (ASR) /////////////////////////
-
- // initialize the listener
- void init_listing()
- {
- if (1 == loading_msp())
- {
- print("载入初始化成功");
- // in a real application this block only needs to run once //
- if (1 == build_net_msp())
- {
- print("下载构建网络命令词成功");
- if (1 == update_lexicon_msp())
- {
- print("更新词典槽成功");
- }
- }
-
- if (1 == init_run_msp())
- {
- print("预热命令词成功");
- listening_off = true;
- }
- }
- lastTime = Time.time;
- }
-
- // periodically (re)starts the mic listener; it is safe to call repeatedly, the SDK handles the synchronization
- // returns: the recognition result as an XML string
- string update_listing()
- {
- string restr = "0";
- curTime = Time.time;
- if (curTime - lastTime >= 1)
- {
- if (listening_off)
- {
- IntPtr ip = start_listening();
- restr = Marshal.PtrToStringUni(ip);
- print("打开麦克风监听中..." + restr);
- get_result_confidenct(restr); // parse the result XML
- }
- // print(curTime - lastTime);
- lastTime = curTime;
- }
- return restr;
- }
-
- // release the listener
- void OnDestroy(){
- free_msp();
- print("退出了");
- }
-
- // parses the XML returned by the recognizer
- // in: XML string
- // returns: a match score between 0 and 1 (note: res is never assigned below, so the current implementation always returns 0)
- double get_result_confidenct(string strxml)
- {
- // string strxml = @"<?xml version='1.0' encoding='gb2312' standalone='yes' ?><nlp> <version>1.1</version> <rawtext>您好</rawtext> <confidence>27</confidence> <engine>local</engine> <result> <focus>您好</focus> <confidence>18</confidence> <object> <您好 id='65535'>您好</您好> </object> </result></nlp>";
- if (Equals(strxml,"0")) { return 0; } //输入不是xml过滤掉 直接 返回结果;
-
- XmlDocument xml = new XmlDocument();
- XmlReaderSettings set = new XmlReaderSettings();
- set.IgnoreComments = true;
- xml.LoadXml(strxml);
-
- int s_confidenct = 0;
- string s_rawtext = "";
- int d_confidenct = 0;
- string d_focus = "";
- double res = 0;
-
- XmlNodeList sour_xmlNodeList = xml.SelectSingleNode("nlp").ChildNodes;
- foreach (XmlElement node in sour_xmlNodeList)
- {
- if (node.Name == "confidence")
- {
- s_confidenct = int.Parse(node.InnerText.Trim());
- print(node.Name + ":" + s_confidenct);
- }
- if (node.Name == "rawtext")
- {
- s_rawtext = node.InnerText.Trim();
- print(node.Name + ":" + s_rawtext);
- }
- }
-
- XmlNodeList back_dst_xmlNodeList = xml.SelectSingleNode("nlp").SelectSingleNode("result").ChildNodes;
- foreach (XmlElement node in back_dst_xmlNodeList)
- {
- if (node.Name == "confidence")
- {
- d_confidenct = int.Parse(node.InnerText.Trim());
- print(node.Name + ":" + d_confidenct);
- }
- if (node.Name == "focus")
- {
- d_focus = node.InnerText.Trim();
- print(node.Name + ":" + d_focus);
- }
- }
-
- if (s_confidenct > confidenct) // confidence threshold check
- {
- send_speech_result(d_focus, s_confidenct);
- }
-
- return res; // return the match score
- }
-
- // broadcast the recognition result to parent and child scripts
- // in: 1. result text | 2. confidence (the original comment also mentions a 0~1 match rate, which is not passed here)
- void send_speech_result(string focus, int percent)
- {
-
- listening_off = false;
- free_msp(); // temporarily log the recognizer out (the two offline engines cannot run at the same time, see the notes in step 10)
- if (Equals(focus, "小微") || Equals(focus, "小微小微"))
- {
- speech_tts("在的");
- init_listing();
- return;
- }
- speech_tts(focus);
- init_listing();
-
- // broadcast the recognition result to the other nodes
- object[] message = new object[2];
- message[0] = focus;
- message[1] = percent;
- this.gameObject.SendMessage("speech_result", message, SendMessageOptions.DontRequireReceiver);
- this.gameObject.BroadcastMessage("speech_result", message, SendMessageOptions.DontRequireReceiver);
- }
-
- /////////////////////////////////////////////////////////////////
-
-
- }
4. In Unity, create Node 1 (a 2D Sprite) and Node 2 (an Audio Source). Drag Node 2 onto the Aud field of Node 1 in the Inspector panel to establish the reference.
5. The app uses command words to implement something like local wake-word behavior: after it starts it keeps polling the microphone in Update, answers "在的" when the wake word is heard, and handles the other commands; other scripts can consume the result through the speech_result broadcast, as sketched after this list.
6. Synthesized WAV files are cached locally, so repeated requests read the existing file instead of running TTS again, which saves resources.
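Since ToSpeech.cs hands the result on with SendMessage/BroadcastMessage("speech_result", ...), any script on the same GameObject or one of its children can react to recognized commands. The receiver below is a hypothetical minimal sketch (the class name and the command string are my own; only the speech_result signature is dictated by send_speech_result above):

    using UnityEngine;

    // Attach to the same GameObject as ToSpeech, or to one of its children.
    public class SpeechResultReceiver : MonoBehaviour
    {
        // Invoked via SendMessage/BroadcastMessage from ToSpeech.send_speech_result.
        void speech_result(object[] message)
        {
            string focus = (string)message[0];  // recognized command word
            int confidence = (int)message[1];   // confidence from the <nlp> result

            Debug.Log("command: " + focus + ", confidence: " + confidence);

            if (focus == "下一步")              // e.g. react to the "next step" command defined in call.bnf
            {
                // drive scene-specific logic here
            }
        }
    }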
Step 9
For the Backus-Naur Form (.bnf) grammar file, see a detailed BNF reference; the command-word definitions here are simplified for testing (a variant with a lexicon slot is sketched after the grammar).
- #BNF+IAT 1.0;
- !grammar call;
-
- !start <callstart>;
- <callstart>:syj|小微小微|小微|向前走|下一步|hello|你好|您好|小螺丝|星际迷航|大陆|今天太冷了;
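Note that update_lexicon_msp updates a slot named contact (LEX_NAME in the C code), which the simplified grammar above does not declare, so the lexicon update is only meaningful with a grammar that contains that slot. A hypothetical call.bnf along the lines of the official sample might look like this (the rule names and phrases below are illustrative, not the article's file; the contact entries match the lex_content used in update_lexicon):

    #BNF+IAT 1.0;
    !grammar call;
    !slot <contact>;

    !start <callstart>;
    <callstart>:<dialpre><contact>;
    <dialpre>:打电话给|拨打;
    <contact>:丁伟|黄辣椒;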
Step 10
Known issues
1. The two offline engines cannot run at the same time, so the code above alternates between them: while the recognition module is running, initializing the TTS module fails, and vice versa. This limits flexibility. The cause is unclear; it may be that both reference the same msc_x64.dll, but this could not be verified because of how the DLLs are referenced from the Unity directory. A solution would be welcome.
2. Offline command-word recognition takes roughly 1-3 seconds before a result comes back. That gap could be filled with a spoken acknowledgment, but without a result there is no way to know whether the acknowledgment should fire. A solution would be welcome.
3. Offline speech synthesis is fast and can be used without concern.
4. The test files above are packaged for download if you need them.
https://download.csdn.net/download/lijiefu123456/13098984