赞
踩
1.先注册百度开放平台的账号,然后按文档申请试用资格及创建应用:https://ai.baidu.com/ai-doc/SPEECH/qknh9i8ed
Windows平台选择“不需要”就可以了。
2.下载C# SDK包
解压以后里面包含这两个文件夹,我工程里面用的是net45,将文件夹放入unity Assets/Plugins文件夹下:
同时需要将Unity的 Api Compatibility Level 设置为 .NET 4.x,设置路径为 Player Settings → Other Settings → Configuration → Api Compatibility Level
至此百度语音SDK导入完毕,目前我自己总结了两种实现语音识别的方式:
第一种是用unity的UnityWebRequest去实现,是看的一位博主的教程,链接地址是:Unity百度语音识别-CSDN博客
第二种是看官方文档以后获取SDK的接口来实现的,具体实现方式如下:
在Git上下载了SDK源码进行学习,代码里面有具体功能实现的对应接口,源码地址:GitHub - Baidu-AIP/dotnet-sdk: 百度AI开放平台 .Net SDK
下载完毕解压后如下图,从Git的说明文档上可知speech文件夹中即是语音识别的代码:
在Asr类中,找到“识别语音数据”的方法接口JObject Recognize
byte[] data:音频数据;
string format:音频格式;
int rate:采样频率;
options:语言类型,默认为1537普通话,还支持粤语、四川话、英语等,具体看官方文档介绍。
1.进行录音采集保存
/// <summary>
/// Starts a non-looping microphone recording into <c>saveAudioClip</c>.
/// Does nothing (apart from logging an error) when no microphone is available.
/// </summary>
void StartRecord()
{
    // Without any capture device Microphone.Start cannot produce a usable clip.
    if (Microphone.devices.Length == 0)
    {
        Debug.LogError("No microphone detected; cannot start recording.");
        saveAudioClip = null;
        return;
    }

    // Plain progress message, so it is logged at info level rather than as an error.
    Debug.Log("开始");
    saveAudioClip = Microphone.Start(currentDeviceName, false, recordMaxTime, recordFrequency);
}
2.对录音进行转换,将音频数据转换为byte数组(16位PCM)
/// <summary>
/// Converts the float samples of an <see cref="AudioClip"/> into raw 16-bit signed PCM bytes.
/// </summary>
/// <param name="audioClip">Clip to convert. A null clip yields an empty array.</param>
/// <param name="sampleCount">
/// Number of sample frames (per channel) to convert, counted from the start of the clip.
/// A negative value, or a value larger than the clip, converts the whole clip.
/// </param>
/// <returns>
/// Two bytes per sample value, low byte first. For multi-channel clips the values stay
/// interleaved exactly as <c>AudioClip.GetData</c> returns them.
/// </returns>
public byte[] ConvertClipToBytes(AudioClip audioClip, int sampleCount = -1)
{
    if (audioClip == null)
    {
        return new byte[0];
    }

    int frameCount = audioClip.samples;
    if (sampleCount >= 0 && sampleCount < frameCount)
    {
        frameCount = sampleCount;
    }

    // GetData fills one value per channel per frame, so the buffer must cover all channels.
    int valueCount = frameCount * audioClip.channels;
    if (valueCount == 0)
    {
        return new byte[0];
    }

    float[] samples = new float[valueCount];
    audioClip.GetData(samples, 0);

    const int rescaleFactor = 32767;
    byte[] bytesData = new byte[valueCount * 2];

    for (int i = 0; i < valueCount; i++)
    {
        // Clamp first: a value outside [-1, 1] would overflow the cast to short.
        float clamped = Mathf.Clamp(samples[i], -1f, 1f);
        short pcm = (short)(clamped * rescaleFactor);

        // Write the bytes explicitly in little-endian order; BitConverter would follow
        // the host's endianness and allocate a temporary array for every sample.
        bytesData[i * 2] = (byte)(pcm & 0xFF);
        bytesData[i * 2 + 1] = (byte)((pcm >> 8) & 0xFF);
    }

    return bytesData;
}
3.转换完毕通过SDK接口发送
// Convert the recorded clip to PCM bytes first, then hand them to the SDK's Recognize call.
byte[] pcmBytes = ConvertClipToBytes(saveAudioClip);
var result = asr.Recognize(pcmBytes, "pcm", recordFrequency, languageType);
4.将返回的数据转换为字符串,并用正则表达式提取自己想要的信息
// Serialize the SDK response to compact JSON so it can be scanned with regular expressions.
string str = JsonConvert.SerializeObject(result, Formatting.None);

if (!string.IsNullOrEmpty(str))
{
    bool recognized = Regex.IsMatch(str, @"err_msg.:.success");
    if (!recognized)
    {
        // The service did not report success: show the fallback text.
        str = "识别结果为空";
    }
    else
    {
        // Pull the transcript out of the "result" array; if the pattern does not
        // match, the serialized response is shown unchanged.
        Match transcript = Regex.Match(str, "result.:..(.*?)..]");
        if (transcript.Success)
        {
            str = transcript.Groups[1].Value;
        }
    }

    tex.text = str;
}
完整代码如下:
偷懒写的代码不牢固,result在获取的时候有可能会出现NULL值,所以你们可以用Try Catch做一下容错
- using System.Collections;
- using System.Collections.Generic;
- using System.Text.RegularExpressions;
- using UnityEngine;
- using UnityEngine.UI;
- using UnityEngine.Networking;
- using System;
- using Baidu.Aip.Speech;
- using Newtonsoft.Json;
-
/// <summary>
/// Push-to-talk demo: hold Space to record from the microphone, release it to send the
/// recording to Baidu speech recognition through the SDK and show the transcript in a UI Text.
/// </summary>
public class Test : MonoBehaviour
{
    // Text shown when the service returns nothing usable or the request fails.
    const string EmptyResultText = "识别结果为空";

    // Built once instead of on every recognition request.
    static readonly Regex SuccessPattern = new Regex(@"err_msg.:.success");

    // NOTE(review): the trailing "..]" consumes one character in front of the closing
    // quote, so the last character of the transcript (usually the sentence-ending
    // punctuation) is not captured. Confirm this is intended.
    static readonly Regex TranscriptPattern = new Regex("result.:..(.*?)..]");

    public string app_id;
    public string api_key;
    public string secret_Key;
    public Asr asr;

    int recordFrequency = 8000; // Recording sample rate in Hz.
    int recordMaxTime = 20; // Maximum recording length in seconds.
    AudioClip saveAudioClip; // Clip holding the current recording.
    AudioSource source;
    string currentDeviceName = string.Empty; // Empty name is passed straight to the Microphone API.
    Text tex;
    Dictionary<string, object> languageType = new Dictionary<string, object>();

    // Start is called before the first frame update
    void Start()
    {
        // AudioClip is an asset, not a Component, so it must not be fetched with
        // GetComponent; the clip is created by Microphone.Start in StartRecord.
        source = GetComponent<AudioSource>();
        if (source == null)
        {
            Debug.LogWarning("No AudioSource on this GameObject; recordings will not be played back.");
        }

        GameObject resultObject = GameObject.Find("Canvas/ResultTex");
        if (resultObject != null)
        {
            tex = resultObject.GetComponent<Text>();
        }
        if (tex == null)
        {
            Debug.LogWarning("Canvas/ResultTex with a Text component was not found; results will only be logged.");
        }

        asr = new Asr(app_id, api_key, secret_Key);
        languageType["dev_pid"] = 1537;
    }

    // Update is called once per frame
    void Update()
    {
        if (Input.GetKeyDown(KeyCode.Space))
        {
            StartRecord();
        }
        else if (Input.GetKeyUp(KeyCode.Space))
        {
            EndRecord();
        }
    }

    /// <summary>
    /// Converts the float samples of an <see cref="AudioClip"/> into raw 16-bit signed PCM bytes.
    /// </summary>
    /// <param name="audioClip">Clip to convert. A null clip yields an empty array.</param>
    /// <param name="sampleCount">
    /// Number of sample frames (per channel) to convert, counted from the start of the clip.
    /// A negative value, or a value larger than the clip, converts the whole clip.
    /// </param>
    /// <returns>
    /// Two bytes per sample value, low byte first. For multi-channel clips the values stay
    /// interleaved exactly as <c>AudioClip.GetData</c> returns them.
    /// </returns>
    public byte[] ConvertClipToBytes(AudioClip audioClip, int sampleCount = -1)
    {
        if (audioClip == null)
        {
            return new byte[0];
        }

        int frameCount = audioClip.samples;
        if (sampleCount >= 0 && sampleCount < frameCount)
        {
            frameCount = sampleCount;
        }

        // GetData fills one value per channel per frame, so the buffer must cover all channels.
        int valueCount = frameCount * audioClip.channels;
        if (valueCount == 0)
        {
            return new byte[0];
        }

        float[] samples = new float[valueCount];
        audioClip.GetData(samples, 0);

        const int rescaleFactor = 32767;
        byte[] bytesData = new byte[valueCount * 2];

        for (int i = 0; i < valueCount; i++)
        {
            // Clamp first: a value outside [-1, 1] would overflow the cast to short.
            float clamped = Mathf.Clamp(samples[i], -1f, 1f);
            short pcm = (short)(clamped * rescaleFactor);

            // Write the bytes explicitly in little-endian order; BitConverter would follow
            // the host's endianness and allocate a temporary array for every sample.
            bytesData[i * 2] = (byte)(pcm & 0xFF);
            bytesData[i * 2 + 1] = (byte)((pcm >> 8) & 0xFF);
        }

        return bytesData;
    }

    /// <summary>
    /// Starts a non-looping microphone recording into <c>saveAudioClip</c>.
    /// Does nothing (apart from logging an error) when no microphone is available.
    /// </summary>
    void StartRecord()
    {
        if (Microphone.devices.Length == 0)
        {
            Debug.LogError("No microphone detected; cannot start recording.");
            saveAudioClip = null;
            return;
        }

        Debug.Log("开始");
        saveAudioClip = Microphone.Start(currentDeviceName, false, recordMaxTime, recordFrequency);
    }

    /// <summary>
    /// Stops the recording, plays it back, sends the recorded part to the recognition
    /// service and displays the transcript (or the fallback text on failure).
    /// </summary>
    void EndRecord()
    {
        Debug.Log("结束");

        // Both values must be read before Microphone.End stops the device.
        bool stillRecording = Microphone.IsRecording(currentDeviceName);
        int recordedSamples = Microphone.GetPosition(currentDeviceName);
        Microphone.End(currentDeviceName);

        if (saveAudioClip == null)
        {
            // StartRecord never produced a clip, so there is nothing to recognize.
            return;
        }

        if (!stillRecording)
        {
            // Presumably the non-looping recording already ran to recordMaxTime and
            // stopped on its own, so the whole buffer is treated as recorded audio.
            recordedSamples = saveAudioClip.samples;
        }

        if (source != null)
        {
            source.PlayOneShot(saveAudioClip);
        }

        if (recordedSamples <= 0)
        {
            ShowResult(EmptyResultText);
            return;
        }

        string text;
        try
        {
            // Only the recorded part is uploaded, not the silent remainder of the buffer.
            // This call is made synchronously from the Update path.
            byte[] pcmBytes = ConvertClipToBytes(saveAudioClip, recordedSamples);
            var result = asr.Recognize(pcmBytes, "pcm", recordFrequency, languageType);
            text = ExtractTranscript(result);
        }
        catch (Exception e)
        {
            // A failed request must not break the Update loop.
            Debug.LogException(e);
            text = EmptyResultText;
        }

        ShowResult(text);
    }

    /// <summary>
    /// Serializes the SDK response and extracts the transcript with the regex patterns above.
    /// Returns the fallback text when the response is null, does not report success,
    /// or contains no recognizable result.
    /// </summary>
    static string ExtractTranscript(object result)
    {
        if (result == null)
        {
            return EmptyResultText;
        }

        string json = JsonConvert.SerializeObject(result, Formatting.None);
        if (string.IsNullOrEmpty(json) || !SuccessPattern.IsMatch(json))
        {
            return EmptyResultText;
        }

        Match transcript = TranscriptPattern.Match(json);
        return transcript.Success ? transcript.Groups[1].Value : EmptyResultText;
    }

    /// <summary>
    /// Writes the text to the result label, or logs it when the label is missing.
    /// </summary>
    void ShowResult(string text)
    {
        if (tex != null)
        {
            tex.text = text;
        }
        else
        {
            Debug.Log(text);
        }
    }
}
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。