当前位置:   article > 正文

Unity接入百度语音识别SDK windows平台_unity 百度语音唤醒

unity 百度语音唤醒

1.先注册百度开放平台的账号,然后按文档申请试用资格及创建应用:https://ai.baidu.com/ai-doc/SPEECH/qknh9i8ed

 Windows平台选择“不需要”就可以了。

2.下载C# SDK包

解压以后里面包含这两个文件夹,我工程里面用的是net45,将文件夹放入unity Assets/Plugins文件夹下:

同时需要把Unity的Api Compatibility Level属性改为.NET 4.x,设置路径为:Player Settings → Other Settings → Configuration → Api Compatibility Level

至此百度语音SDK导入完毕。目前我自己总结了两种实现语音识别的方式:

第一种是用unity的UnityWebRequest去实现,是看的一位博主的教程,链接地址是:Unity百度语音识别-CSDN博客

第二种是看官方文档以后获取SDK的接口来实现的,具体实现方式如下:

在Git上下载了SDK源码进行学习,代码里面有具体功能实现的对应接口,源码地址:GitHub - Baidu-AIP/dotnet-sdk: 百度AI开放平台 .Net SDK

下载完毕解压后如下图,从Git的说明文档上可知speech文件夹中即是语音识别的代码:

在Asr类中,找到“识别语音数据”的方法接口JObject Recognize

byte[] data:音频数据;

string format:音频格式;

int rate:采样频率;

options:语言类型,默认为1537普通话,还支持粤语、四川话、英语等,具体看官方文档介绍。

 

1.进行录音采集保存

  /// <summary>
  /// Starts a non-looping microphone recording into <c>saveAudioClip</c>.
  /// </summary>
  void StartRecord()
  {
      // Informational trace only, so it must not be reported at error level.
      Debug.Log("开始");

      // Without any capture device there is nothing to start.
      if (Microphone.devices.Length == 0)
      {
          Debug.LogWarning("No microphone device available; recording not started.");
          return;
      }

      saveAudioClip = Microphone.Start(currentDeviceName, false, recordMaxTime, recordFrequency);
  }

2.对录音进行转换,将音频采样数据转换为byte数组(16位PCM)

 

  /// <summary>
  /// Converts the float samples of an <see cref="AudioClip"/> into mono,
  /// little-endian 16-bit PCM bytes.
  /// </summary>
  /// <param name="audioClip">Clip to convert; a null clip yields an empty array.</param>
  /// <returns>Two bytes per audio frame, low byte first.</returns>
  public byte[] ConvertClipToBytes(AudioClip audioClip)
  {
      if (audioClip == null)
      {
          return new byte[0];
      }

      // AudioClip.samples is a per-channel count while GetData fills an
      // interleaved buffer, so the buffer must hold samples * channels floats.
      int channels = Math.Max(1, audioClip.channels);
      int frames = audioClip.samples;
      float[] interleaved = new float[frames * channels];
      audioClip.GetData(interleaved, 0);

      byte[] bytesData = new byte[frames * 2];
      const int rescaleFactor = 32767;
      for (int frame = 0; frame < frames; frame++)
      {
          // Average the channels of this frame down to a single mono value.
          float sum = 0f;
          for (int channel = 0; channel < channels; channel++)
          {
              sum += interleaved[frame * channels + channel];
          }

          // Clamp so a sample outside [-1, 1] cannot overflow the short cast.
          float mono = Mathf.Clamp(sum / channels, -1f, 1f);
          short value = (short)(mono * rescaleFactor);

          // Write the bytes explicitly (little-endian) instead of allocating
          // a temporary array per sample via BitConverter.
          bytesData[frame * 2] = (byte)(value & 0xFF);
          bytesData[frame * 2 + 1] = (byte)((value >> 8) & 0xFF);
      }
      return bytesData;
  }

 3.转换完毕通过SDK接口发送

// Send the recorded clip, converted to 16-bit PCM bytes, to the SDK's Recognize call
// together with the sample rate and the recognition options (languageType).
var result = asr.Recognize(ConvertClipToBytes(saveAudioClip),"pcm", recordFrequency,languageType);

4.将返回的数据转换为字符串,并用正则表达式提取自己想要的信息

  // Read the fields directly from the parsed response instead of re-serializing
  // it and regex-matching the text, which breaks on escaped quotes or several
  // candidates and falls back to showing raw JSON.
  string str = "识别结果为空";
  if (result != null)
  {
      string errMsg = (string)result["err_msg"];
      JArray candidates = result["result"] as JArray;
      bool succeeded = errMsg != null && errMsg.StartsWith("success", StringComparison.Ordinal);
      if (succeeded && candidates != null && candidates.Count > 0)
      {
          // The first entry of the "result" array is shown as the transcript.
          str = (string)candidates[0];
      }
  }
  tex.text = str;

完整代码如下: 

 偷懒写的代码不牢固,result在获取的时候有可能会出现NULL值,所以你们可以用Try Catch做一下容错

  using System;
  using System.Collections;
  using System.Collections.Generic;
  using System.Text.RegularExpressions;
  using Baidu.Aip.Speech;
  using Newtonsoft.Json;
  using Newtonsoft.Json.Linq;
  using UnityEngine;
  using UnityEngine.Networking;
  using UnityEngine.UI;
  /// <summary>
  /// Push-to-talk speech recognition demo: hold Space to record from the
  /// microphone, release it to play the clip back, send it to the Baidu ASR SDK
  /// and show the transcript in the "Canvas/ResultTex" text element.
  /// </summary>
  public class Test : MonoBehaviour
  {
      // dev_pid sent with every request; 1537 selects Mandarin according to the article text.
      const int DefaultDevPid = 1537;

      // Scale factor mapping a [-1, 1] float sample onto the 16-bit signed range.
      const int PcmScale = 32767;

      // Text shown whenever no transcript could be obtained.
      const string EmptyResultText = "识别结果为空";

      // Credentials of the Baidu application (assigned from outside, e.g. the Inspector).
      public string app_id;
      public string api_key;
      public string secret_Key;
      public Asr asr;

      int recordFrequency = 8000; // recording sample rate
      int recordMaxTime = 20; // maximum recording length
      AudioClip saveAudioClip; // clip holding the current recording
      AudioSource source;
      string currentDeviceName = string.Empty; // empty name: Unity picks the default microphone
      Text tex;
      Dictionary<string, object> languageType = new Dictionary<string, object>();

      /// <summary>
      /// Creates the SDK client and looks up the playback source and result label.
      /// </summary>
      void Start()
      {
          // AudioClip is not a Component, so it is not fetched via GetComponent;
          // saveAudioClip is assigned by StartRecord instead.
          source = GetComponent<AudioSource>();

          asr = new Asr(app_id, api_key, secret_Key);
          languageType["dev_pid"] = DefaultDevPid;

          GameObject resultObject = GameObject.Find("Canvas/ResultTex");
          tex = resultObject != null ? resultObject.GetComponent<Text>() : null;
          if (tex == null)
          {
              Debug.LogWarning("Canvas/ResultTex with a Text component was not found; results will only be logged.");
          }
      }

      /// <summary>
      /// Starts recording when Space goes down and finishes it when Space is released.
      /// </summary>
      void Update()
      {
          if (Input.GetKeyDown(KeyCode.Space))
          {
              StartRecord();
          }
          else if (Input.GetKeyUp(KeyCode.Space))
          {
              EndRecord();
          }
      }

      /// <summary>
      /// Converts a whole <see cref="AudioClip"/> into mono, little-endian 16-bit PCM bytes.
      /// </summary>
      /// <param name="audioClip">Clip to convert; a null clip yields an empty array.</param>
      /// <returns>Two bytes per audio frame, low byte first.</returns>
      public byte[] ConvertClipToBytes(AudioClip audioClip)
      {
          return EncodePcm16(audioClip, audioClip != null ? audioClip.samples : 0);
      }

      /// <summary>
      /// Encodes the first <paramref name="frameCount"/> frames of a clip as mono 16-bit PCM.
      /// </summary>
      static byte[] EncodePcm16(AudioClip audioClip, int frameCount)
      {
          if (audioClip == null || frameCount <= 0)
          {
              return new byte[0];
          }

          // AudioClip.samples is a per-channel count while GetData fills an
          // interleaved buffer, so the buffer must hold frames * channels floats.
          int channels = Math.Max(1, audioClip.channels);
          int frames = Math.Min(frameCount, audioClip.samples);
          float[] interleaved = new float[frames * channels];
          audioClip.GetData(interleaved, 0);

          byte[] pcm = new byte[frames * 2];
          for (int frame = 0; frame < frames; frame++)
          {
              // Average the channels of this frame down to a single mono value.
              float sum = 0f;
              for (int channel = 0; channel < channels; channel++)
              {
                  sum += interleaved[frame * channels + channel];
              }

              // Clamp so a sample outside [-1, 1] cannot overflow the short cast.
              float mono = Mathf.Clamp(sum / channels, -1f, 1f);
              short value = (short)(mono * PcmScale);

              // Explicit little-endian layout, no per-sample allocations.
              pcm[frame * 2] = (byte)(value & 0xFF);
              pcm[frame * 2 + 1] = (byte)((value >> 8) & 0xFF);
          }
          return pcm;
      }

      /// <summary>
      /// Starts a non-looping microphone recording into <c>saveAudioClip</c>.
      /// </summary>
      void StartRecord()
      {
          Debug.Log("开始");

          if (Microphone.devices.Length == 0)
          {
              Debug.LogWarning("No microphone device available; recording not started.");
              return;
          }

          saveAudioClip = Microphone.Start(currentDeviceName, false, recordMaxTime, recordFrequency);
      }

      /// <summary>
      /// Stops the recording, plays it back, sends the recorded part to the
      /// recognizer and displays the transcript (or a fallback text on failure).
      /// </summary>
      void EndRecord()
      {
          Debug.Log("结束");

          // Space can be released without a recording having been started.
          if (saveAudioClip == null)
          {
              return;
          }

          // Read the write position before End() so only the recorded part is
          // uploaded. A non-positive or out-of-range value (for instance when the
          // recording is no longer running) falls back to the whole clip.
          int recordedFrames = Microphone.GetPosition(currentDeviceName);
          Microphone.End(currentDeviceName);
          if (recordedFrames <= 0 || recordedFrames > saveAudioClip.samples)
          {
              recordedFrames = saveAudioClip.samples;
          }

          if (source != null)
          {
              source.PlayOneShot(saveAudioClip);
          }

          string str = EmptyResultText;
          try
          {
              // NOTE(review): Recognize is called synchronously here, so this
              // presumably blocks the frame until the SDK returns.
              JObject result = asr.Recognize(EncodePcm16(saveAudioClip, recordedFrames), "pcm", recordFrequency, languageType);
              str = ExtractTranscript(result) ?? EmptyResultText;
          }
          catch (Exception e)
          {
              // SDK or parsing failures must not break the Update loop; log and show the fallback.
              Debug.LogException(e);
          }

          if (tex != null)
          {
              tex.text = str;
          }
          else
          {
              Debug.Log(str);
          }
      }

      /// <summary>
      /// Returns the first entry of the response's "result" array when "err_msg"
      /// starts with "success"; otherwise null.
      /// </summary>
      static string ExtractTranscript(JObject result)
      {
          if (result == null)
          {
              return null;
          }

          string errMsg = (string)result["err_msg"];
          if (errMsg == null || !errMsg.StartsWith("success", StringComparison.Ordinal))
          {
              return null;
          }

          JArray candidates = result["result"] as JArray;
          if (candidates == null || candidates.Count == 0)
          {
              return null;
          }

          return (string)candidates[0];
      }
  }

 

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/小丑西瓜9/article/detail/483916
推荐阅读
相关标签
  

闽ICP备14008679号