当前位置:   article > 正文

JAVA实现微软语音实时翻译

java实现微软语音实时翻译

        由于公司项目立项之初需要用到微软的实时语音识别,所以研究了微软官方的SDK和API。前端与Java后端的交互相对简单:前端页面通过HZRecorder采集实时音频流并处理成二进制数据,后端用netty+websocket接收消息。难点在于微软的翻译。微软官方提供了几种输入方式:一种是传入一段音频片段(音频流),一种是直接从硬件设备获取音频流进行翻译。我们的服务需要部署在服务器上,所以无法使用第二种方式。对于第一种方式,微软的SpeechRecognizer对象可以接收一个特殊的流对象PullAudioInputStreamCallback作为数据源;如果传入了这个对象,SpeechRecognizer会主动从该流对象里读取数据。但是SpeechRecognizer一旦从流中读取到0个字节就会停止识别,而默认的流类型在没有数据可读时无法阻塞(block),不能满足我们的场景;PullAudioInputStreamCallback期望的效果是只有在明确流结束时,read()方法才返回0。因此需要定义我们自己的音频流对象:

  1. package ********;
  2. import com.microsoft.cognitiveservices.speech.audio.PullAudioInputStreamCallback;
  3. import lombok.extern.slf4j.Slf4j;
  4. import java.io.IOException;
  5. import java.io.InputStream;
  6. @Slf4j
  7. public class VoiceAudioStream extends PullAudioInputStreamCallback {
  8. private EchoStream _dataStream = new EchoStream();
  9. private ManualResetEvent _waitForEmptyDataStream = null;
  10. private InputStream stream;
  11. @Override
  12. public int read(byte[] dataBuffer) //S2T服务从PullAudioInputStream中读取数据, 读到0个字节并不会关闭流
  13. {
  14. long ret = 0;
  15. if (_waitForEmptyDataStream != null && !_dataStream.DataAvailable())
  16. {//用户主动close时可以关闭流
  17. _waitForEmptyDataStream.Set();
  18. return 0;
  19. }
  20. try {
  21. if(this.stream != null){
  22. //log.info("1前:{}",dataBuffer);
  23. ret = this.stream.read(dataBuffer,0, dataBuffer.length);
  24. //log.info("1后:{}",dataBuffer);
  25. if((int)ret < 1){
  26. // log.info("2");
  27. this.stream = _dataStream.Read(dataBuffer, 0, dataBuffer.length);
  28. ret = this.stream.read(dataBuffer,0, dataBuffer.length);
  29. }
  30. }else{
  31. //log.info("3");
  32. this.stream = _dataStream.Read(dataBuffer, 0, dataBuffer.length);
  33. ret = this.stream.read(dataBuffer,0, dataBuffer.length);
  34. }
  35. } catch (IOException e) {
  36. e.printStackTrace();
  37. }
  38. return (int)Math.max(0, ret);
  39. }
  40. public void write(byte[] buffer, int offset, int count) //Client向PullAudioInputStream写入数据
  41. {
  42. _dataStream.write(buffer, offset, count);
  43. }
  44. @Override
  45. public void close(){
  46. if (_dataStream.DataAvailable())
  47. {
  48. log.info("进到close里面了");
  49. _waitForEmptyDataStream = new ManualResetEvent(false); //通过ManualResetEvent强制流的使用者必须调用close来手动关闭流
  50. try {
  51. _waitForEmptyDataStream.WaitOne();
  52. } catch (InterruptedException e) {
  53. e.printStackTrace();
  54. }
  55. }
  56. log.info("等待了吗?");
  57. _waitForEmptyDataStream.close();
  58. try {
  59. _dataStream.close();
  60. } catch (IOException e) {
  61. e.printStackTrace();
  62. }
  63. //_dataStream.close();
  64. try {
  65. this.stream.close();
  66. } catch (IOException ex) {
  67. // ignored
  68. }
  69. }
  70. }
  1. package *****;
  2. import lombok.extern.slf4j.Slf4j;
  3. import java.io.ByteArrayInputStream;
  4. import java.io.IOException;
  5. import java.io.InputStream;
  6. import java.util.concurrent.ConcurrentLinkedDeque;
  7. @Slf4j
  /**
   * FIFO of audio chunks shared between a producer ({@link #write}) and a consumer ({@link #Read}).
   *
   * <p>{@code Read} blocks while the queue is empty and the stream is open. {@code close()} wakes a
   * blocked consumer by queueing an empty end-of-stream chunk.
   */
  public class EchoStream extends InputStream {
    /** Empty chunk handed to the consumer to signal that no more data will arrive. */
    private static final byte[] END_OF_STREAM = new byte[0];

    /** Pending chunks, oldest first. A blocking queue removes the need for a separate "data ready" event. */
    private final java.util.concurrent.BlockingQueue<byte[]> buffers =
        new java.util.concurrent.LinkedBlockingQueue<>();

    /** Set by close(); lets Read return immediately once the queue is empty after closing. */
    private volatile boolean closed = false;

    /**
     * @return {@code true} when at least one chunk is queued. After {@code close()} the queued
     *     end-of-stream chunk counts as a chunk until a consumer removes it.
     */
    public Boolean DataAvailable() {
      return !buffers.isEmpty();
    }

    /**
     * Queues {@code count} bytes of {@code buffer} starting at {@code offset}.
     *
     * <p>The bytes are copied, so the caller may reuse {@code buffer} afterwards. A null buffer or
     * a non-positive count is ignored.
     *
     * @throws IndexOutOfBoundsException if {@code offset}/{@code count} do not fit in {@code buffer}
     */
    public void write(byte[] buffer, int offset, int count) {
      if (buffer == null || count <= 0) {
        return;
      }
      byte[] chunk = new byte[count];
      System.arraycopy(buffer, offset, chunk, 0, count);
      // Every queued chunk wakes a waiting consumer, including the very first one.
      buffers.add(chunk);
    }

    /**
     * Single-byte read placeholder required by {@link InputStream}; always returns 0.
     * Consumers are expected to use {@link #Read(byte[], int, int)} instead.
     */
    @Override
    public int read() throws IOException {
      return 0;
    }

    /**
     * Removes and returns the oldest queued chunk without blocking.
     *
     * @return the oldest chunk, or an empty array when nothing is queued
     */
    public byte[] getLBuffer() {
      byte[] chunk = buffers.poll();
      return chunk != null ? chunk : new byte[0];
    }

    /**
     * Removes the oldest queued chunk, blocking until one is available, and returns it wrapped in a
     * stream.
     *
     * <p>The parameters are not used; they are kept so existing callers keep compiling.
     *
     * @return a stream over the chunk; an empty stream when the queue was closed or the waiting
     *     thread was interrupted
     */
    public InputStream Read(byte[] buffer, int offset, int count) {
      if (closed && buffers.isEmpty()) {
        // Closed and fully drained: never block again.
        return new ByteArrayInputStream(END_OF_STREAM);
      }
      try {
        return new ByteArrayInputStream(buffers.take());
      } catch (InterruptedException e) {
        // Keep the interrupt visible to the caller and report "no data".
        Thread.currentThread().interrupt();
        return new ByteArrayInputStream(END_OF_STREAM);
      }
    }

    /** Marks the queue as closed and releases a consumer that is blocked in {@link #Read}. */
    @Override
    public void close() throws IOException {
      closed = true;
      buffers.add(END_OF_STREAM);
      super.close();
    }
  }

前端建立连接的时候创建微软翻译的连接;

  1. public static Boolean buildConnect(SessionEntity sessionEntity)
  2. {
  3. try{
  4. stopTranslationWithAudioStreamSemaphore = new Semaphore(0);
  5. //初始化语音翻译配置实例
  6. //SpeechTranslationConfig speechTranslationConfig = getInstance(speechKey ,speechRegion);
  7. //初始化语言
  8. speechTranslationConfig.setSpeechRecognitionLanguage(sessionEntity.getFromLanguage());
  9. speechTranslationConfig.addTargetLanguage(sessionEntity.getToLanguage());
  10. //初始化语音流
  11. VoiceAudioStream audioStream = null;
  12. if(sessionEntity.getAudioStream() != null && !sessionEntity.getAudioStream().equals("")){
  13. log.info("进来了?");
  14. audioStream = sessionEntity.getAudioStream();
  15. }else{
  16. audioStream = new VoiceAudioStream();
  17. sessionEntity.setAudioStream(audioStream);
  18. sessionEntity.setVoiceData(new ArrayList<>());
  19. }
  20. AudioStreamFormat audioFormat = AudioStreamFormat.getWaveFormatPCM(Settings.getEmbeddedSpeechSamplesPerSecond(), Settings.getEmbeddedSpeechBitsPerSample(), Settings.getEmbeddedSpeechChannels());
  21. PullAudioInputStream pullStream = PullAudioInputStream.createPullStream(audioStream, audioFormat);
  22. AudioConfig audioConfig = AudioConfig.fromStreamInput(pullStream);
  23. TranslationRecognizer translationRecognizer = new TranslationRecognizer(speechTranslationConfig, audioConfig);
  24. sessionEntity.setTranslationRecognizer(translationRecognizer);
  25. //绑定监听方法
  26. translationRecognizer.recognizing.addEventListener((s, e) -> {
  27. log.info("tempResult:{}", e.getResult());
  28. //这里是中间结果,每出现一次就推送一次
  29. AsrResult asrResult = Constant.asrResultMap.get(sessionEntity.getSessionID());
  30. Integer asrIndex = 0;
  31. if(asrResult == null){ //第一次进入
  32. //log.info("第一次进入");
  33. //获取本房间最新的文本下标
  34. asrIndex = Constant.roomIndex.get(sessionEntity.getRoomId());
  35. if(asrIndex == null){
  36. asrIndex = 0;
  37. }
  38. Constant.roomIndex.put(sessionEntity.getRoomId(),asrIndex+1);
  39. asrResult = new AsrResult();
  40. asrResult.setStartTime(DateUtils.now());
  41. asrResult.setEndTime(DateUtils.now());
  42. asrResult.setAsrIndex(asrIndex);
  43. }else{
  44. //log.info("第二次进入");
  45. //判断本次结果与上次结果之间时间差是否超过一定值
  46. Long thisTime = System.currentTimeMillis();
  47. Long lastTime = DateUtils.gettimeStemp(asrResult.getEndTime(),"yyyy-MM-dd HH:mm:ss");
  48. if((thisTime - lastTime) > 2000L) {
  49. //log.info("两次时间过长");
  50. //index新增
  51. asrIndex = Constant.roomIndex.get(sessionEntity.getRoomId());
  52. asrResult.setAsrIndex(asrIndex);
  53. Constant.roomIndex.put(sessionEntity.getRoomId(),asrIndex+1);
  54. asrResult.setStartTime(DateUtils.now());
  55. }
  56. asrResult.setEndTime(DateUtils.now());
  57. }
  58. asrResult.setAsrText(e.getResult().getText());
  59. asrResult.setTempText(e.getResult().getText());
  60. asrResult.setUserId(sessionEntity.getExtNum());
  61. asrResult.setTranslation(e.getResult().getTranslations());
  62. asrResult.setTempType(1);
  63. Constant.asrResultMap.put(sessionEntity.getSessionID(),asrResult);
  64. WebSocketServerHandler.sendReplyToRoom(sessionEntity.getRoomId(), sessionEntity.getSessionID(), asrResult);
  65. });
  66. translationRecognizer.recognized.addEventListener((s, e) -> {
  67. log.info("RECOGNIZEDResult:{}", e.getResult());
  68. if (e.getResult().getReason() == ResultReason.RecognizedSpeech) {
  69. //这里是中间结果,每出现一次就推送一次
  70. if(!e.getResult().getText().equals("")){
  71. AsrResult asrResult = new AsrResult();
  72. Integer asrIndex = Constant.asrIndexMap.get(sessionEntity.getSessionID());
  73. asrResult.setAsrIndex(asrIndex);
  74. asrResult.setAsrText(e.getResult().getText());
  75. asrResult.setEndTime(DateUtils.now());
  76. asrResult.setTempText(e.getResult().getText());
  77. asrResult.setUserId(sessionEntity.getExtNum());
  78. asrResult.setTranslation(e.getResult().getTranslations());
  79. asrResult.setTempType(0);
  80. Constant.asrIndexMap.remove(sessionEntity.getSessionID());
  81. WebSocketServerHandler.sendReplyToRoom(sessionEntity.getRoomId(), sessionEntity.getSessionID(), asrResult);
  82. }
  83. }
  84. else if (e.getResult().getReason() == ResultReason.NoMatch) {
  85. log.info("NOMATCH: Speech could not be recognized.");
  86. }
  87. });
  88. translationRecognizer.canceled.addEventListener((s, e) -> {
  89. if (e.getReason() == CancellationReason.Error) {
  90. log.info("CANCELED: ErrorCode=" + e.getErrorCode());
  91. log.info("CANCELED: ErrorDetails=" + e.getErrorDetails());
  92. }
  93. Constant.asrResultMap.remove(sessionEntity.getSessionID());
  94. stopTranslationWithAudioStreamSemaphore.release();
  95. });
  96. translationRecognizer.sessionStopped.addEventListener((s, e) -> {
  97. log.info("\n Session stopped event.");
  98. Constant.asrResultMap.remove(sessionEntity.getSessionID());
  99. stopTranslationWithAudioStreamSemaphore.release();
  100. });
  101. try {
  102. log.info("开始异步识别");
  103. translationRecognizer.startContinuousRecognitionAsync().get();
  104. return true;
  105. } catch (InterruptedException e) {
  106. e.printStackTrace();
  107. } catch (ExecutionException e) {
  108. e.printStackTrace();
  109. }
  110. }catch (Exception e){
  111. log.info("微软buildConnect异常:{}",e.getStackTrace());
  112. }
  113. return false;
  114. }

接收语音流直接传入

  1. /**
  2. * 接收数据,开始识别
  3. * */
  4. public static void ReceiveAudio(SessionEntity sessionEntity,byte[] audioChunk)
  5. {
  6. if(audioChunk.length>0){
  7. sessionEntity.getAudioStream().write(audioChunk,0,audioChunk.length);
  8. }
  9. }

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/菜鸟追梦旅行/article/detail/377289
推荐阅读
相关标签
  

闽ICP备14008679号