当前位置:   article > 正文

Java离线视频提取音频+音频提取文案_java解析音频中的文字

java解析音频中的文字

需引入javacv、vosk相关依赖。

至于javacv依赖,网上有很多缩减方案,下面注释掉的部分就是一种可行的缩减方案。从视频中提取音频无需安装ffmpeg,只需引入依赖即可。而vosk需要先下载模型方可使用,模型下载比较慢,可先用小模型跑通。

  1. <properties>
  2. <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  3. <javacv.version>1.5.6</javacv.version>
  4. <system.windowsx64>windows-x86_64</system.windowsx64>
  5. </properties>
  6. <!-- javacv + javacpp core libraries; lines 7-42 are a commented-out, reduced-size alternative to javacv-platform -->
  7. <!-- <dependency>-->
  8. <!-- <groupId>org.bytedeco</groupId>-->
  9. <!-- <artifactId>javacv</artifactId>-->
  10. <!-- <version>${javacv.version}</version>-->
  11. <!-- </dependency>-->
  12. <!-- <dependency>-->
  13. <!-- <groupId>org.bytedeco</groupId>-->
  14. <!-- <artifactId>javacpp-platform</artifactId>-->
  15. <!-- <version>${javacv.version}</version>-->
  16. <!-- </dependency>-->
  17. <!-- &lt;!&ndash; Minimal ffmpeg dependency; requires the javacv + javacpp core libraries above &ndash;&gt;-->
  18. <!-- <dependency>-->
  19. <!-- <groupId>org.bytedeco</groupId>-->
  20. <!-- <artifactId>ffmpeg</artifactId>-->
  21. <!-- <version>4.4-${javacv.version}</version>-->
  22. <!-- <classifier>${system.windowsx64}</classifier>-->
  23. <!-- </dependency>-->
  24. <!--&lt;!&ndash; Minimal opencv dependency; requires the javacv + javacpp libraries above &ndash;&gt;-->
  25. <!-- <dependency>-->
  26. <!-- <groupId>org.bytedeco</groupId>-->
  27. <!-- <artifactId>opencv</artifactId>-->
  28. <!-- <version>4.5.1-${javacv.version}</version>-->
  29. <!-- <classifier>${system.windowsx64}</classifier>-->
  30. <!-- </dependency>-->
  31. <!-- <dependency>-->
  32. <!-- <groupId>org.bytedeco</groupId>-->
  33. <!-- <artifactId>openblas</artifactId>-->
  34. <!-- <version>0.3.13-${javacv.version}</version>-->
  35. <!-- <classifier>${system.windowsx64}</classifier>-->
  36. <!-- </dependency>-->
  37. <!-- <dependency>-->
  38. <!-- <groupId>org.bytedeco</groupId>-->
  39. <!-- <artifactId>flycapture</artifactId>-->
  40. <!-- <version>2.13.3.31-${javacv.version}</version>-->
  41. <!-- <classifier>${system.windowsx64}</classifier>-->
  42. <!-- </dependency>-->
  43. <dependencies>
  44. <!-- Extract audio from video (NOTE(review): version is hard-coded to 1.5.10 and does not use the javacv.version property above - confirm this is intended) -->
  45. <dependency>
  46. <groupId>org.bytedeco</groupId>
  47. <artifactId>javacv-platform</artifactId>
  48. <version>1.5.10</version>
  49. </dependency>
  50. <!-- Read audio file information -->
  51. <dependency>
  52. <groupId>org</groupId>
  53. <artifactId>jaudiotagger</artifactId>
  54. <version>2.0.3</version>
  55. </dependency>
  56. <dependency>
  57. <groupId>net.java.dev.jna</groupId>
  58. <artifactId>jna</artifactId>
  59. <version>5.13.0</version>
  60. </dependency>
  61. <dependency>
  62. <groupId>com.alphacephei</groupId>
  63. <artifactId>vosk</artifactId>
  64. <version>0.3.45</version>
  65. </dependency>
  66. <!-- JAVE2 (Java Audio Video Encoder) is a Java wrapper around the ffmpeg project. -->
  67. <dependency>
  68. <groupId>ws.schild</groupId>
  69. <artifactId>jave-core</artifactId>
  70. <version>3.1.1</version>
  71. </dependency>
  72. <dependency>
  73. <groupId>com.alibaba</groupId>
  74. <artifactId>fastjson</artifactId>
  75. <version>1.2.83</version>
  76. </dependency>
  77. </dependencies>

视频提取音频

  1. package org.example;
  2. import org.bytedeco.ffmpeg.global.avcodec;
  3. import org.bytedeco.javacv.FFmpegFrameGrabber;
  4. import org.bytedeco.javacv.FFmpegFrameRecorder;
  5. import org.bytedeco.javacv.Frame;
  6. public class Test {
  7. public static void extractVoice(String sourceFileName, String audioUrl) throws FFmpegFrameGrabber.Exception, FFmpegFrameRecorder.Exception {
  8. //抓取资源
  9. FFmpegFrameGrabber frameGrabber = new FFmpegFrameGrabber(sourceFileName);
  10. Frame frame = null;
  11. FFmpegFrameRecorder recorder = null;
  12. frameGrabber.start();
  13. //转录为单轨, 16K采样率, wav格式
  14. recorder = new FFmpegFrameRecorder(audioUrl, frameGrabber.getAudioChannels());
  15. recorder.setFormat(frameGrabber.getFormat());
  16. recorder.setSampleRate(frameGrabber.getSampleRate());//frameGrabber.getSampleRate()
  17. //recorder.setAudioBitrate(128000);// 音频比特率
  18. recorder.setTimestamp(frameGrabber.getTimestamp());
  19. recorder.setVideoCodec(avcodec.AV_CODEC_ID_NONE); // 不录制视频
  20. recorder.start();
  21. int index = 0;
  22. while (true) {
  23. frame = frameGrabber.grabSamples();
  24. if (frame == null) break;
  25. if (frame.samples != null) {
  26. recorder.recordSamples(frame.sampleRate, frame.audioChannels, frame.samples);
  27. recorder.setTimestamp(frameGrabber.getTimestamp());
  28. }
  29. index++;
  30. }
  31. recorder.stop();
  32. recorder.release();
  33. frameGrabber.stop();
  34. frameGrabber.release();
  35. }
  36. public static void main(String[] args) throws FFmpegFrameGrabber.Exception, FFmpegFrameRecorder.Exception {
  37. String videoFilePath = "I:\\workspace\\test.mp4"; // 视频文件路径
  38. String audioOutputPath = "I:\\workspace\\test_audio.wav"; // 输出的音频文件路径
  39. long s = System.currentTimeMillis();
  40. extractVoice(videoFilePath, audioOutputPath);
  41. System.out.println(System.currentTimeMillis() - s);
  42. }
  43. }

音频提取文字

至于model可去此网站下载,解压使用。大模型下载较慢

VOSK Models:https://alphacephei.com/vosk/models

  1. package org.example;
  2. import com.alibaba.fastjson.JSON;
  3. import org.vosk.LibVosk;
  4. import org.vosk.LogLevel;
  5. import org.vosk.Model;
  6. import org.vosk.Recognizer;
  7. import javax.sound.sampled.*;
  8. import java.io.*;
  9. import java.util.Optional;
  10. public class Test3 {
  11. public static void main(String[] args) {
  12. StringBuilder result = new StringBuilder();
  13. LibVosk.setLogLevel(LogLevel.DEBUG);
  14. AudioFormat format = new AudioFormat(AudioFormat.Encoding.PCM_SIGNED, 44100, 16, 2, 4, 44100, false);
  15. DataLine.Info info = new DataLine.Info(TargetDataLine.class, format);
  16. TargetDataLine microphone;
  17. SourceDataLine speakers;
  18. try (Model model = new Model("I:\\workspace\\vosk-model-small-cn-0.22");
  19. InputStream ais = AudioSystem.getAudioInputStream(new BufferedInputStream(new FileInputStream("I:\\workspace\\test_audio.wav")));
  20. Recognizer recognizer = new Recognizer(model, 120000)) {
  21. try {
  22. microphone = (TargetDataLine) AudioSystem.getLine(info);
  23. microphone.open(format);
  24. microphone.start();
  25. ByteArrayOutputStream out = new ByteArrayOutputStream();
  26. int numBytesRead;
  27. int CHUNK_SIZE = 1024;
  28. int bytesRead = 0;
  29. DataLine.Info dataLineInfo = new DataLine.Info(SourceDataLine.class, format);
  30. speakers = (SourceDataLine) AudioSystem.getLine(dataLineInfo);
  31. speakers.open(format);
  32. speakers.start();
  33. byte[] b = new byte[4096];
  34. while (bytesRead <= 100000000) {
  35. byte[] audioData = new byte[CHUNK_SIZE];
  36. numBytesRead = ais.read(audioData, 0, CHUNK_SIZE);
  37. bytesRead += numBytesRead;
  38. out.write(audioData, 0, numBytesRead);
  39. speakers.write(audioData, 0, numBytesRead);
  40. if (recognizer.acceptWaveForm(audioData, numBytesRead)) {
  41. result.append(getResult(recognizer.getResult()));
  42. } else {
  43. result.append(getResult(recognizer.getPartialResult()));
  44. }
  45. }
  46. result.append(getResult(recognizer.getFinalResult()));
  47. speakers.drain();
  48. speakers.close();
  49. microphone.close();
  50. } catch (Exception e) {
  51. e.printStackTrace();
  52. }
  53. System.out.println(result.toString());
  54. } catch (IOException e) {
  55. throw new RuntimeException(e);
  56. } catch (UnsupportedAudioFileException e) {
  57. throw new RuntimeException(e);
  58. }
  59. }
  60. /**
  61. * 获取返回结果
  62. *
  63. * @param result
  64. * @return
  65. */
  66. private static String getResult(String result) {
  67. VoskResult vr = JSON.parseObject(result,VoskResult.class);
  68. return Optional.ofNullable(vr).map(VoskResult::getText).orElse("");
  69. }
  70. public static void main1(String[] argv) throws IOException, UnsupportedAudioFileException {
  71. LibVosk.setLogLevel(LogLevel.DEBUG);
  72. StringBuilder result = new StringBuilder();
  73. try (Model model = new Model("I:\\workspace\\vosk-model-small-cn-0.22");
  74. InputStream ais = AudioSystem.getAudioInputStream(new BufferedInputStream(new FileInputStream("I:\\workspace\\test_audio.wav")));
  75. Recognizer recognizer = new Recognizer(model, 120000)) {
  76. int nbytes;
  77. byte[] b = new byte[4096];
  78. while ((nbytes = ais.read(b)) >= 0) {
  79. if (recognizer.acceptWaveForm(b, nbytes)) {
  80. result.append(getResult(recognizer.getResult()));
  81. } else {
  82. result.append(getResult(recognizer.getPartialResult()));
  83. }
  84. }
  85. result.append(getResult(recognizer.getFinalResult()));
  86. }
  87. System.out.println(result);
  88. }
  89. }

感谢网上各位大佬能分享这些信息

测试可行,识别率没有做过对比、大模型也没有试过。这里也就提供一种可行的离线解决方案。

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/人工智能uu/article/detail/899042
推荐阅读
相关标签
  

闽ICP备14008679号