当前位置:   article > 正文

Go语言学习笔记 - 调用ffmpeg-api实现音频重采样_go ffmpeg api

go ffmpeg api

目录

前言

环境部署

代码

总结


前言

最近对golang处理音视频很感兴趣,对golang音视频常用库goav进行了一番研究。自己写了一个wav转采样率的功能。给大家分享一下,中间遇到了不少坑,解决的过程中还是蛮有意思的。

环境部署

代码运行在Ubuntu环境上,需要使用到goav,goav是对ffmpeg源码的golang封装。

goav地址:https://github.com/giorgisio/goav(注意:本文代码实际导入并安装的是 github.com/xueqing/goav)

goav安装如下

  1. sudo apt-get -y install autoconf automake build-essential libass-dev libfreetype6-dev libsdl1.2-dev libtheora-dev libtool libva-dev libvdpau-dev libvorbis-dev libxcb1-dev libxcb-shm0-dev libxcb-xfixes0-dev pkg-config texi2html zlib1g-dev
  2. sudo apt install -y libavdevice-dev libavfilter-dev libswscale-dev libavcodec-dev libavformat-dev libswresample-dev libavutil-dev
  3. sudo apt-get install yasm
  4. export FFMPEG_ROOT=$HOME/ffmpeg
  5. export CGO_LDFLAGS="-L$FFMPEG_ROOT/lib/ -lavcodec -lavformat -lavutil -lswscale -lswresample -lavdevice -lavfilter"
  6. export CGO_CFLAGS="-I$FFMPEG_ROOT/include"
  7. export LD_LIBRARY_PATH=$HOME/ffmpeg/lib
  8. go get github.com/xueqing/goav

代码

先看代码

  1. package main
  2. //#include<stdlib.h>
  3. import "C"
  import (
  	"flag"
  	"fmt"
  	"io"
  	"os"
  	"reflect"
  	"runtime"
  	"unsafe"

  	"github.com/google/logger"
  	"github.com/xueqing/ffmpeg-demo/logutil"
  	"github.com/xueqing/goav/libswresample"
  	"github.com/youpy/go-wav"
  )
  16. func main() {
  17. var (
  18. inputUrl string = "./data/1.wav"
  19. inNumChannels int64 = 1
  20. inSampleRate int = 16000
  21. //inBitsPerSample uint16 = 16
  22. outNumChannels int64 = 1
  23. outSampleRate int = 48000
  24. outBitsPerSample uint16 = 16
  25. swr *libswresample.SwrContext = libswresample.SwrAlloc()
  26. )
  27. flag.Parse()
  28. logutil.Init(true, false, "resample.log")
  29. defer logutil.Close()
  30. swr.SwrAllocSetOpts(outNumChannels,
  31. libswresample.AvSampleFormat(1),
  32. outSampleRate,
  33. inNumChannels,
  34. libswresample.AvSampleFormat(1),
  35. inSampleRate,
  36. 0,
  37. 0)
  38. swr.SwrInit()
  39. defer swr.SwrClose()
  40. _inputFile, err := os.Open(inputUrl)
  41. if err != nil {
  42. logger.Errorf("open input file error(%v)", err)
  43. return
  44. }
  45. defer _inputFile.Close()
  46. _reader := wav.NewReader(_inputFile)
  47. format, err := _reader.Format()
  48. if err != nil {
  49. logger.Errorf("input file format error(%v)", err)
  50. return
  51. }
  52. fmt.Printf("input file format info -> AudioFormat:%v,NumChannels:%v,SampleRate:%v,ByteRate:%v,BlockAlign:%v,BitsPerSample:%v", int(format.AudioFormat), format.NumChannels, format.SampleRate, format.ByteRate, format.BlockAlign, format.BitsPerSample)
  53. _tempFile, err := os.CreateTemp("", "*.wav")
  54. if err != nil {
  55. logger.Errorf("create temp file error(%v)", err)
  56. return
  57. }
  58. logger.Infof("Create tempFile %v", _tempFile.Name())
  59. defer func() {
  60. _tempFile.Close()
  61. }()
  62. _samples := []wav.Sample{}
  63. n := 4096
  64. for {
  65. spls, err := _reader.ReadSamples(uint32(n))
  66. if err == io.EOF {
  67. break
  68. }
  69. _samples = append(_samples, spls...)
  70. }
  71. _result := ResampleByFFmpegApi2(swr, _samples)
  72. _writer := wav.NewWriter(_tempFile, uint32(len(_result)), uint16(outNumChannels), uint32(outSampleRate), outBitsPerSample)
  73. err4 := _writer.WriteSamples(_result)
  74. if err4 != nil {
  75. logger.Errorf("write file error(%v)", err4)
  76. err = err4
  77. return
  78. }
  79. }
  80. func ResampleByFFmpegApi2(swr *libswresample.SwrContext, samples []wav.Sample) []wav.Sample {
  81. var (
  82. _inArr **uint8
  83. _outArr **uint8
  84. _inptr []uint16
  85. _outptr []uint16
  86. )
  87. _inArr = (**uint8)(C.malloc(C.sizeof_int))
  88. defer C.free(unsafe.Pointer(_inArr))
  89. _inptr = make([]uint16, len(samples))
  90. _outArr = (**uint8)(C.malloc(C.sizeof_int))
  91. defer C.free(unsafe.Pointer(_outArr))
  92. _outptr = make([]uint16, len(samples)*3)
  93. //fmt.Println(unsafe.Sizeof(uint16(0)))
  94. for i, v := range samples {
  95. _inptr[i] = uint16(v.Values[0])
  96. }
  97. *_inArr = (*uint8)(unsafe.Pointer((*reflect.SliceHeader)(unsafe.Pointer(&_inptr)).Data))
  98. *_outArr = (*uint8)(unsafe.Pointer((*reflect.SliceHeader)(unsafe.Pointer(&_outptr)).Data))
  99. ret := swr.SwrConvert(_outArr, len(samples)*3, _inArr, len(samples))
  100. if ret > 0 {
  101. fmt.Println(ret)
  102. }
  103. _result := make([]wav.Sample, ret)
  104. for i := 0; i < ret; i++ {
  105. _result[i] = wav.Sample{[2]int{int(_outptr[i]), 0}}
  106. }
  107. return _result
  108. }

代码说明:

1、代码不是个工具方法,如果看懂逻辑的话,可以自行修改成工具方法。

2、里面会用到ffmpeg的swresample库,对音频数据进行重采样。

3、可以细看一下,如果你想做实时处理,也是可以改的。

4、其中SwrAllocSetOpts方法中有个参数libswresample.AvSampleFormat(1),为什么取1,这里主要是选择采样表示方式的枚举,参考底层源码枚举,我发在下面。我这边因为音频是s16的,所以选择1。

  /* Audio sample formats with each enumerator's numeric value written out. */
  enum AVSampleFormat {
      AV_SAMPLE_FMT_NONE = -1,
      AV_SAMPLE_FMT_U8   = 0,   ///< unsigned 8 bits
      AV_SAMPLE_FMT_S16  = 1,   ///< signed 16 bits
      AV_SAMPLE_FMT_S32  = 2,   ///< signed 32 bits
      AV_SAMPLE_FMT_FLT  = 3,   ///< float
      AV_SAMPLE_FMT_DBL  = 4,   ///< double
      AV_SAMPLE_FMT_U8P  = 5,   ///< unsigned 8 bits, planar
      AV_SAMPLE_FMT_S16P = 6,   ///< signed 16 bits, planar
      AV_SAMPLE_FMT_S32P = 7,   ///< signed 32 bits, planar
      AV_SAMPLE_FMT_FLTP = 8,   ///< float, planar
      AV_SAMPLE_FMT_DBLP = 9,   ///< double, planar
      AV_SAMPLE_FMT_S64  = 10,  ///< signed 64 bits
      AV_SAMPLE_FMT_S64P = 11,  ///< signed 64 bits, planar
      AV_SAMPLE_FMT_NB   = 12   ///< Number of sample formats. DO NOT USE if linking dynamically
  };

音频准备,输入音频为16k采样率音频。

(base) xxx@hu:~/GolandProjects/MediaRelay/data$ ffmpeg -i 1.wav 
ffmpeg version 4.2.7-0ubuntu0.1 Copyright (c) 2000-2022 the FFmpeg developers
  built with gcc 9 (Ubuntu 9.4.0-1ubuntu1~20.04.1)
  configuration: --prefix=/usr --extra-version=0ubuntu0.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-avresample --disable-filter=resample --enable-avisynth --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librsvg --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx265 --enable-libxml2 --enable-libxvid --enable-libzmq --enable-libzvbi --enable-lv2 --enable-omx --enable-openal --enable-opencl --enable-opengl --enable-sdl2 --enable-libdc1394 --enable-libdrm --enable-libiec61883 --enable-nvenc --enable-chromaprint --enable-frei0r --enable-libx264 --enable-shared
  libavutil      56. 31.100 / 56. 31.100
  libavcodec     58. 54.100 / 58. 54.100
  libavformat    58. 29.100 / 58. 29.100
  libavdevice    58.  8.100 / 58.  8.100
  libavfilter     7. 57.100 /  7. 57.100
  libavresample   4.  0.  0 /  4.  0.  0
  libswscale      5.  5.100 /  5.  5.100
  libswresample   3.  5.100 /  3.  5.100
  libpostproc    55.  5.100 / 55.  5.100
Guessed Channel Layout for Input Stream #0.0 : mono
Input #0, wav, from '1.wav':
  Metadata:
    date            : 2020-09-28
    encoder         : Lavf58.45.100
  Duration: 00:04:01.75, bitrate: 256 kb/s
    Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 16000 Hz, mono, s16, 256 kb/s

执行情况

input file format info -> AudioFormat:1,NumChannels:1,SampleRate:16000,ByteRate:32000,BlockAlign:2,BitsPerSample:16INFO : 2022/12/06 17:14:49.937547 csdn_wav_util.go:62: Create tempFile /tmp/2402235346.wav
11603961
 

最终音频

(base) xxx@hu:/tmp$ ffmpeg -i 2402235346.wav 
ffmpeg version 4.2.7-0ubuntu0.1 Copyright (c) 2000-2022 the FFmpeg developers
  built with gcc 9 (Ubuntu 9.4.0-1ubuntu1~20.04.1)
  configuration: --prefix=/usr --extra-version=0ubuntu0.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-avresample --disable-filter=resample --enable-avisynth --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librsvg --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx265 --enable-libxml2 --enable-libxvid --enable-libzmq --enable-libzvbi --enable-lv2 --enable-omx --enable-openal --enable-opencl --enable-opengl --enable-sdl2 --enable-libdc1394 --enable-libdrm --enable-libiec61883 --enable-nvenc --enable-chromaprint --enable-frei0r --enable-libx264 --enable-shared
  libavutil      56. 31.100 / 56. 31.100
  libavcodec     58. 54.100 / 58. 54.100
  libavformat    58. 29.100 / 58. 29.100
  libavdevice    58.  8.100 / 58.  8.100
  libavfilter     7. 57.100 /  7. 57.100
  libavresample   4.  0.  0 /  4.  0.  0
  libswscale      5.  5.100 /  5.  5.100
  libswresample   3.  5.100 /  3.  5.100
  libpostproc    55.  5.100 / 55.  5.100
Guessed Channel Layout for Input Stream #0.0 : mono
Input #0, wav, from '2402235346.wav':
  Duration: 00:04:01.75, bitrate: 768 kb/s
    Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 48000 Hz, mono, s16, 768 kb/s 

总结

其实在写代码过程中,有个让我特别头疼的东西,就是怎么把数组转为**uint8。如果大家有兴趣可以研究一下ResampleByFFmpegApi2方法的转换逻辑,会学到不少东西。

分享:

        我们的疲劳往往不是由工作引起的,而是由于忧烦、挫折和不满等。——《人性的弱点》

本文内容由网友自发贡献,转载请注明出处:https://www.wpsshop.cn/w/喵喵爱编程/article/detail/950095
推荐阅读
相关标签
  

闽ICP备14008679号