Demo source code for this article:
https://download.csdn.net/download/xjb2006/85109025
DXGI desktop screen recording (Windows screen recording, hardware encoding, audio capture, audio/video synchronization)
Since the material is long, it is split across four articles:
1. SDK interface overview
2. Audio capture
3. Screen capture
4. Encoding and recording
It has been almost a year since the previous article; I finally found time to upload the finished demo. A screenshot gives a quick overview:
The demo covers the core features of Win10 screen recording: audio source selection (microphone, computer audio, or a mix of both), screen selection (primary or secondary monitor), mouse cursor capture, frame rate, bit rate, hardware encoding, live preview, and dual recording (FLV and MP4 simultaneously). For extensibility, the core module is built as a DLL, so it can be called from C++, C#, Java, VB, Python, and other languages.
Video encoding:
DXGI output is normally RGB32 (BGRA), which must be converted to a YUV color space such as YV12 or NV12 before encoding. libx264, h264_qsv, and h264_nvenc are currently supported; h264_amf is untested because I have no AMD hardware, so I cannot confirm whether it works.
Color-space conversion uses libyuv or Intel IPP; in my comparison both are more efficient than ffmpeg's converter.
When hardware (GPU) encoding is preferred, the code probes for h264_qsv and h264_nvenc and falls back to libx264 if neither is available; the encoder can also be specified manually.
The libyuv routine for scaling RGB32:
// Scale a BGRA (RGB32) frame from scx x scy to dcx x dcy with bilinear filtering.
static void ResizeRGB32(BYTE* pIn, int scx, int scy, BYTE* pOut, int dcx, int dcy)
{
	// Stride is width * 4 bytes per BGRA pixel.
	int ret = libyuv::ARGBScale(pIn, scx * 4, scx, scy, pOut, dcx * 4, dcx, dcy, libyuv::kFilterLinear);
	(void)ret; // result ignored here
}
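The RGB32-to-NV12 conversion mentioned above can also go through libyuv. A minimal sketch, assuming the DXGI frame is BGRA (which libyuv calls "ARGB") and the caller has already allocated the NV12 planes; RGB32ToNV12 is an illustrative helper, not part of the demo:

// Sketch: convert a BGRA desktop frame to NV12 for the encoder.
static bool RGB32ToNV12(const BYTE* pIn, int cx, int cy, BYTE* pY, BYTE* pUV)
{
	// libyuv "ARGB" is BGRA byte order, matching DXGI_FORMAT_B8G8R8A8_UNORM.
	return libyuv::ARGBToNV12(pIn, cx * 4,  // source pixels and stride (4 bytes per pixel)
		pY, cx,                             // Y plane, stride = width
		pUV, cx,                            // interleaved UV plane, stride = width (height/2 rows)
		cx, cy) == 0;
}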
The function that verifies an encoder actually works:
// Returns true if the named encoder exists and can actually be opened
// (e.g. "h264_qsv" may be registered yet fail to open without an Intel GPU).
bool CFFFindEncoder::Find(char* szName)
{
	AVCodecContext* c = NULL;
	AVPacket* pkt = NULL;
	AVCodec* codec = avcodec_find_encoder_by_name(szName);
	if (codec == 0)
		return false;
	c = avcodec_alloc_context3(codec);
	if (!c)
		return false;
	pkt = av_packet_alloc();
	if (!pkt)
	{
		avcodec_free_context(&c);
		return false;
	}
	// Dummy 1080p25 parameters; we only care whether avcodec_open2 succeeds.
	c->bit_rate = 8000000;
	c->width = 1920;
	c->height = 1080;
	c->time_base.num = 1;
	c->time_base.den = 25;
	c->framerate.num = 25;
	c->framerate.den = 1;
	c->gop_size = 10;
	c->max_b_frames = 0;
	c->pix_fmt = AV_PIX_FMT_NV12; // AV_PIX_FMT_YUV420P also works for libx264
	if (codec->id == AV_CODEC_ID_H264)
		av_opt_set(c->priv_data, "preset", "slow", 0);
	int ret = avcodec_open2(c, codec, NULL);
	avcodec_free_context(&c);
	av_packet_free(&pkt);
	return ret >= 0;
}
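Built on Find(), the hardware-first selection described above could look like the following hypothetical helper (PickH264Encoder is illustrative, not an exported SDK function):

// Probe hardware encoders first; fall back to software x264.
static const char* PickH264Encoder(CFFFindEncoder& finder, bool bPreferHW)
{
	if (bPreferHW)
	{
		if (finder.Find((char*)"h264_qsv"))   return "h264_qsv";   // Intel Quick Sync
		if (finder.Find((char*)"h264_nvenc")) return "h264_nvenc"; // NVIDIA NVENC
	}
	return "libx264"; // software fallback
}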
Audio encoding:
This part covers audio resampling, mixing, and encoding. Because I dislike that ffmpeg's AAC encoder insists on AV_SAMPLE_FMT_FLTP input, I encode with libfaac instead, which takes S16 directly. The resampling itself is simple and easy to find online; my ffmpeg resampling wrapper is pasted below. AAC encoding with libfaac is also straightforward, so that code is omitted.
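Even though the demo's libfaac code is omitted, the general shape of S16-in AAC encoding with the public libfaac API is roughly this (a sketch with simplified error handling, not the demo's actual code):

#include <faac.h>
#include <vector>

// Sketch: encode interleaved S16 PCM (44100 Hz stereo) to raw AAC frames.
unsigned long nInputSamples = 0, nMaxOutputBytes = 0;
faacEncHandle hEnc = faacEncOpen(44100, 2, &nInputSamples, &nMaxOutputBytes);

faacEncConfigurationPtr cfg = faacEncGetCurrentConfiguration(hEnc);
cfg->inputFormat   = FAAC_INPUT_16BIT; // S16 input, no FLTP conversion needed
cfg->mpegVersion   = MPEG4;
cfg->aacObjectType = LOW;              // AAC-LC
cfg->outputFormat  = 0;                // 0 = raw AAC, 1 = ADTS
faacEncSetConfiguration(hEnc, cfg);

std::vector<short> pcm(nInputSamples);           // nInputSamples counts all channels
std::vector<unsigned char> aac(nMaxOutputBytes);
// ... fill pcm with interleaved S16 samples from the mixer ...
int nBytes = faacEncEncode(hEnc, (int32_t*)pcm.data(), (unsigned int)nInputSamples,
	aac.data(), (unsigned int)aac.size());
// nBytes > 0 means one complete AAC frame is in aac[0..nBytes).
faacEncClose(hEnc);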
The resampling wrapper:
#pragma once
#include "stdafx.h"

// Wraps libswresample: converts PCM between sample rates, channel counts, and sample formats.
class CAudioACM
{
private:
	char* audio_out_buffer = 0;
	SwrContext* audio_convert_ctx = 0;
	int m_nInSampleRate;
	int m_nInChannel;
	int m_nOutSampleRate;
	int m_nOutChannel;
	AVSampleFormat in_sample_fmt;
	AVSampleFormat out_sample_fmt;

	// Bytes per sample for a given AVSampleFormat.
	int Getbytes_per_sample(int sampleFormat)
	{
		int bytes_per_sample = 2;
		switch (sampleFormat)
		{
		case AV_SAMPLE_FMT_U8P:
		case AV_SAMPLE_FMT_U8:
			bytes_per_sample = 8 >> 3;
			break;
		case AV_SAMPLE_FMT_S16P:
		case AV_SAMPLE_FMT_S16:
			bytes_per_sample = 16 >> 3;
			break;
		case AV_SAMPLE_FMT_S32:
		case AV_SAMPLE_FMT_S32P:
		case AV_SAMPLE_FMT_FLT:
		case AV_SAMPLE_FMT_FLTP:
			bytes_per_sample = 32 >> 3;
			break;
		case AV_SAMPLE_FMT_DBL:
		case AV_SAMPLE_FMT_DBLP:
			bytes_per_sample = 64 >> 3;
			break;
		default:
			bytes_per_sample = 0;
			break;
		}
		return bytes_per_sample;
	}

public:
	// nInFMT / nOutFMT: 0 = S16, 1 = FLTP
	void Init(int nInSampleRate, int nInChannel, int nInFMT, int nOutSampleRate, int nOutChannel, int nOutFMT)
	{
		if (audio_out_buffer)
		{
			delete[] audio_out_buffer;
			audio_out_buffer = 0;
		}
		audio_out_buffer = new char[1024 * 1024];
		m_nInSampleRate = nInSampleRate;
		m_nInChannel = nInChannel;
		m_nOutSampleRate = nOutSampleRate;
		m_nOutChannel = nOutChannel;
		in_sample_fmt = nInFMT == 0 ? AV_SAMPLE_FMT_S16 : AV_SAMPLE_FMT_FLTP;
		out_sample_fmt = nOutFMT == 0 ? AV_SAMPLE_FMT_S16 : AV_SAMPLE_FMT_FLTP;
		// Configure source and destination audio parameters.
		audio_convert_ctx = swr_alloc_set_opts(0,
			av_get_default_channel_layout(nOutChannel), out_sample_fmt, nOutSampleRate,
			av_get_default_channel_layout(nInChannel), in_sample_fmt, nInSampleRate, 0, NULL);
		swr_init(audio_convert_ctx);
	}

	// Wrap a raw PCM buffer in an AVFrame without copying (nb_samples assumes 16-bit samples).
	void InitFrame(AVFrame** srcFrame, char* pPCM, int nSize, int samplerate, int channel, int format = AV_SAMPLE_FMT_S16)
	{
		*srcFrame = av_frame_alloc();
		(*srcFrame)->nb_samples = nSize / channel / 2; // 2 bytes per S16 sample
		(*srcFrame)->channels = channel;
		(*srcFrame)->channel_layout = av_get_default_channel_layout(channel);
		(*srcFrame)->format = format;
		(*srcFrame)->sample_rate = samplerate;
		(*srcFrame)->data[0] = (*srcFrame)->extended_data[0] = (uint8_t*)pPCM;
		avcodec_fill_audio_frame(*srcFrame, channel, (AVSampleFormat)format, (const uint8_t*)pPCM, nSize, 0);
	}

	// Convert a raw S16 buffer; the result is left in pOutFrame (pointing into audio_out_buffer).
	int Resample(char* pIn, int nLen, AVFrame* pOutFrame)
	{
		AVFrame* srcFrame = 0;
		InitFrame(&srcFrame, pIn, nLen, m_nInSampleRate, m_nInChannel);
		Resample(srcFrame, pOutFrame);
		av_frame_free(&srcFrame);
		return 0;
	}

	int Resample(AVFrame* pFrame_A, AVFrame* pOutFrame)
	{
		if (!audio_convert_ctx)
			return 0;
		int in_samples_per_channel = pFrame_A->nb_samples;
		if (pFrame_A->sample_rate < 8000)
			pFrame_A->sample_rate = 8000;
		// Upper bound on the output sample count; doubles as the output buffer capacity.
		int out_samples_per_channel = av_rescale_rnd(128 + in_samples_per_channel, m_nOutSampleRate, pFrame_A->sample_rate, AV_ROUND_UP);
		unsigned char* out[] = { (unsigned char*)audio_out_buffer };
		int converted_samples_per_channel = swr_convert(audio_convert_ctx, out, out_samples_per_channel,
			(const uint8_t**)pFrame_A->extended_data, in_samples_per_channel);
		if (converted_samples_per_channel > 0)
		{
			pOutFrame->nb_samples = converted_samples_per_channel;
			pOutFrame->sample_rate = m_nOutSampleRate;
			pOutFrame->channels = m_nOutChannel;
			pOutFrame->extended_data[0] = pOutFrame->data[0] = out[0];
		}
		return 0;
	}

	void close()
	{
		if (audio_out_buffer)
		{
			delete[] audio_out_buffer;
			audio_out_buffer = 0;
		}
		if (audio_convert_ctx)
		{
			swr_close(audio_convert_ctx);
			swr_free(&audio_convert_ctx);
		}
	}

public:
	CAudioACM()
	{
		audio_convert_ctx = 0;
		m_nInSampleRate = 44100;
		m_nInChannel = 2;
		m_nOutSampleRate = 44100;
		m_nOutChannel = 2;
	}
	~CAudioACM()
	{
		close();
	}
};
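A quick usage sketch (the 48000 Hz FLTP input is an assumption for illustration; WASAPI loopback commonly delivers that format, and the output matches what the mixer and libfaac expect):

CAudioACM acm;
acm.Init(48000, 2, 1, 44100, 2, 0); // in: 48 kHz / 2 ch / FLTP -> out: 44.1 kHz / 2 ch / S16

AVFrame* pOut = av_frame_alloc();
acm.Resample(pCaptured, pOut);      // pCaptured: an AVFrame from the capture path (illustrative name)
// pOut->data[0] now holds pOut->nb_samples interleaved S16 samples
av_frame_free(&pOut);
acm.close();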
Here is a simple mixing routine. To record microphone and computer audio together, both streams are first converted to 44100 Hz, stereo, 16-bit, then mixed. The algorithm is just adding the two signals; I halve each sample before adding, but straight addition with clipping works about as well, so choose whichever you prefer.
// Mix two S16 PCM buffers of equal length: halve each sample, add, clip to the 16-bit range.
static void WaveMix(BYTE* pBuf0, BYTE* pBuf1, BYTE* pBufOut, int nLen)
{
	int all = 0;
	short w, w1;
	WORD wOK;
	for (int i = 0; i < nLen; i += 2)
	{
		w = (short)MAKEWORD(pBuf0[i], pBuf0[i + 1]); // reassemble a signed 16-bit sample
		w = w / 2;
		w1 = (short)MAKEWORD(pBuf1[i], pBuf1[i + 1]);
		w1 = w1 / 2;
		all = w + w1;
		if (all > 32767)  // clip to the S16 range
			all = 32767;
		if (all < -32768)
			all = -32768;
		wOK = (WORD)all;
		pBufOut[i] = LOBYTE(wOK);
		pBufOut[i + 1] = HIBYTE(wOK);
	}
}
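For example, mixing one 10 ms block of microphone and loopback audio, both already converted to 44100 Hz stereo S16 (441 samples * 4 bytes = 1764 bytes; the buffer names are illustrative):

BYTE mic[1764], loopback[1764], mixed[1764];
// ... fill mic and loopback from the two capture paths ...
WaveMix(mic, loopback, mixed, sizeof(mixed)); // mixed now feeds the AAC encoder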
Recording:
Now that we have AAC and H264 data, we can mux them into an MP4 file. The muxing itself follows the standard ffmpeg examples, so I will focus on audio/video synchronization, which is actually simple even though many people struggle with it. In everyday life we measure time in hours, minutes, and seconds; in A/V work the convenient unit is the millisecond (ms). Timestamps, however, are not in ms: their unit is defined by AVRational time_base. You will typically see something like pAVCodecContext->time_base.num = 1; pAVCodecContext->time_base.den = 90000; which means one timestamp tick is 1/90000 of a second, not 1 ms.
Audio timestamp unit: time_base.den = 44100;
Video timestamp unit: time_base.den = 90000;
Let's call the former "encoder time", as opposed to "ms time". The whole job is to convert ms time into encoder time.
Synchronization strategy: I use audio as the reference, because audio must not be dropped; a lost audio frame is far more noticeable than a lost video frame, and one video frame more or less is invisible. Video is therefore synchronized to audio, and the audio hardware is treated as a trustworthy clock: reading 1024 samples at 44100 Hz corresponds to about 23 ms. For the video clock you could compute time from the frame count, __int64 vtime = m_nVideoCount * 1000 / m_fps; but can you guarantee the encoder holds exactly 30 fps? Obviously not, so I use the system clock instead: __int64 vtime = (av_gettime() - m_nStartVideoTime) / 1000; (m_nStartVideoTime is the microsecond timestamp at which encoding started).
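As a sanity check on the unit conversion, ffmpeg's av_rescale_q performs exactly this ms-to-encoder-time mapping (a standalone sketch, not code from the demo):

AVRational ms_tb  = { 1, 1000 };   // "ms time": one tick = 1 ms
AVRational enc_tb = { 1, 90000 };  // video "encoder time": one tick = 1/90000 s
int64_t pts90k = av_rescale_q(40, ms_tb, enc_tb); // 40 ms -> 3600 ticks at 90 kHz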
The audio ms-time code:
__int64 COutPutStream::GetAudioTime()
{
	// Each AAC frame holds 1024 samples, so elapsed ms = frames * 1024 * 1000 / samplerate.
	if (m_samplerate > 0)
		return (double)(m_nAudioCount) * (double)1024 * (double)1000 / (double)m_samplerate;
	return 0;
}
Check the arithmetic: with m_nAudioCount = 1 this returns 1024 * 1000 / 44100 ≈ 23.22 ms, as expected.
The video ms-time code:
__int64 COutPutStream::GetVideoTime()
{
	if (m_nStartVideoTime > 0)
	{
		// av_gettime() is in microseconds; /1000 gives elapsed ms since encoding started.
		__int64 vtime = (av_gettime() - m_nStartVideoTime) / 1000;
		//__int64 vtime = m_nVideoCount * 1000 / m_fps; // frame-count alternative (assumes an exact fps)
		return vtime;
	}
	return 0;
}
With that in place, the timestamp fields can be filled in directly.
Video timestamp:
__int64 lTimeStamp = this->GetVideoTime() * 1000; // ms -> microseconds (64-bit, so it cannot overflow mid-recording)

enc_pkt.stream_index = pAVStream_Video->index;
enc_pkt.dts = enc_pkt.pts = (INT64)90000 * lTimeStamp / AV_TIME_BASE; // microseconds -> encoder time (1/90000 s per tick)
AVRational timebase;
timebase.den = 90000; timebase.num = 1;
av_packet_rescale_ts(&enc_pkt, timebase, pAVStream_Video->time_base); // encoder time -> mux (stream) time

av_interleaved_write_frame(pOutAVFormatContext, &enc_pkt);
Audio timestamp:
pkt.size = nAACSize;
pkt.pts = pkt.dts = 1024 * m_nAudioCount; // audio encoder time: each AAC frame advances 1024 samples
AVRational timebase;
timebase.den = m_samplerate;
timebase.num = 1;
av_packet_rescale_ts(&pkt, timebase, pAVStream_Audio->time_base); // encoder time -> mux (stream) time
m_nAudioCount++;

...

av_interleaved_write_frame(pOutAVFormatContext, &pkt);
So are we home free? In theory, yes; in practice audio and video can still drift apart, so a correction is needed. My approach: in the H264 write path, if audio time minus video time exceeds 100 ms, shift m_nStartVideoTime by 5 ms (5 * 1000 microseconds), which nudges the video clock toward the audio clock. The implementation (if you have a better method, please tell me!):
if (audioMS - dbMS > 100 || audioMS - dbMS < -100) // drift beyond 100 ms in either direction: force a correction
{
	if (audioMS - dbMS > 100)
	{
		m_nStartVideoTime -= (5 * 1000); // move the start time back 5 ms so the video clock catches up
	}
	else
	{
		m_nStartVideoTime += (5 * 1000); // move the start time forward 5 ms so the video clock slows down
	}
}
With that, we really can rest easy. All done; chicken dinner tonight!
A final note on why the demo records both MP4 and FLV: it is purely for safety. MP4 was not designed for streaming; its critical index data is stored at the end of the file and only written when recording stops, so if the power fails or the program crashes mid-recording, the MP4 is unplayable. FLV, by contrast, was designed for streaming: the file stays playable no matter when recording is interrupted or the program crashes.
QQ: 35744025, Xiaoxiao Studio