对于一个视频文件(mp4格式/flv格式),audio_pkt或者video_pkt是其最基本的数据单元,即视频文件是由独立的视频编码包或者音频编码包组成的。
解复用就是从视频文件中把视频包/音频包单独读取出来保存成独立文件,那么如何得知packet是视频包还是音频包呢?有这样一个结构体:
/*
 * AVPacket: one unit of compressed (still encoded) data read from a container.
 * Excerpt of the FFmpeg definition as quoted by this article; the demuxing code
 * below reads pts, dts, size, pos, duration and stream_index from it.
 */
typedef struct AVPacket {
AVBufferRef *buf; // reference to the buffer that owns the packet data (reference-counted per the FFmpeg docs)
int64_t pts; // presentation timestamp, in units of the owning stream's time_base
int64_t dts; // decoding timestamp, in units of the owning stream's time_base
uint8_t *data; // pointer to the packet payload bytes
int size; // size of the payload in bytes
int stream_index; // index of the stream this packet belongs to
int flags; // packet flag bits
AVPacketSideData *side_data; // array of side-data entries
int side_data_elems; // number of entries in side_data
int64_t duration; // duration of the packet, in units of the owning stream's time_base
int64_t pos; // byte position of the packet in the input file
int64_t convergence_duration; // convergence duration of the packet (deprecated)
} AVPacket;
AVPacket中的stream_index标记了该包是属于音频流还是视频流,stream_index对应什么值的时候是属于音频流/视频流呢?那就需要解析flv/mp4文件,我们可以通过以下方式获得视频流的相关信息:
const char *in_filename = "/home/yx/media_file/believe.flv"; // path of the input media file
AVFormatContext *in_file_ctx = NULL; // demuxer context, allocated by avformat_open_input
int videoindex = -1; // index of the video stream, -1 until one is found
int audioindex = -1; // index of the audio stream, -1 until one is found
// NOTE: for brevity this snippet does not check `result`; real code must check
// it after each call before touching in_file_ctx.
int result = avformat_open_input(&in_file_ctx, in_filename, NULL, NULL); // open the file and bind it to the context
result = avformat_find_stream_info(in_file_ctx, NULL); // probe the file to fill in per-stream parameters
printf("stream number:%d\n", in_file_ctx->nb_streams); // number of streams; typically two (audio + video)
for (uint32_t i = 0; i < in_file_ctx->nb_streams; i++) // visit every stream in the file
{
    AVStream *in_stream = in_file_ctx->streams[i]; // the i-th stream of the file
    if (in_stream->codecpar->codec_type == AVMEDIA_TYPE_AUDIO)
    {
        printf("**********音频流**********\n");
        printf("samplerate:%dHz\n", in_stream->codecpar->sample_rate); // sample rate
        printf("index:%d\n", in_stream->index); // stream index
        printf("channel number:%d\n", in_stream->codecpar->channels); // channel count
        if (in_stream->codecpar->format == AV_SAMPLE_FMT_FLTP) // sample format
            printf("sampleformat:AV_SAMPLE_FMT_FLTP\n");
        else if (in_stream->codecpar->format == AV_SAMPLE_FMT_S16P)
            printf("sampleformat:AV_SAMPLE_FMT_S16P\n");
        if (in_stream->codecpar->codec_id == AV_CODEC_ID_AAC) // audio codec
            printf("audio codec:AV_CODEC_ID_AAC\n");
        else if (in_stream->codecpar->codec_id == AV_CODEC_ID_MP3)
            printf("audio codec:AV_CODEC_ID_MP3\n");
        else
            printf("audio codec:%d\n", in_stream->codecpar->codec_id);
        if (in_stream->duration != AV_NOPTS_VALUE)
        {
            // duration is counted in time_base units; convert to whole seconds
            int audio_seconds = (int)(in_stream->duration * av_q2d(in_stream->time_base));
            printf("audio duration: %02d:%02d:%02d\n", audio_seconds / 3600, (audio_seconds % 3600) / 60, audio_seconds % 60);
        }
        else
            printf("audio duration unknown\n");
        audioindex = (int)i; // remember which stream_index means "audio"
    }
    else if (in_stream->codecpar->codec_type == AVMEDIA_TYPE_VIDEO)
    {
        printf("**********视频流**********\n");
        printf("fps:%lffps\n", av_q2d(in_stream->avg_frame_rate)); // average frame rate
        printf("index:%d\n", in_stream->index); // stream index
        printf("width:%d,height:%d\n", in_stream->codecpar->width, in_stream->codecpar->height); // frame dimensions
        // Must be a comparison (==): an assignment here would overwrite codec_id
        // and always take the first branch.
        if (in_stream->codecpar->codec_id == AV_CODEC_ID_MPEG4)
            printf("video codec:MPEG4\n");
        else if (in_stream->codecpar->codec_id == AV_CODEC_ID_H264)
            printf("video codec:H264\n");
        else
            printf("video codec:%d\n", in_stream->codecpar->codec_id);
        if (in_stream->duration != AV_NOPTS_VALUE)
        {
            // duration is counted in time_base units; convert to whole seconds
            int video_seconds = (int)(in_stream->duration * av_q2d(in_stream->time_base));
            printf("video duration: %02d:%02d:%02d\n", video_seconds / 3600, (video_seconds % 3600) / 60, video_seconds % 60);
        }
        else
            printf("video duration unknown\n");
        videoindex = (int)i; // remember which stream_index means "video"
    }
}
此时我们就获得了解复用最关键的信息:视频流标签和音频流标签。接下来只需要依次读取视频文件中的packet,根据AVPacket中的stream_index来区分该包属于音频还是视频,这里先读取20个packet进行分析:
AVPacket* pkt = av_packet_alloc();
int pkt_count = 0; // 当前是第0个包
int print_count = 20; // 最大打印十个包的信息
while(pkt_count<=20) // 只解析20个包
{
result = av_read_frame(in_file_ctx,pkt); // 依次从输入视频来读取包
if(result < 0)
{
printf("av_read_frame fail\n");
break;
}
if(pkt_count++ < print_count)
{
if(pkt->stream_index == audioindex)
{
printf("audioindex:%d\n",audioindex);
printf("audio pts: %lld\n", pkt->pts);
printf("audio dts: %lld\n", pkt->dts);
printf("audio size: %d\n", pkt->size);
printf("audio pos: %lld\n", pkt->pos);
printf("audio duration: %lf\n\n",pkt->duration * av_q2d(in_file_ctx->streams[audioindex]->time_base));
}
else if(pkt->stream_index == videoindex)
{
printf("videoindex:%d\n",videoindex);
printf("video pts: %lld\n", pkt->pts);
printf("video dts: %lld\n", pkt->dts);
printf("video size: %d\n", pkt->size);
printf("video pos: %lld\n", pkt->pos);
printf("video duration: %lf\n\n",pkt->duration * av_q2d(in_file_ctx->streams[videoindex]->time_base));
}
}
av_packet_unref(pkt); // 解析完引用计数-1,自动释放
}
这里我们读取到视频包或者音频包后,打印包的详细信息:
pts:显示时间戳,dts:解码时间戳,size:包的大小,pos:包在输入文件中的位置。
每一个包的相关信息读取之后,调用 av_packet_unref(pkt) 使引用计数减1,当计数减为0时,系统会自动释放该部分空间。
完整代码如下:
#include <stdio.h>
#include "libavformat/avformat.h"
void demux_flv()
{
char* in_filename = "/home/yx/media_file/believe.flv";
printf("输入文件路径%s\n",in_filename);
AVFormatContext *in_file_ctx = NULL; // 媒体流上下文
int videoindex = -1; // 视频索引
int audioindex = -1; // 音频索引
int result = avformat_open_input(&in_file_ctx,in_filename,NULL,NULL); // 打开媒体流(将输入文件与媒体流相关)
if(result < 0)
printf("open file fail\n");
result = avformat_find_stream_info(in_file_ctx,NULL); // 查找媒体流信息
if(result < 0)
printf("find stream info fail\n");
av_dump_format(in_file_ctx,0,in_filename,0); // 打印输出媒体流的信息,第1个0表示输出所有流
printf("media name:%s\n",in_file_ctx->url);
printf("stream number:%d\n",in_file_ctx->nb_streams); // 只有两个流:视频流或者音频流
printf("media average radio:%lldkps\n",(int64_t)(in_file_ctx->bit_rate/1024));
int total_seconds,hour,minute,second;
total_seconds = (in_file_ctx->duration)/AV_TIME_BASE;
hour = total_seconds/3600;
minute = (total_seconds % 3600)/60;
second = (total_seconds % 60);
printf("total duration: %02d:%02d:%02d\n",hour,minute,second);
for(uint32_t i = 0;i < in_file_ctx->nb_streams; i++) // 遍历两个流
{
AVStream* in_stream = in_file_ctx->streams[i]; // 指定视频流文件中第i个流
if(in_stream->codecpar->codec_type == AVMEDIA_TYPE_AUDIO)
{
printf("**********音频流**********\n");
printf("samplerate:%dHz\n",in_stream->codecpar->sample_rate); // 采样率
printf("index:%d\n",in_stream->index); // 媒体流标签
printf("channel number:%d\n",in_stream->codecpar->channels); // 声道数
if(in_stream->codecpar->format == AV_SAMPLE_FMT_FLTP) // 采样格式
printf("sampleformat:AV_SAMPLE_FMT_FLTP\n");
else if(in_stream->codecpar->format == AV_SAMPLE_FMT_S16P)
printf("sampleformat:AV_SAMPLE_FMT_S16P\n");
if(in_stream->codecpar->codec_id == AV_CODEC_ID_AAC) // 打印音频流编码格式
printf("audio codec:AV_CODEC_ID_AAC\n");
else if(in_stream->codecpar->codec_id == AV_CODEC_ID_MP3)
printf("audio codec:AV_CODEC_ID_MP3\n");
else
printf("audio codec:%d\n",in_stream->codecpar->codec_id);
if(in_stream->duration != AV_NOPTS_VALUE)
{
int duration_audio = (in_stream->duration)*av_q2d(in_stream->time_base);
printf("audio duration: %02d:%02d:%02d\n",duration_audio/3600,(duration_audio % 3600)/60,(duration_audio % 60));
}
else
printf("audio duration unknown\n");
audioindex = i; // 获得音频标签
}
else if(in_stream->codecpar->codec_type == AVMEDIA_TYPE_VIDEO)
{
printf("**********视频流**********\n");
printf("fps:%lffps\n",av_q2d(in_stream->avg_frame_rate)); // 帧率
printf("index:%d\n",in_stream->index); // 媒体流标签
printf("width:%d,height:%d\n",in_stream->codecpar->width,in_stream->codecpar->height); // 声道数
if(in_stream->codecpar->codec_id = AV_CODEC_ID_MPEG4)
printf("video codec:MPEG4\n");
else if(in_stream->codecpar->codec_id = AV_CODEC_ID_H264)
printf("video codec:H264\n");
else
printf("video codec:%d\n",in_stream->codecpar->codec_id);
if(in_stream->duration != AV_NOPTS_VALUE)
{
int duration_audio = (in_stream->duration)*av_q2d(in_stream->time_base);
printf("video duration: %02d:%02d:%02d\n",duration_audio/3600,(duration_audio % 3600)/60,(duration_audio % 60));
}
else
printf("video duration unknown\n");
videoindex = i; // 获得视频标签
}
}
printf("====================================\n");
AVPacket* pkt = av_packet_alloc();
int pkt_count = 0; // 当前是第0个包
int print_count = 20; // 最大打印十个包的信息
while(pkt_count<=20) // 只解析20个包
{
result = av_read_frame(in_file_ctx,pkt); // 依次从输入视频来读取包
if(result < 0)
{
printf("av_read_frame fail\n");
break;
}
if(pkt_count++ < print_count)
{
if(pkt->stream_index == audioindex)
{
printf("audioindex:%d\n",audioindex);
printf("audio pts: %lld\n", pkt->pts);
printf("audio dts: %lld\n", pkt->dts);
printf("audio size: %d\n", pkt->size);
printf("audio pos: %lld\n", pkt->pos);
printf("audio duration: %lf\n\n",pkt->duration * av_q2d(in_file_ctx->streams[audioindex]->time_base));
}
else if(pkt->stream_index == videoindex)
{
printf("videoindex:%d\n",videoindex);
printf("video pts: %lld\n", pkt->pts);
printf("video dts: %lld\n", pkt->dts);
printf("video size: %d\n", pkt->size);
printf("video pos: %lld\n", pkt->pos);
printf("video duration: %lf\n\n",pkt->duration * av_q2d(in_file_ctx->streams[videoindex]->time_base));
}
}
av_packet_unref(pkt); // 解析完引用计数-1,自动释放
}
}
/* Program entry point: run the FLV demux demo, then print a greeting. */
int main()
{
    demux_flv();

    /* Same bytes on stdout as the formatted print it replaces. */
    fputs("Hello World!\n", stdout);

    return 0;
}