一直想写一个HEVC的码流解析工具,看了雷神264码流解析工具,本来想尝试模仿写一个相似的265码流分析工具,但是发现265的解码过程和结构体和264的不太一样,很多结构体并没有完全暴露出来,没有想到很好的方法获得量化参数,运动向量等这些信息。想着从头学习一下ffmpeg中的265解码函数,再来获取解码后的量化参数,运动向量等一系列信息,再做码流分析。
这里主要学习HEVC的解析函数代码
众所周知,解码过程是标准化的,因此只要按照官方给定的解码流程对码流进行解码就能正常解码。(以前不知道下图这种语法元素描述的作用,最近才知道解码器的代码和语法描述完全对得上,语法描述其实就是伪代码形式,难怪雷神说parse vps,sps,pps的代码没什么技术含量)
ffmpeg为了支持多种编解码器,提供了一层包装函数,这些函数根据所指定的编解码标准来选择对应的解码器。我主要看的是HEVC解码,不过其他解码器的流程也相似。
参考我前面的博客,经过一堆初始化函数后,首先调用的是av_parser_parse2()函数,对码流进行解析,主要是对VPS,SPS,PPS进行解析。
av_parser_parse2( )
//此代码位于libavcodec\parser.c中
/**
 * Feed one chunk of input to the codec-specific parser and update the
 * generic parser bookkeeping (offsets, per-packet timestamps) around it.
 *
 * @param s            parser context; s->parser->parser_parse is the
 *                     codec-specific callback that does the actual work
 * @param avctx        codec context; for video, unset dimension/field-order
 *                     fields are filled from the parser context
 * @param poutbuf      receives the output buffer chosen by the callback, or
 *                     NULL when the callback produced no output
 * @param poutbuf_size receives the size of the output buffer (0 = no output)
 * @param buf          input data; replaced by a zeroed local buffer when
 *                     buf_size is 0
 * @param buf_size     size of buf in bytes
 * @param pts          timestamp stored with the packet descriptor for this input
 * @param dts          timestamp stored with the packet descriptor for this input
 * @param pos          stream position; also seeds the offsets on the first call
 * @return the value returned by the parser callback, clamped to be
 *         non-negative; s->cur_offset is advanced by the same amount
 */
int av_parser_parse2(AVCodecParserContext *s, AVCodecContext *avctx,
uint8_t **poutbuf, int *poutbuf_size,
const uint8_t *buf, int buf_size,
int64_t pts, int64_t dts, int64_t pos)
{
int index, i;
uint8_t dummy_buf[AV_INPUT_BUFFER_PADDING_SIZE];
av_assert1(avctx->codec_id != AV_CODEC_ID_NONE);
/* Parsers only work for the specified codec ids. */
av_assert1(avctx->codec_id == s->parser->codec_ids[0] ||
avctx->codec_id == s->parser->codec_ids[1] ||
avctx->codec_id == s->parser->codec_ids[2] ||
avctx->codec_id == s->parser->codec_ids[3] ||
avctx->codec_id == s->parser->codec_ids[4] ||
avctx->codec_id == s->parser->codec_ids[5] ||
avctx->codec_id == s->parser->codec_ids[6]);
/* First call only: seed both offsets from pos and remember that we did. */
if (!(s->flags & PARSER_FLAG_FETCHED_OFFSET)) {
s->next_frame_offset =
s->cur_offset = pos;
s->flags |= PARSER_FLAG_FETCHED_OFFSET;
}
if (buf_size == 0) {
/* padding is always necessary even if EOF, so we add it here */
memset(dummy_buf, 0, sizeof(dummy_buf));
buf = dummy_buf;
} else if (s->cur_offset + buf_size != s->cur_frame_end[s->cur_frame_start_index]) { /* skip remainder packets */
/* add a new packet descriptor */
/* The slot index wraps by masking with AV_PARSER_PTS_NB - 1. */
i = (s->cur_frame_start_index + 1) & (AV_PARSER_PTS_NB - 1);
s->cur_frame_start_index = i;
s->cur_frame_offset[i] = s->cur_offset;
s->cur_frame_end[i] = s->cur_offset + buf_size;
s->cur_frame_pts[i] = pts;
s->cur_frame_dts[i] = dts;
s->cur_frame_pos[i] = pos;
}
/* Set after the previous call produced output: save the current
 * timestamps as last_* and fetch new ones. */
if (s->fetch_timestamp) {
s->fetch_timestamp = 0;
s->last_pts = s->pts;
s->last_dts = s->dts;
s->last_pos = s->pos;
ff_fetch_timestamp(s, 0, 0, 0);
}
/* WARNING: the returned index can be negative */
// Bitstream parsing starts here. parser_parse is a function pointer; when
// decoding HEVC it points to hevc_parse(), which lives in
// libavcodec/hevc_parser.c, as do the other parsing functions discussed later.
// s->parser is assigned in av_parser_init().
// The matching parser struct is found by iterating with av_parser_iterate(),
// defined in libavcodec/parser.c; the parser_list used there is pulled in via
// #include "libavcodec/parser_list.c".
// That file is not part of the pristine source tree: it is generated by
// configure, as is codec_list.c.
index = s->parser->parser_parse(s, avctx, (const uint8_t **) poutbuf,
poutbuf_size, buf, buf_size);
// The call above dispatches to the parser of the stream's coding standard;
// for HEVC that is hevc_parse().
av_assert0(index > -0x20000000); // The API does not allow returning AVERROR codes
/* FILL(name): copy s->name into avctx->name only when the parser has a
 * positive value and the codec context does not. */
#define FILL(name) if(s->name > 0 && avctx->name <= 0) avctx->name = s->name
if (avctx->codec_type == AVMEDIA_TYPE_VIDEO) {
FILL(field_order);
FILL(coded_width);
FILL(coded_height);
FILL(width);
FILL(height);
}
/* update the file pointer */
if (*poutbuf_size) {
/* fill the data for the current frame */
s->frame_offset = s->next_frame_offset;
/* offset of the next frame */
s->next_frame_offset = s->cur_offset + index;
s->fetch_timestamp = 1;
} else {
/* Don't return a pointer to dummy_buf. */
*poutbuf = NULL;
}
/* Negative callback results are treated as "advance by nothing". */
if (index < 0)
index = 0;
s->cur_offset += index;
return index;
}
hevc_parse( )
hevc_parse()位于libavcodec\hevc_parser.c,这个函数主要是解析额外数据,获得完整帧数据,并进行解析。
/**
 * HEVC parser callback: decode extradata once, obtain a complete frame from
 * the input, parse its NAL units and hand the frame back to the caller.
 *
 * @param s            parser context; s->priv_data is the HEVCParserContext
 * @param avctx        codec context (supplies extradata and error flags)
 * @param poutbuf      receives the frame buffer, or NULL if combining failed
 * @param poutbuf_size receives the frame size, or 0 if combining failed
 * @param buf          input data
 * @param buf_size     size of buf in bytes
 * @return buf_size as left by ff_combine_frame() when combining fails;
 *         otherwise the frame end position (buf_size itself when
 *         PARSER_FLAG_COMPLETE_FRAMES is set)
 */
static int hevc_parse(AVCodecParserContext *s, AVCodecContext *avctx,
                      const uint8_t **poutbuf, int *poutbuf_size,
                      const uint8_t *buf, int buf_size)
{
    HEVCParserContext *hpc = s->priv_data;
    const uint8_t *const input_buf = buf;
    const int input_was_empty = (buf_size == 0);
    int frame_end;

    /* Extradata is handed to ff_hevc_decode_extradata() a single time; the
     * parsed_extradata flag blocks repeats. Per the original author's note it
     * carries codec-specific information used to initialise the codec. */
    if (avctx->extradata && !hpc->parsed_extradata) {
        ff_hevc_decode_extradata(avctx->extradata, avctx->extradata_size,
                                 &hpc->ps, &hpc->sei,
                                 &hpc->is_avc, &hpc->nal_length_size,
                                 avctx->err_recognition,
                                 1, avctx);
        hpc->parsed_extradata = 1;
    }

    if (s->flags & PARSER_FLAG_COMPLETE_FRAMES) {
        frame_end = buf_size;
    } else {
        /* Locate the frame boundary. Per the original author's note the search
         * keys on the start code prefix 0x000001 (START_CODE); viewing the
         * stream in a hex editor makes the flow easier to follow. */
        frame_end = hevc_find_frame_end(s, buf, buf_size);

        /* Assemble the incoming data into one complete frame. */
        if (ff_combine_frame(&hpc->pc, frame_end, &buf, &buf_size) < 0) {
            *poutbuf      = NULL;
            *poutbuf_size = 0;
            return buf_size;
        }
    }

    /* Parse unless the input was empty and buf still points at that same
     * empty input, i.e. no frame data was substituted for it. */
    if (!(input_was_empty && buf == input_buf))
        parse_nal_units(s, buf, buf_size, avctx);

    *poutbuf      = buf;
    *poutbuf_size = buf_size;

    return frame_end;
}
parse_nal_units()
parse_nal_units()函数位于libavcodec\hevc_parser.c,里面主要根据nal的类型分别对VPS,SPS,PPS,SEI等信息进行解析
/**
 * Split a buffer into NAL units and dispatch each base-layer unit by type:
 * VPS/SPS/PPS/SEI go to their decoders, slice NALs go to
 * hevc_parse_slice_header().
 *
 * @param s        parser context; s->priv_data is the HEVCParserContext
 * @param buf      data to split into NAL units
 * @param buf_size size of buf in bytes
 * @param avctx    codec context, passed through to the decoders and logger
 * @return the negative result of ff_h2645_packet_split() on failure; the first
 *         non-zero result of hevc_parse_slice_header(); otherwise -1 after
 *         logging that no picture was found
 */
static int parse_nal_units(AVCodecParserContext *s, const uint8_t *buf,
                           int buf_size, AVCodecContext *avctx)
{
    HEVCParserContext *hpc        = s->priv_data;
    HEVCParamSets     *param_sets = &hpc->ps;
    HEVCSEI           *sei_state  = &hpc->sei;
    int status;

    /* Defaults that stay in place unless later parsing overrides them. */
    s->pict_type         = AV_PICTURE_TYPE_I;
    s->key_frame         = 0;
    s->picture_structure = AV_PICTURE_STRUCTURE_UNKNOWN;

    ff_hevc_reset_sei(sei_state);

    status = ff_h2645_packet_split(&hpc->pkt, buf, buf_size, avctx,
                                   hpc->is_avc, hpc->nal_length_size,
                                   AV_CODEC_ID_HEVC, 1, 0);
    if (status < 0)
        return status;

    for (int n = 0; n < hpc->pkt.nb_nals; n++) {
        H2645NAL      *unit = &hpc->pkt.nals[n];
        GetBitContext *bits = &unit->gb;

        /* Only units with nuh_layer_id == 0 are examined. */
        if (unit->nuh_layer_id > 0)
            continue;

        switch (unit->type) {
        case HEVC_NAL_VPS:
            ff_hevc_decode_nal_vps(bits, avctx, param_sets);
            break;

        case HEVC_NAL_SPS:
            ff_hevc_decode_nal_sps(bits, avctx, param_sets, 1);
            break;

        case HEVC_NAL_PPS:
            ff_hevc_decode_nal_pps(bits, avctx, param_sets);
            break;

        case HEVC_NAL_SEI_PREFIX:
        case HEVC_NAL_SEI_SUFFIX:
            ff_hevc_decode_nal_sei(bits, avctx, sei_state, param_sets, unit->type);
            break;

        /* Every slice-carrying NAL type shares one handler. */
        case HEVC_NAL_TRAIL_N:
        case HEVC_NAL_TRAIL_R:
        case HEVC_NAL_TSA_N:
        case HEVC_NAL_TSA_R:
        case HEVC_NAL_STSA_N:
        case HEVC_NAL_STSA_R:
        case HEVC_NAL_BLA_W_LP:
        case HEVC_NAL_BLA_W_RADL:
        case HEVC_NAL_BLA_N_LP:
        case HEVC_NAL_IDR_W_RADL:
        case HEVC_NAL_IDR_N_LP:
        case HEVC_NAL_CRA_NUT:
        case HEVC_NAL_RADL_N:
        case HEVC_NAL_RADL_R:
        case HEVC_NAL_RASL_N:
        case HEVC_NAL_RASL_R: {
            const int pic_struct = sei_state->picture_timing.picture_struct;

            /* Frame doubling / tripling from picture timing SEI maps to
             * repeat_pict 1 / 2; any other value leaves it untouched. */
            if (pic_struct == HEVC_SEI_PIC_STRUCT_FRAME_DOUBLING)
                s->repeat_pict = 1;
            else if (pic_struct == HEVC_SEI_PIC_STRUCT_FRAME_TRIPLING)
                s->repeat_pict = 2;

            /* Any non-zero result ends parsing of this buffer. */
            status = hevc_parse_slice_header(s, unit, avctx);
            if (status != 0)
                return status;
            break;
        }

        default:
            break;
        }
    }

    /* didn't find a picture! */
    av_log(avctx, AV_LOG_ERROR, "missing picture in access unit with size %d\n", buf_size);
    return -1;
}
这里主要看了VPS,SPS和PPS的函数
VPS,SPS和PPS的函数功能都相差不大,由于代码过长就不贴了,这些函数都位于libavcodec\hevc_ps.c中,这里以VPS的解析函数ff_hevc_decode_nal_vps()为例。
下图为VPS的语法元素描述(《新一代高效视频编码 H.265/HEVC:原理、标准与实现》 每一章都有对应的语法描述,或者去官方文件里面查看)
/**
 * Parse a video parameter set (VPS) from the bit reader and store it in the
 * parameter-set list under its vps_id.
 *
 * The reads below follow the VPS syntax element by element; the field names
 * mirror the syntax element names.
 *
 * @param gb    bit reader positioned at the start of the VPS payload
 * @param avctx codec context, used for logging and for err_recognition
 * @param ps    parameter-set storage; ps->vps_list[vps_id] is updated on success
 * @return 0 on success, AVERROR(ENOMEM) if the VPS cannot be allocated,
 *         AVERROR_INVALIDDATA if the bitstream fails a validity check
 */
int ff_hevc_decode_nal_vps(GetBitContext *gb, AVCodecContext *avctx,
HEVCParamSets *ps)
{
int i,j;
int vps_id = 0;
ptrdiff_t nal_size;
HEVCVPS *vps = ff_refstruct_allocz(sizeof(*vps));
if (!vps)
return AVERROR(ENOMEM);
av_log(avctx, AV_LOG_DEBUG, "Decoding VPS\n");
/* Keep a raw copy of the NAL payload inside the VPS, truncated (with a
 * warning) to the capacity of vps->data. */
nal_size = gb->buffer_end - gb->buffer;
if (nal_size > sizeof(vps->data)) {
av_log(avctx, AV_LOG_WARNING, "Truncating likely oversized VPS "
"(%"PTRDIFF_SPECIFIER" > %"SIZE_SPECIFIER")\n",
nal_size, sizeof(vps->data));
vps->data_size = sizeof(vps->data);
} else {
vps->data_size = nal_size;
}
memcpy(vps->data, gb->buffer, vps->data_size);
vps_id = vps->vps_id = get_bits(gb, 4);
if (get_bits(gb, 2) != 3) { // vps_reserved_three_2bits
av_log(avctx, AV_LOG_ERROR, "vps_reserved_three_2bits is not three\n");
goto err;
}
/* Both counts are stored as the value read from the bitstream plus one. */
vps->vps_max_layers = get_bits(gb, 6) + 1;
vps->vps_max_sub_layers = get_bits(gb, 3) + 1;
vps->vps_temporal_id_nesting_flag = get_bits1(gb);
if (get_bits(gb, 16) != 0xffff) { // vps_reserved_ffff_16bits
av_log(avctx, AV_LOG_ERROR, "vps_reserved_ffff_16bits is not 0xffff\n");
goto err;
}
if (vps->vps_max_sub_layers > HEVC_MAX_SUB_LAYERS) {
av_log(avctx, AV_LOG_ERROR, "vps_max_sub_layers out of range: %d\n",
vps->vps_max_sub_layers);
goto err;
}
if (parse_ptl(gb, avctx, &vps->ptl, vps->vps_max_sub_layers) < 0)
goto err;
vps->vps_sub_layer_ordering_info_present_flag = get_bits1(gb);
/* With the flag unset the loop starts at the last index, so only the
 * entry for the highest sub-layer is read. */
i = vps->vps_sub_layer_ordering_info_present_flag ? 0 : vps->vps_max_sub_layers - 1;
for (; i < vps->vps_max_sub_layers; i++) {
vps->vps_max_dec_pic_buffering[i] = get_ue_golomb_long(gb) + 1;
vps->vps_num_reorder_pics[i] = get_ue_golomb_long(gb);
vps->vps_max_latency_increase[i] = get_ue_golomb_long(gb) - 1;
/* The buffering count must lie in 1..HEVC_MAX_DPB_SIZE. */
if (vps->vps_max_dec_pic_buffering[i] > HEVC_MAX_DPB_SIZE || !vps->vps_max_dec_pic_buffering[i]) {
av_log(avctx, AV_LOG_ERROR, "vps_max_dec_pic_buffering_minus1 out of range: %d\n",
vps->vps_max_dec_pic_buffering[i] - 1);
goto err;
}
/* An excessive reorder count is only a warning unless AV_EF_EXPLODE
 * is set in err_recognition. */
if (vps->vps_num_reorder_pics[i] > vps->vps_max_dec_pic_buffering[i] - 1) {
av_log(avctx, AV_LOG_WARNING, "vps_max_num_reorder_pics out of range: %d\n",
vps->vps_num_reorder_pics[i]);
if (avctx->err_recognition & AV_EF_EXPLODE)
goto err;
}
}
vps->vps_max_layer_id = get_bits(gb, 6);
vps->vps_num_layer_sets = get_ue_golomb_long(gb) + 1;
/* Reject the VPS when the number of flag bits the loops below would skip
 * exceeds the bits left; 1LL forces the product to be computed as
 * long long. */
if (vps->vps_num_layer_sets < 1 || vps->vps_num_layer_sets > 1024 ||
(vps->vps_num_layer_sets - 1LL) * (vps->vps_max_layer_id + 1LL) > get_bits_left(gb)) {
av_log(avctx, AV_LOG_ERROR, "too many layer_id_included_flags\n");
goto err;
}
/* The flags are skipped, not stored. */
for (i = 1; i < vps->vps_num_layer_sets; i++)
for (j = 0; j <= vps->vps_max_layer_id; j++)
skip_bits(gb, 1); // layer_id_included_flag[i][j]
vps->vps_timing_info_present_flag = get_bits1(gb);
if (vps->vps_timing_info_present_flag) {
vps->vps_num_units_in_tick = get_bits_long(gb, 32);
vps->vps_time_scale = get_bits_long(gb, 32);
vps->vps_poc_proportional_to_timing_flag = get_bits1(gb);
if (vps->vps_poc_proportional_to_timing_flag)
vps->vps_num_ticks_poc_diff_one = get_ue_golomb_long(gb) + 1;
vps->vps_num_hrd_parameters = get_ue_golomb_long(gb);
/* There may not be more HRD parameter sets than layer sets. */
if (vps->vps_num_hrd_parameters > (unsigned)vps->vps_num_layer_sets) {
av_log(avctx, AV_LOG_ERROR,
"vps_num_hrd_parameters %d is invalid\n", vps->vps_num_hrd_parameters);
goto err;
}
for (i = 0; i < vps->vps_num_hrd_parameters; i++) {
/* For the first entry this stays 1; for later entries it is read
 * from the bitstream. */
int common_inf_present = 1;
get_ue_golomb_long(gb); // hrd_layer_set_idx
if (i)
common_inf_present = get_bits1(gb);
/* The result of decode_hrd() is not checked here. */
decode_hrd(gb, common_inf_present, &vps->hdr[i],
vps->vps_max_sub_layers);
}
}
get_bits1(gb); /* vps_extension_flag */
/* Reading past the end is always logged, but it is fatal only when a VPS
 * with this id is already stored; otherwise the new VPS is still installed. */
if (get_bits_left(gb) < 0) {
av_log(avctx, AV_LOG_ERROR,
"Overread VPS by %d bits\n", -get_bits_left(gb));
if (ps->vps_list[vps_id])
goto err;
}
/* If the stored VPS is byte-for-byte identical, keep it and drop the new
 * one; otherwise remove the old entry and install the new VPS. */
if (ps->vps_list[vps_id] &&
!memcmp(ps->vps_list[vps_id], vps, sizeof(*vps))) {
ff_refstruct_unref(&vps);
} else {
remove_vps(ps, vps_id);
ps->vps_list[vps_id] = vps;
}
return 0;
/* Common failure exit: drop the reference to the new VPS. */
err:
ff_refstruct_unref(&vps);
return AVERROR_INVALIDDATA;
}
可以看出每个语法元素都能找到对应的一行代码,且变量名都完全一样,将上述语法描述和代码对应起来看就可以明白这些解析函数代码的含义。