FFmpeg的HEVC解码器源代码学习笔记-1

一直想写一个HEVC的码流解析工具，看了雷神264码流解析工具，本来想尝试模仿写一个相似的265码流分析工具，但是发现265的解码过程和结构体和264的不太一样，很多结构体并没有完全暴露出来，没有想到很好的方法获得量化参数，运动向量等这些信息。想着从头学习一下ffmpeg中的265解码函数，再来获取解码后的量化参数，运动向量等一系列信息，再做码流分析。

这里主要学习HEVC的解析函数代码

众所周知，解码过程是由标准规定的，因此只要按照官方给定的解码流程对码流进行解码，就能得到正确的结果。（以前不知道下图这种语法元素描述的作用，最近才知道解码器的代码和这些描述完全对得上，语法描述本身就是伪代码形式，难怪雷神说parse vps，sps，pps的代码没什么技术含量）

ffmpeg为了支持多种编解码器，提供了一层包装函数，这些函数根据所给的编解码标准来选择对应的解码器。我主要看的是HEVC解码，不过其他解码器的流程也相似。

参考我前面的博客，经过一系列初始化函数后，首先调用的是av_parser_parse2()函数，对码流进行解析，主要是对VPS，SPS，PPS进行解码。

av_parser_parse2( )

// This code is located in libavcodec\parser.c.
/*
 * Feed one chunk of input to the codec-specific parser and report how many
 * bytes of it were consumed.
 *
 * s            parser context; its offsets, packet-descriptor ring and
 *              timestamp state are updated here.
 * avctx        codec context; its codec_id is checked (av_assert1) against
 *              s->parser->codec_ids[0..6].
 * poutbuf,
 * poutbuf_size output frame pointer/size, filled by the parser_parse
 *              callback; *poutbuf is reset to NULL when *poutbuf_size is 0.
 * buf,
 * buf_size     input data; when buf_size is 0 a zeroed local padding buffer
 *              is handed to the callback instead.
 * pts, dts,
 * pos          stored in the packet descriptor created for this input.
 *
 * Returns the index reported by the callback, clamped to be non-negative.
 */
int av_parser_parse2(AVCodecParserContext *s, AVCodecContext *avctx,
                     uint8_t **poutbuf, int *poutbuf_size,
                     const uint8_t *buf, int buf_size,
                     int64_t pts, int64_t dts, int64_t pos)
{
    int index, i;
    uint8_t dummy_buf[AV_INPUT_BUFFER_PADDING_SIZE];

    av_assert1(avctx->codec_id != AV_CODEC_ID_NONE);

    /* Parsers only work for the specified codec ids. */
    av_assert1(avctx->codec_id == s->parser->codec_ids[0] ||
               avctx->codec_id == s->parser->codec_ids[1] ||
               avctx->codec_id == s->parser->codec_ids[2] ||
               avctx->codec_id == s->parser->codec_ids[3] ||
               avctx->codec_id == s->parser->codec_ids[4] ||
               avctx->codec_id == s->parser->codec_ids[5] ||
               avctx->codec_id == s->parser->codec_ids[6]);

    /* On the first call only, seed both offsets from the caller's pos. */
    if (!(s->flags & PARSER_FLAG_FETCHED_OFFSET)) {
        s->next_frame_offset =
        s->cur_offset        = pos;
        s->flags            |= PARSER_FLAG_FETCHED_OFFSET;
    }

    if (buf_size == 0) {
        /* padding is always necessary even if EOF, so we add it here */
        memset(dummy_buf, 0, sizeof(dummy_buf));
        buf = dummy_buf;
    } else if (s->cur_offset + buf_size != s->cur_frame_end[s->cur_frame_start_index]) { /* skip remainder packets */
        /* add a new packet descriptor */
        /* The index wraps by masking with AV_PARSER_PTS_NB - 1. */
        i = (s->cur_frame_start_index + 1) & (AV_PARSER_PTS_NB - 1);
        s->cur_frame_start_index = i;
        s->cur_frame_offset[i]   = s->cur_offset;
        s->cur_frame_end[i]      = s->cur_offset + buf_size;
        s->cur_frame_pts[i]      = pts;
        s->cur_frame_dts[i]      = dts;
        s->cur_frame_pos[i]      = pos;
    }

    /* Set by the previous call when it produced output (see below). */
    if (s->fetch_timestamp) {
        s->fetch_timestamp = 0;
        s->last_pts        = s->pts;
        s->last_dts        = s->dts;
        s->last_pos        = s->pos;
        ff_fetch_timestamp(s, 0, 0, 0);
    }
    /* WARNING: the returned index can be negative */
    // Bitstream parsing starts here. parser_parse is a function pointer; when decoding HEVC it points to hevc_parse(),
    // which is located in libavcodec\hevc_parser.c, as are the other parsing functions discussed later.
    // s->parser is assigned in av_parser_init().
    // The matching parser struct is found by iterating with av_parser_iterate(), defined in libavcodec\parser.c; the parser_list used in that file is pulled in via #include "libavcodec/parser_list.c",
    // but that file is not present in the original source tree: it is generated by configure, as is codec_list.c.
    index = s->parser->parser_parse(s, avctx, (const uint8_t **) poutbuf,
                                    poutbuf_size, buf, buf_size);
    // This call dispatches to the parsing function of the codec in use; here that is hevc_parse().
    av_assert0(index > -0x20000000); // The API does not allow returning AVERROR codes
    /* Copy a field from the parser to avctx only when the parser has a
     * positive value and avctx does not have one yet. */
#define FILL(name) if(s->name > 0 && avctx->name <= 0) avctx->name = s->name
    if (avctx->codec_type == AVMEDIA_TYPE_VIDEO) {
        FILL(field_order);
        FILL(coded_width);
        FILL(coded_height);
        FILL(width);
        FILL(height);
    }

    /* update the file pointer */
    if (*poutbuf_size) {
        /* fill the data for the current frame */
        s->frame_offset = s->next_frame_offset;

        /* offset of the next frame */
        s->next_frame_offset = s->cur_offset + index;
        s->fetch_timestamp   = 1;
    } else {
        /* Don't return a pointer to dummy_buf. */
        *poutbuf = NULL;
    }
    /* A negative index is never reported to the caller or added to the offset. */
    if (index < 0)
        index = 0;
    s->cur_offset += index;
    return index;
}

hevc_parse( )

hevc_parse()位于libavcodec\hevc_parser.c，这个函数主要负责解析额外数据（extradata）、把输入码流拼成一帧完整的数据，并调用parse_nal_units()对这一帧进行解析。

/*
 * HEVC frame parser entry point.
 *
 * Decodes avctx->extradata once per parser context, determines how much of
 * the input belongs to the current frame (the whole buffer when
 * PARSER_FLAG_COMPLETE_FRAMES is set, otherwise whatever
 * hevc_find_frame_end() reports), lets ff_combine_frame() assemble the
 * frame, and then runs parse_nal_units() on the result.
 *
 * On return *poutbuf / *poutbuf_size describe the assembled frame; they are
 * set to NULL / 0 when ff_combine_frame() returns a negative value.
 *
 * Returns the frame-end value computed above, or buf_size (as left by
 * ff_combine_frame()) when combining fails.
 */
static int hevc_parse(AVCodecParserContext *s, AVCodecContext *avctx,
                      const uint8_t **poutbuf, int *poutbuf_size,
                      const uint8_t *buf, int buf_size)
{
    HEVCParserContext *hctx        = s->priv_data;
    const uint8_t     *caller_buf  = buf;
    int                empty_input = !buf_size;
    int                frame_end;

    /* Extradata is decoded on the first call only; the result is not checked. */
    if (avctx->extradata && !hctx->parsed_extradata) {
        ff_hevc_decode_extradata(avctx->extradata, avctx->extradata_size,
                                 &hctx->ps, &hctx->sei, &hctx->is_avc,
                                 &hctx->nal_length_size,
                                 avctx->err_recognition, 1, avctx);
        hctx->parsed_extradata = 1;
    }

    if (s->flags & PARSER_FLAG_COMPLETE_FRAMES) {
        frame_end = buf_size;
    } else {
        frame_end = hevc_find_frame_end(s, buf, buf_size);

        /* May replace buf / buf_size with the combined frame data. */
        if (ff_combine_frame(&hctx->pc, frame_end, &buf, &buf_size) < 0) {
            *poutbuf      = NULL;
            *poutbuf_size = 0;
            return buf_size;
        }
    }

    /* NAL parsing is skipped only when the caller passed an empty buffer
     * and that same buffer is still the one in use. */
    if (!empty_input || buf != caller_buf)
        parse_nal_units(s, buf, buf_size, avctx);

    *poutbuf      = buf;
    *poutbuf_size = buf_size;
    return frame_end;
}

parse_nal_units()

parse_nal_units()函数位于libavcodec\hevc_parser.c，里面主要根据nal的类型分别对VPS，SPS，PPS，SEI等信息进行解析

/*
 * Split one buffer into NAL units and parse the ones relevant to the parser.
 *
 * VPS / SPS / PPS / SEI units are decoded into the parameter-set and SEI
 * state held in the parser context (their return values are not checked).
 * For a slice NAL, repeat_pict is derived from the picture-timing SEI and
 * the slice header is parsed; a non-zero result from
 * hevc_parse_slice_header() ends the function immediately with that value.
 * NAL units with nuh_layer_id > 0 are ignored.
 *
 * Returns a negative value from ff_h2645_packet_split() on split failure,
 * the non-zero slice-header result as described above, or -1 (after logging
 * an error) when the loop finishes without such a result.
 */
static int parse_nal_units(AVCodecParserContext *s, const uint8_t *buf,
                           int buf_size, AVCodecContext *avctx)
{
    HEVCParserContext *hctx       = s->priv_data;
    HEVCParamSets     *param_sets = &hctx->ps;
    HEVCSEI           *sei_state  = &hctx->sei;
    int status, idx;

    /* set some sane default values */
    s->picture_structure = AV_PICTURE_STRUCTURE_UNKNOWN;
    s->key_frame         = 0;
    s->pict_type         = AV_PICTURE_TYPE_I;

    ff_hevc_reset_sei(sei_state);

    status = ff_h2645_packet_split(&hctx->pkt, buf, buf_size, avctx,
                                   hctx->is_avc, hctx->nal_length_size,
                                   AV_CODEC_ID_HEVC, 1, 0);
    if (status < 0)
        return status;

    for (idx = 0; idx < hctx->pkt.nb_nals; idx++) {
        H2645NAL      *unit   = &hctx->pkt.nals[idx];
        GetBitContext *reader = &unit->gb;

        /* only layer 0 is handled */
        if (unit->nuh_layer_id > 0)
            continue;

        switch (unit->type) {
        case HEVC_NAL_VPS:
            ff_hevc_decode_nal_vps(reader, avctx, param_sets);
            break;
        case HEVC_NAL_SPS:
            ff_hevc_decode_nal_sps(reader, avctx, param_sets, 1);
            break;
        case HEVC_NAL_PPS:
            ff_hevc_decode_nal_pps(reader, avctx, param_sets);
            break;
        case HEVC_NAL_SEI_PREFIX:
        case HEVC_NAL_SEI_SUFFIX:
            ff_hevc_decode_nal_sei(reader, avctx, sei_state, param_sets,
                                   unit->type);
            break;
        case HEVC_NAL_TRAIL_N:
        case HEVC_NAL_TRAIL_R:
        case HEVC_NAL_TSA_N:
        case HEVC_NAL_TSA_R:
        case HEVC_NAL_STSA_N:
        case HEVC_NAL_STSA_R:
        case HEVC_NAL_BLA_W_LP:
        case HEVC_NAL_BLA_W_RADL:
        case HEVC_NAL_BLA_N_LP:
        case HEVC_NAL_IDR_W_RADL:
        case HEVC_NAL_IDR_N_LP:
        case HEVC_NAL_CRA_NUT:
        case HEVC_NAL_RADL_N:
        case HEVC_NAL_RADL_R:
        case HEVC_NAL_RASL_N:
        case HEVC_NAL_RASL_R:
            /* frame doubling / tripling from the picture-timing SEI */
            switch (sei_state->picture_timing.picture_struct) {
            case HEVC_SEI_PIC_STRUCT_FRAME_DOUBLING:
                s->repeat_pict = 1;
                break;
            case HEVC_SEI_PIC_STRUCT_FRAME_TRIPLING:
                s->repeat_pict = 2;
                break;
            default:
                break;
            }

            status = hevc_parse_slice_header(s, unit, avctx);
            if (status)
                return status;
            break;
        }
    }

    /* didn't find a picture! */
    av_log(avctx, AV_LOG_ERROR, "missing picture in access unit with size %d\n", buf_size);
    return -1;
}

这里主要看了VPS，SPS和PPS的函数
VPS，SPS和PPS的函数功能都相差不大，由于代码过长就不贴了，这些函数都位于libavcodec\hevc_ps.c中，这里以VPS的解析函数ff_hevc_decode_nal_vps()为例。
下图为VPS的语法元素描述（《新一代高效视频编码 H.265/HEVC：原理、标准与实现》每一章都有对应的语法描述，或者去官方文件里面查看）
在这里插入图片描述

/*
 * Parse a video parameter set (VPS) from the bit reader and store it in the
 * parameter-set list.
 *
 * gb     bit reader positioned at the start of the VPS payload.
 * avctx  used for logging and for the err_recognition flags.
 * ps     parameter sets; ps->vps_list[vps_id] is replaced by the new VPS
 *        unless an identical one is already stored there.
 *
 * Returns 0 on success, AVERROR(ENOMEM) if the VPS cannot be allocated, and
 * AVERROR_INVALIDDATA when a syntax element fails validation.
 */
int ff_hevc_decode_nal_vps(GetBitContext *gb, AVCodecContext *avctx,
                           HEVCParamSets *ps)
{
    int i,j;
    int vps_id = 0;
    ptrdiff_t nal_size;
    HEVCVPS *vps = ff_refstruct_allocz(sizeof(*vps));

    if (!vps)
        return AVERROR(ENOMEM);

    av_log(avctx, AV_LOG_DEBUG, "Decoding VPS\n");

    /* Keep a copy of the raw bytes, truncated to the size of vps->data. */
    nal_size = gb->buffer_end - gb->buffer;
    if (nal_size > sizeof(vps->data)) {
        av_log(avctx, AV_LOG_WARNING, "Truncating likely oversized VPS "
               "(%"PTRDIFF_SPECIFIER" > %"SIZE_SPECIFIER")\n",
               nal_size, sizeof(vps->data));
        vps->data_size = sizeof(vps->data);
    } else {
        vps->data_size = nal_size;
    }
    memcpy(vps->data, gb->buffer, vps->data_size);

    /* 4-bit id; also used below as the index into ps->vps_list. */
    vps_id = vps->vps_id = get_bits(gb, 4);

    if (get_bits(gb, 2) != 3) { // vps_reserved_three_2bits
        av_log(avctx, AV_LOG_ERROR, "vps_reserved_three_2bits is not three\n");
        goto err;
    }

    /* Both counts are coded minus one in the bitstream. */
    vps->vps_max_layers               = get_bits(gb, 6) + 1;
    vps->vps_max_sub_layers           = get_bits(gb, 3) + 1;
    vps->vps_temporal_id_nesting_flag = get_bits1(gb);

    if (get_bits(gb, 16) != 0xffff) { // vps_reserved_ffff_16bits
        av_log(avctx, AV_LOG_ERROR, "vps_reserved_ffff_16bits is not 0xffff\n");
        goto err;
    }

    if (vps->vps_max_sub_layers > HEVC_MAX_SUB_LAYERS) {
        av_log(avctx, AV_LOG_ERROR, "vps_max_sub_layers out of range: %d\n",
               vps->vps_max_sub_layers);
        goto err;
    }

    if (parse_ptl(gb, avctx, &vps->ptl, vps->vps_max_sub_layers) < 0)
        goto err;

    vps->vps_sub_layer_ordering_info_present_flag = get_bits1(gb);

    /* Without the flag, only the entry for the last sub-layer is read. */
    i = vps->vps_sub_layer_ordering_info_present_flag ? 0 : vps->vps_max_sub_layers - 1;
    for (; i < vps->vps_max_sub_layers; i++) {
        vps->vps_max_dec_pic_buffering[i] = get_ue_golomb_long(gb) + 1;
        vps->vps_num_reorder_pics[i]      = get_ue_golomb_long(gb);
        vps->vps_max_latency_increase[i]  = get_ue_golomb_long(gb) - 1;

        if (vps->vps_max_dec_pic_buffering[i] > HEVC_MAX_DPB_SIZE || !vps->vps_max_dec_pic_buffering[i]) {
            av_log(avctx, AV_LOG_ERROR, "vps_max_dec_pic_buffering_minus1 out of range: %d\n",
                   vps->vps_max_dec_pic_buffering[i] - 1);
            goto err;
        }
        /* Only fatal when AV_EF_EXPLODE is set; otherwise just a warning. */
        if (vps->vps_num_reorder_pics[i] > vps->vps_max_dec_pic_buffering[i] - 1) {
            av_log(avctx, AV_LOG_WARNING, "vps_max_num_reorder_pics out of range: %d\n",
                   vps->vps_num_reorder_pics[i]);
            if (avctx->err_recognition & AV_EF_EXPLODE)
                goto err;
        }
    }

    vps->vps_max_layer_id   = get_bits(gb, 6);
    vps->vps_num_layer_sets = get_ue_golomb_long(gb) + 1;
    /* Reject counts outside 1..1024 and make sure the flags skipped in the
     * loop below fit into the bits that are left. */
    if (vps->vps_num_layer_sets < 1 || vps->vps_num_layer_sets > 1024 ||
        (vps->vps_num_layer_sets - 1LL) * (vps->vps_max_layer_id + 1LL) > get_bits_left(gb)) {
        av_log(avctx, AV_LOG_ERROR, "too many layer_id_included_flags\n");
        goto err;
    }

    /* The flags are consumed but not stored. */
    for (i = 1; i < vps->vps_num_layer_sets; i++)
        for (j = 0; j <= vps->vps_max_layer_id; j++)
            skip_bits(gb, 1);  // layer_id_included_flag[i][j]

    vps->vps_timing_info_present_flag = get_bits1(gb);
    if (vps->vps_timing_info_present_flag) {
        vps->vps_num_units_in_tick               = get_bits_long(gb, 32);
        vps->vps_time_scale                      = get_bits_long(gb, 32);
        vps->vps_poc_proportional_to_timing_flag = get_bits1(gb);
        if (vps->vps_poc_proportional_to_timing_flag)
            vps->vps_num_ticks_poc_diff_one = get_ue_golomb_long(gb) + 1;
        vps->vps_num_hrd_parameters = get_ue_golomb_long(gb);
        if (vps->vps_num_hrd_parameters > (unsigned)vps->vps_num_layer_sets) {
            av_log(avctx, AV_LOG_ERROR,
                   "vps_num_hrd_parameters %d is invalid\n", vps->vps_num_hrd_parameters);
            goto err;
        }
        for (i = 0; i < vps->vps_num_hrd_parameters; i++) {
            /* The first HRD entry has no flag bit; later ones read it. */
            int common_inf_present = 1;

            get_ue_golomb_long(gb); // hrd_layer_set_idx
            if (i)
                common_inf_present = get_bits1(gb);
            decode_hrd(gb, common_inf_present, &vps->hdr[i],
                       vps->vps_max_sub_layers);
        }
    }
    get_bits1(gb); /* vps_extension_flag */

    /* An overread is only fatal when a VPS with this id is already stored;
     * otherwise the freshly parsed VPS is still installed below. */
    if (get_bits_left(gb) < 0) {
        av_log(avctx, AV_LOG_ERROR,
               "Overread VPS by %d bits\n", -get_bits_left(gb));
        if (ps->vps_list[vps_id])
            goto err;
    }

    /* If the stored VPS compares equal over the whole struct, drop the new
     * copy and keep the existing entry; otherwise replace the entry. */
    if (ps->vps_list[vps_id] &&
        !memcmp(ps->vps_list[vps_id], vps, sizeof(*vps))) {
        ff_refstruct_unref(&vps);
    } else {
        remove_vps(ps, vps_id);
        ps->vps_list[vps_id] = vps;
    }

    return 0;

err:
    ff_refstruct_unref(&vps);
    return AVERROR_INVALIDDATA;
}