基于RTP协议的H264播放器
- 1. 概述
- 2.工程
- 3.测试
- 4.小结
1. 概述
前面记录了一篇基于RTP协议的H264的推流器、接收器的实现过程,但是没有加上解码播放,这里记录一下如何实现解码和播放,也是在前面的基础之上实现的。前一篇的记录为【开源项目】基于RTP协议的H264码流发送器和接收器
在前文中,接收器将接收到的一系列数据包进行解析,并分成了一个个完整的帧,存储在内存之中。下面要将这些完整的帧进行解码成为yuv,并且播放。因此,需要添加解码和播放部分的代码。工程的代码结构为
工程的核心函数是udp_recevie_packet()(即"receive packet",此处沿用代码中的实际拼写),这个函数的主要工作流程为:
(1)使用recvfrom()来接收数据流
(2)使用check_fragment()对数据流进行解析,并且拷贝到本地内存中
(3)使用h264_parse_packet()来解码所获取的数据,并且使用SDL进行播放
在处理接收的数据流时,我是按照一整个压缩帧进行存储的,例如存储一个完整的Intra帧或者一个完整的P帧,不包含后续帧的信息。然而,使用av_parser_parse2()进行分析时,会首先去寻找下一帧的起始码来确定当前帧是否已经完整输入,如果没有找到,则很多分析流程不会执行。我在这里用了一个小技巧,在数据内存的最末尾加上一个伪起始码,让av_parser_parse2()确认已经接收了所有的数据,从而进行后续的分析。
PS:不过其实这样写并不通用,只是为了配合我的整帧存储方式进行的微调。如果要实现通用的解码,在接收时去掉RTP的头,送入到av_parser_parse2()就可以了
2.工程
头文件的定义中,包括rtp header和rtp packet的定义,还定义了一个全局上下文信息结构体rtp_sdl_context_t
#pragma once
#include <stdio.h>
#include <WinSock2.h>
#include <string.h>
#include <stdint.h>
#include <inttypes.h>
extern "C"
{
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libswscale/swscale.h"
#include "libavutil/imgutils.h"
#include "SDL2/SDL.h"
};
#undef main
// Size of the stack buffer handed to recvfrom(), in bytes.
#define RECV_DATA_SIZE 10000
// Parenthesized so the product stays a single operand when the macro is used
// inside a larger expression (e.g. `x / MAX_BUFF_SIZE`).
#define MAX_BUFF_SIZE (32 * 1024 * 1024)
#define RTP_MAX_PKT_SIZE 1400 // maximum RTP packet data size, in bytes
#define RTP_HEADER_SIZE 12
#define RTP_PADDING_SIZE 64
// Position of a fragment inside a fragmented NAL unit (rtp_context_t::packet_loc).
#define RTP_PACKET_START 1
#define RTP_PACKET_FRAGMENT 2
#define RTP_PACKET_END 3
// Set to 1 to dump the received stream / decoded YUV to disk.
#define RECV_STREAM_DOWNLOAD 0
#define RECV_YUV_DOWNLOAD 0
// Overlay for the fixed RTP header at the start of a received datagram.
// udp_recevie_packet() fills it with a raw memcpy of the datagram, and no
// byte-order conversion is applied anywhere in this file, so the multi-byte
// fields hold the bytes exactly as they arrived.
// NOTE(review): bit-field allocation order is implementation-defined; the field
// order below assumes the compiler allocates from the low-order bits upward.
typedef struct rtp_header
{
// version is expected to end up in the high-order bits of byte 0
/* byte 0 */
uint8_t csrc_len : 4; /* expect 0 */
uint8_t extension : 1; /* expect 1 */
uint8_t padding : 1; /* expect 0 */
uint8_t version : 2; /* expect 2 */
/* byte 1 */
uint8_t payload_type : 7;
uint8_t marker : 1; /* expect 1 */
/* bytes 2, 3 */
uint16_t seq_num;
/* bytes 4-7 */
uint32_t timestamp;
/* bytes 8-11 */
uint32_t ssrc; /* stream number is used here. */
}rtp_header_t;
// One received RTP datagram: the header followed by the payload bytes.
// udp_recevie_packet() copies the raw datagram onto this struct, so the
// payload capacity is RTP_MAX_PKT_SIZE + RTP_PADDING_SIZE bytes.
typedef struct rtp_packet
{
rtp_header_t rtp_h;
uint8_t rtp_data[RTP_MAX_PKT_SIZE + RTP_PADDING_SIZE];
}rtp_packet_t;
// Reassembly state for the frame currently being collected from RTP packets.
typedef struct rtp_context
{
int rtp_packet_cnt; // fragments appended since the last reset (see check_fragment)
int rtp_buffer_size; // number of valid bytes currently stored in rtp_buffer_data
int rtp_frame_cnt; // running frame counter, used for log output
int packet_loc; // RTP_PACKET_START / RTP_PACKET_FRAGMENT / RTP_PACKET_END; 0 after a reset
uint8_t* rtp_buffer_data; // heap buffer grown with realloc() as payload is appended
}rtp_context_t;
// Global decode/display context shared by the FFmpeg and SDL helpers.
typedef struct rtp_sdl_context
{
// video param (set up by avc_init, used by h264_parse_packet)
const AVCodec* codec;
AVCodecContext* codec_ctx;
AVCodecParserContext* parser_ctx ;
AVFrame* frame; // reused for every decoded picture
SwsContext* img_convert_ctx; // created in h264_parse_packet
// SDL param (set up by sdl_init)
SDL_Window* window;
SDL_Renderer* render;
SDL_Texture* texture;
SDL_Rect rect; // destination rectangle used when updating/copying the texture
}rtp_sdl_context_t;
cpp文件的定义和前文类似,只是增加了一些FFmpeg解码函数和SDL播放函数,重要部分有注释
#pragma warning(disable:4996)
#pragma comment(lib,"ws2_32.lib")
#include "include/udp_rtp_decode_sdl.h"
// Output file for decoded YUV frames; it is only opened, written and closed
// when RECV_YUV_DOWNLOAD is non-zero.
FILE* fp_yuv;
//int avc_init(const AVCodec* codec, AVCodecContext* codec_ctx, AVCodecParserContext* parser, AVFrame* frame)
// Set up the H.264 decoding objects stored in rsc: decoder, codec context,
// parser and the reusable output frame, then open the codec.
// Returns 0 on success; prints a message and returns -1 at the first step that fails.
int avc_init(rtp_sdl_context_t* rsc)
{
    const AVCodecID h264_id = AV_CODEC_ID_H264;
    const char* failure = NULL;

    // Each step runs only if every previous one succeeded.
    if ((rsc->codec = avcodec_find_decoder(h264_id)) == NULL)
    {
        failure = "find decoder failed\n";
    }
    else if ((rsc->codec_ctx = avcodec_alloc_context3(rsc->codec)) == NULL)
    {
        failure = "alloc context3 failed\n";
    }
    else if ((rsc->parser_ctx = av_parser_init(h264_id)) == NULL)
    {
        failure = "parser ctx init failed\n";
    }
    else if ((rsc->frame = av_frame_alloc()) == NULL)
    {
        failure = "alloc frame failed\n";
    }
    else if (avcodec_open2(rsc->codec_ctx, rsc->codec, NULL) < 0)
    {
        failure = "Could not open codec\n";
    }

    if (failure != NULL)
    {
        printf("%s", failure);
        return -1;
    }
    return 0;
}
// Initialise SDL and create the window, renderer and streaming IYUV texture
// stored in rsc, then set rsc->rect to cover the whole 1280x720 window.
// Returns 0 on success, -1 (after printing a message) if any SDL call fails.
int sdl_init(rtp_sdl_context_t* rsc)
{
    const int screen_w = 1280, screen_h = 720;
    const int pixel_w = 1280, pixel_h = 720;
    const int border = 0;

    if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_AUDIO | SDL_INIT_TIMER)) {
        printf("could not init sdl\n");
        return -1;
    }

    //SDL 2.0 Support for multiple windows
    rsc->window = SDL_CreateWindow("Play", SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED,
        screen_w, screen_h, SDL_WINDOW_OPENGL | SDL_WINDOW_RESIZABLE);
    if (!rsc->window) {
        printf("SDL: could not create window - exiting:%s\n", SDL_GetError());
        return -1;
    }

    rsc->render = SDL_CreateRenderer(rsc->window, -1, 0);
    if (!rsc->render) {
        printf("SDL: could not create renderer - exiting:%s\n", SDL_GetError());
        return -1;
    }

    rsc->texture = SDL_CreateTexture(rsc->render, SDL_PIXELFORMAT_IYUV, SDL_TEXTUREACCESS_STREAMING, pixel_w, pixel_h);
    if (!rsc->texture) {
        printf("SDL: could not create texture - exiting:%s\n", SDL_GetError());
        return -1;
    }

    rsc->rect.x = 0 + border;
    rsc->rect.y = 0 + border;
    rsc->rect.w = screen_w - border * 2;
    rsc->rect.h = screen_h - border * 2;

    // The original fell off the end of a non-void function here.
    return 0;
}
// Release every FFmpeg object owned by rsc and reset the pointers to NULL so
// the context can be cleaned up more than once or after a partial avc_init().
// SDL objects are not touched here.
void av_free_all(rtp_sdl_context_t* rsc)
{
    if (!rsc)
    {
        return;
    }
    avcodec_free_context(&rsc->codec_ctx);   // also NULLs the pointer
    av_parser_close(rsc->parser_ctx);
    rsc->parser_ctx = NULL;                  // av_parser_close() does not reset it
    av_frame_free(&rsc->frame);              // also NULLs the pointer
    sws_freeContext(rsc->img_convert_ctx);   // created in h264_parse_packet; NULL is accepted
    rsc->img_convert_ctx = NULL;
}
// Validate the first byte of a NAL unit and extract its type.
//   data0: NAL header byte (1 bit forbidden_zero_bit, 2 bits nal_ref_idc,
//          5 bits nal_unit_type).
// Returns nal_unit_type (0..31), or -1 if forbidden_zero_bit is set.
int check_nalu_header(uint8_t data0)
{
    // The masked value is either 0 or 0x80, so it must be tested against zero;
    // the previous comparison with 1 could never be true.
    if ((data0 & 0x80) != 0)
    {
        printf("forbidden zero bit should be 0\n");
        return -1;
    }
    return data0 & 0x1F;
}
// Classify a fragment from its two leading bytes and record the result in
// rtp_ctx->packet_loc.
//   data0: indicator byte; only handled when its NAL type is 28.
//   data1: fragment header; bit 0x80 marks the first fragment, bit 0x40 the last.
// Returns 0 on success (packet_loc is left untouched when the type is not 28),
// or -1 if both the start and end bits are set.
int check_fragment_nalu_header(rtp_context_t* rtp_ctx, uint8_t data0, uint8_t data1)
{
    if (check_nalu_header(data0) != 28)
    {
        return 0;
    }

    const int se_bits = data1 & 0xC0; // 1100 0000: start / end flags

    if (se_bits == 0x80)
    {
        rtp_ctx->packet_loc = RTP_PACKET_START;
    }
    else if (se_bits == 0x40)
    {
        rtp_ctx->packet_loc = RTP_PACKET_END;
    }
    else if (se_bits == 0x00)
    {
        rtp_ctx->packet_loc = RTP_PACKET_FRAGMENT;
    }
    else
    {
        // start and end set together
        printf("invalid packet loc\n");
        return -1;
    }
    return 0;
}
// Locate the first NAL unit in buf.
//   buf/size:  bytes to scan.
//   nal_start: receives the offset of the first byte after the start code
//              (00 00 01 or 00 00 00 01); 0 if no start code was found.
//   nal_end:   receives the offset where the NAL ends (the next 00 00 00 /
//              00 00 01 sequence), or size if the data ended first.
// Returns the NAL length (nal_end - nal_start), 0 if no start code was found,
// or -1 if a start code was found but the end of the NAL was not.
int find_nal_unit(uint8_t* buf, int size, int* nal_start, int* nal_end)
{
    int i = 0;

    *nal_start = 0;
    *nal_end = 0;

    // The start-code search below looks at buf[0..3] before any size check,
    // so anything shorter cannot be scanned safely.
    if (buf == NULL || size < 4) { return 0; }

    // find start
    while ( //( next_bits( 24 ) != 0x000001 && next_bits( 32 ) != 0x00000001 )
        (buf[i] != 0 || buf[i + 1] != 0 || buf[i + 2] != 0x01) &&
        (buf[i] != 0 || buf[i + 1] != 0 || buf[i + 2] != 0 || buf[i + 3] != 0x01)
        )
    {
        i++; // skip leading zero
        if (i + 4 >= size) { return 0; } // did not find nal start
    }

    if (buf[i] != 0 || buf[i + 1] != 0 || buf[i + 2] != 0x01) // ( next_bits( 24 ) != 0x000001 )
    {
        i++; // 4-byte start code: step over its extra leading zero
    }
    if (buf[i] != 0 || buf[i + 1] != 0 || buf[i + 2] != 0x01) { /* error, should never happen */ return 0; }
    i += 3;
    *nal_start = i;

    // A start code at the very end of the data leaves fewer than three bytes to
    // inspect; report "stream ended first" instead of reading past the buffer.
    if (i + 3 > size) { *nal_end = size; return -1; }

    while ( //( next_bits( 24 ) != 0x000000 && next_bits( 24 ) != 0x000001 )
        (buf[i] != 0 || buf[i + 1] != 0 || buf[i + 2] != 0) &&
        (buf[i] != 0 || buf[i + 1] != 0 || buf[i + 2] != 0x01)
        )
    {
        i++;
        // FIXME the next line fails when reading a nal that ends exactly at the end of the data
        if (i + 3 >= size) { *nal_end = size; return -1; } // did not find nal end, stream ended first
    }

    *nal_end = i;
    return (*nal_end - *nal_start);
}
// Reset the reassembly state after a complete frame has been handled.
// The buffer allocation itself is kept so it can be reused by the next frame;
// rtp_frame_cnt is intentionally left running.
void set_default_rtp_context(rtp_context_t* rtp_ctx)
{
    if (!rtp_ctx)
    {
        return;
    }
    // Clear the bytes that were actually in use. The previous code passed
    // sizeof(rtp_buffer_size), i.e. the size of an int, as the length.
    if (rtp_ctx->rtp_buffer_data && rtp_ctx->rtp_buffer_size > 0)
    {
        memset(rtp_ctx->rtp_buffer_data, 0, (size_t)rtp_ctx->rtp_buffer_size);
    }
    rtp_ctx->rtp_packet_cnt = 0;
    rtp_ctx->rtp_buffer_size = 0;
    rtp_ctx->packet_loc = 0;
}
// Check the data is fragment or not, if fragment, try to concate
int check_fragment(rtp_context_t* rtp_ctx, rtp_packet_t* rtp_pkt, uint8_t* data, int size)
{
int nal_start, nal_end;
int ret = 0;
int data_size = size - RTP_HEADER_SIZE;
find_nal_unit(data, data_size, &nal_start, &nal_end); // check NALU split pos
uint8_t data0 = data[nal_start];
uint8_t data1 = data[nal_start + 1];
uint8_t fu_indicator, fu_header;
if (nal_start > 0 && nal_start < 5) // single-fragment, maybe SPS, PPS or small size frame
{
fu_indicator = 0;
fu_header = 0;
ret = check_nalu_header(data0); // update nalu_type
rtp_ctx->rtp_buffer_data = (uint8_t*)realloc(rtp_ctx->rtp_buffer_data, (rtp_ctx->rtp_buffer_size + data_size) * sizeof(uint8_t));
memcpy(rtp_ctx->rtp_buffer_data + rtp_ctx->rtp_buffer_size, data, data_size);
#if STREAM_DOWNLOAD
fwrite(rtp_ctx->rtp_buffer_data + rtp_ctx->rtp_buffer_size, 1, data_size, fp_in);
#endif
fprintf(stdout, "rtp_ctx frame cnt:%d, frame_size:%d\n", rtp_ctx->rtp_frame_cnt, data_size);
rtp_ctx->rtp_frame_cnt++;
rtp_ctx->rtp_buffer_size += data_size;
}
else // multi-fragment
{
fu_indicator = data[0];
fu_header = data[1];
ret = check_fragment_nalu_header(rtp_ctx, fu_indicator, fu_header);
if (ret < 0)
{
printf("invalid nalu header\n");
return -1;
}
int real_data_size = data_size - 2;
rtp_ctx->rtp_buffer_data = (uint8_t*)realloc(rtp_ctx->rtp_buffer_data, (rtp_ctx->rtp_buffer_size + real_data_size) * sizeof(uint8_t));
if (!rtp_ctx->rtp_buffer_data)
{
printf("realloc rtp_buffer_data failed\n");
return -1;
}
memcpy(rtp_ctx->rtp_buffer_data + rtp_ctx->rtp_buffer_size, data + 2, real_data_size); // plus 2 to skip fu_indicator and fu_header
#if STREAM_DOWNLOAD
fwrite(rtp_ctx->rtp_buffer_data + rtp_ctx->rtp_buffer_size, 1, real_data_size, fp_in);
fflush(fp_in);
#endif
rtp_ctx->rtp_packet_cnt++;
rtp_ctx->rtp_buffer_size += real_data_size;
if (rtp_ctx->packet_loc == RTP_PACKET_END) // end of packet
{
fprintf(stdout, "rtp_ctx frame cnt:%d, frame_size:%d\n", rtp_ctx->rtp_frame_cnt, rtp_ctx->rtp_buffer_size);
rtp_ctx->rtp_frame_cnt++;
}
}
return 0;
}
// Forge a trailing start code: grow data by six bytes and write
// 00 00 00 01 41 9A after the first size bytes.
// Returns the new length (size + 6), or -1 if the reallocation fails.
// NOTE(review): the pointer returned by realloc() is stored only in the local
// parameter, so if the block moves the caller's pointer is left stale.
int forge_end_code(uint8_t* data, int size)
{
    static const uint8_t fake_start_code[6] = { 0x00, 0x00, 0x00, 0x01, 0x41, 0x9A };

    data = (uint8_t*)realloc(data, (size + 6) * sizeof(uint8_t));
    if (data == NULL)
    {
        printf("realloc end code failed\n");
        return -1;
    }

    for (int k = 0; k < 6; k++)
    {
        data[size + k] = fake_start_code[k];
    }
    return size + 6;
}
int h264_parse_packet(rtp_sdl_context_t* rsc, rtp_context_t* rtp_ctx, rtp_packet_t* rtp_pkt)
{
AVPacket* packet;
int ret = 0;
packet = av_packet_alloc();
if (!packet)
{
printf("alloc packet failed\n");
return -1;
}
// 添加伪起始码
uint8_t* buf_data = rtp_ctx->rtp_buffer_data;
int data_size = rtp_ctx->rtp_buffer_size;
data_size = forge_end_code(buf_data, data_size);
ret = av_parser_parse2(rsc->parser_ctx, rsc->codec_ctx, &packet->data, &packet->size,
buf_data, data_size, AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0);
if (ret < 0) {
printf("parse packet failed, err:%d\n", ret);
return -1;
}
ret = avcodec_send_packet(rsc->codec_ctx, packet);
if (ret < 0)
{
printf("send packet failed\n");
return -1;
}
rsc->codec_ctx->pix_fmt = AV_PIX_FMT_YUV420P;
rsc->img_convert_ctx = sws_getContext(rsc->codec_ctx->width, rsc->codec_ctx->height, rsc->codec_ctx->pix_fmt,
rsc->codec_ctx->width, rsc->codec_ctx->height, AV_PIX_FMT_YUV420P, SWS_BICUBIC, NULL, NULL, NULL);
while (ret >= 0) {
ret = avcodec_receive_frame(rsc->codec_ctx, rsc->frame);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
return -1;
else if (ret < 0) {
fprintf(stderr, "Error during decoding\n");
exit(1);
}
//printf("saving frame:%d\n", rsc->codec_ctx->frame_num);
fflush(stdout);
#if RECV_YUV_DOWNLOAD
int size = rsc->frame->width * rsc->frame->height;
fwrite(rsc->frame->data[0], 1, size, fp_yuv);//Y
fwrite(rsc->frame->data[1], 1, size / 4, fp_yuv);//U
fwrite(rsc->frame->data[2], 1, size / 4, fp_yuv);//V
fflush(fp_yuv);
#endif
sws_scale(rsc->img_convert_ctx, (const unsigned char* const*)rsc->frame->data, rsc->frame->linesize, 0, rsc->codec_ctx->height,
rsc->frame->data, rsc->frame->linesize);
// SDL播放
SDL_UpdateYUVTexture(rsc->texture, &rsc->rect,
rsc->frame->data[0], rsc->frame->linesize[0],
rsc->frame->data[1], rsc->frame->linesize[1],
rsc->frame->data[2], rsc->frame->linesize[2]);
SDL_RenderClear(rsc->render);
SDL_RenderCopy(rsc->render, rsc->texture, NULL, &rsc->rect);
SDL_RenderPresent(rsc->render);
SDL_Delay(40); // delay 40ms
}
return 0;
}
// Receive RTP packets over UDP, reassemble frames and hand each complete
// H.264 frame to h264_parse_packet().
//   rsc:  decoder / SDL context passed through to h264_parse_packet().
//   url:  local IPv4 address to bind to, in dotted notation.
//   port: local UDP port to bind to.
// Loops until a non-recoverable socket error occurs. Always returns -1 once it
// returns: either setup failed or the receive loop ended on an error.
int udp_recevie_packet(rtp_sdl_context_t* rsc, const char* url, int port)
{
    WSADATA wsaData;
    WORD sockVersion = MAKEWORD(2, 2);
    if (WSAStartup(sockVersion, &wsaData) != 0)
    {
        printf("WSAStartup failed\n");
        return -1;
    }

    SOCKET ser_socket = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
    if (ser_socket == INVALID_SOCKET)
    {
        printf("Invalid socket\n");
        WSACleanup();
        return -1;
    }

    int on = 1;
    setsockopt(ser_socket, SOL_SOCKET, SO_REUSEADDR, (const char*)& on, sizeof(on));

    sockaddr_in ser_addr;
    memset(&ser_addr, 0, sizeof(ser_addr));
    ser_addr.sin_family = AF_INET;
    ser_addr.sin_port = htons(port);
    ser_addr.sin_addr.s_addr = inet_addr(url);
    if (bind(ser_socket, (sockaddr*)& ser_addr, sizeof(ser_addr)) == SOCKET_ERROR)
    {
        printf("Bind socket addr error\n");
        closesocket(ser_socket);
        WSACleanup();
        return -1;
    }

    rtp_context_t* rtp_ctx = (rtp_context_t*)calloc(1, sizeof(rtp_context_t));
    if (!rtp_ctx)
    {
        printf("alloc rtp_ctx failed\n");
        closesocket(ser_socket);
        WSACleanup();
        return -1;
    }
    rtp_packet_t* rtp_pkt = (rtp_packet_t*)calloc(1, sizeof(rtp_packet_t));
    if (!rtp_pkt)
    {
        printf("alloc rtp_pkt failed\n");
        free(rtp_ctx);
        closesocket(ser_socket);
        WSACleanup();
        return -1;
    }

    sockaddr_in remote_addr;
    char recv_data[RECV_DATA_SIZE];
    fprintf(stdout, "Listening on port:%d\n", port);
    while (1)
    {
        // recvfrom() overwrites the length, so reset it before every call.
        int addr_len = sizeof(remote_addr);
        int pkt_size = recvfrom(ser_socket, recv_data, RECV_DATA_SIZE, 0, (sockaddr*)& remote_addr, &addr_len);
        if (pkt_size == SOCKET_ERROR)
        {
            int err = WSAGetLastError();
            if (err == WSAECONNRESET || err == WSAEMSGSIZE)
            {
                continue; // per-datagram problem, keep listening
            }
            printf("recvfrom failed, err:%d\n", err);
            break;
        }
        if (pkt_size <= RTP_HEADER_SIZE)
        {
            continue; // empty or header-only datagram: no payload to process
        }
        if ((size_t)pkt_size > sizeof(rtp_packet_t))
        {
            // recv_data is larger than rtp_packet_t; copying such a datagram
            // would overflow the heap block behind rtp_pkt.
            printf("oversized rtp packet dropped, size:%d\n", pkt_size);
            continue;
        }

        memcpy(rtp_pkt, recv_data, pkt_size);
        if (check_fragment(rtp_ctx, rtp_pkt, rtp_pkt->rtp_data, pkt_size) < 0) // check pkt data is fragment or not
        {
            continue;
        }

        if (rtp_ctx->packet_loc == RTP_PACKET_END)
        {
            char payload = rtp_pkt->rtp_h.payload_type;
            switch (payload)
            {
            case 33: // mpegts
                // mpegts_packet_parse((uint8_t*)rtp_data, parse_mpegts, payload, rtp_data_size); // TODO: add mpegts parser
                printf("MPEGTS type\n");
                break;
            case 96: // h264
                // parse and decode the reassembled H.264 frame
                h264_parse_packet(rsc, rtp_ctx, rtp_pkt);
                break;
            default:
                printf("Unknown type\n");
                break;
            }
            set_default_rtp_context(rtp_ctx); // set default rtp ctx value
        }
    }

    free(rtp_ctx->rtp_buffer_data);
    free(rtp_ctx);
    free(rtp_pkt);
    closesocket(ser_socket);
    WSACleanup();
    return -1;
}
int main()
{
rtp_sdl_context_t* rsc = (rtp_sdl_context_t*)malloc(sizeof(rtp_sdl_context_t));
if (!rsc)
{
printf("malloc rsc failed\n");
return -1;
}
memset(rsc, 0, sizeof(rtp_sdl_context_t));
// 初始化参数
avc_init(rsc);
sdl_init(rsc);
// 如果要存储yuv信息可以设置为1
#if RECV_YUV_DOWNLOAD
fp_yuv = fopen("rtp_receive_yuv.yuv", "wb");
#endif
// 开始接收数据包
udp_recevie_packet(rsc, "127.0.0.1", 8880);
av_free_all(rsc);
#if RECV_YUV_DOWNLOAD
fclose(fp_yuv);
#endif
return 0;
}
3.测试
发送端
接收端
接收端播放正常,感觉可以后续改一改SDL的逻辑,让窗口变成可移动和可缩放的
4.小结
总体来说,这个功能的实现是比较简单的,不过使用了一个小的技巧,伪造了一个起始码,如果代码格式要求不严格,可以凑合着用。如果要做成大的工程,应该将多个packet直接送入解码器,这样比较合理,也更符合FFmpeg的设计原则。