目录
- 1、前言
- 2、目前主流的FPGA图像缩放方案
- 3、目前主流的FPGA视频拼接方案
- 4、本设计方案的优越性
- 5、详细设计方案解读
- HDMI输入
- 图像缩放
- 图像缓存
- VGA时序
- HDMI输出
- 6、vivado工程详解
- 7、上板调试验证
- 8、福利:工程源码获取
1、前言
本文详细描述了FPGA纯verilog代码实现4路视频缩放拼接的设计方案,工程代码编译通过后已上板调试验证,文章末尾有演示视频,可直接项目移植,适用于在校学生、研究生,也适用于在职工程师做项目开发,可应用于医疗、军工等行业的数字成像和图像传输领域;
提供完整的、跑通的工程源码和技术支持;
工程源码和技术支持的获取方式放在了文章末尾,请耐心看到最后。
2、目前主流的FPGA图像缩放方案
目前市面上主流的FPGA图像缩放方案如下:
1:Xilinx的HLS方案,该方案简单,易于实现,但只能用于Xilinx自家的FPGA;关于HLS实现图像缩放,请参考我之前写的文章,点击查看:HLS图像缩放
2:非纯Verilog方案,大部分代码使用Verilog实现,但中间的fifo或ram等使用了IP,导致移植性变差,难以在Xilinx、Altera和国产FPGA之间自由移植;
3:纯Verilog方案,也就是本方案,一个字:牛逼!!!
3、目前主流的FPGA视频拼接方案
FPGA实现视频拼接是FPGA在图像处理领域的基本应用,如果你的视频是AXIS流,且你的开发板是K7或者zynq之类的高端器件,可以使用Xilinx官方的video mixer实现视频拼接,关于video mixer实现视频拼接,可以参考我之前写的文章,点击查看:video mixer实现视频拼接
但是,对于使用A7或者Spartan6之类低端FPGA的开发者来说,video mixer就不适用了;再者,video mixer必须是AXIS接口,对于vga时序或者摄像头rgb时序而言也不能直接使用,必须用Xilinx的ip转为AXIS流,如此不仅麻烦,而且加大了逻辑资源消耗,这时,本文的纯verilog视频拼接方案就有用了。
4、本设计方案的优越性
一个字:牛逼,表现如下:
1:纯Verilog代码实现,学习性和阅读性达到天花板;
2:移植性达到天花板,Xilinx、Altera和国产FPGA之间自由移植;
3:算法达到天花板,融合了最近邻插值和双线性插值两种算法;
4:实用性达到天花板,将图像缩放和图像拼接融合一体,符合实际的工程项目,做类似项目的兄弟可直接拿去用,一个月工资直接拿到手。。。
5、详细设计方案解读
详细设计方案框图如下:
HDMI输入
视频输入采用HDMI,分辨率为1920x1080,HDMI采用silicon9011芯片解码,silicon9011需要i2c配置才能使用,关于silicon9011芯片的配置和使用,请参考我之前写的文章点击查看:silicon9011
输入只有1路HDMI,为了模拟4路视频输入,直接将解码后的视频数据给到4路图像缩放模块。
图像缩放
本图像缩放模块将常用的双线性插值和最近邻插值算法融合在同一份代码中,通过输入参数选择其中一种算法;代码使用纯verilog实现,没有任何ip,可在Xilinx、Intel、国产FPGA间任意移植;代码以ram和fifo为核心进行数据缓存和插值实现;
网上也有其他图像缩放例程代码,但大多使用了IP,导致在其他FPGA器件上移植变得困难,通用性不好;相比之下,本设计代码就具有通用性;
这里例化了4个图像缩放模块,将4路输入视频缩小至960x540;
关于图像缩放的详细设计说明,请参考我之前写的文章点击查看:图像缩放
图像缓存
使用FDMA方案实现图像三帧缓存,关于FDMA方案实现图像三帧缓存,请参考我之前写的文章点击查看:FDMA图像缓存
VGA时序
这部分很简单,不必多讲,就是生成一个1080P的VGA时序;
HDMI输出
视频输出采用HDMI,分辨率为1920x1080,HDMI采用silicon9134芯片编码,silicon9134需要i2c配置才能使用,关于silicon9134芯片的配置和使用,请参考我之前写的文章点击查看:silicon9134
6、vivado工程详解
工程BD如下:
综合后的代码架构如下:
FPGA资源消耗以及功耗预估如下:
顶层代码源码如下:
`timescale 1ns / 1ps
// -----------------------------------------------------------------------------
// top
//
// Top level of the 4-channel video scale-and-stitch design.
//
// Data flow as wired in this module:
//   1. i2c_config + lut_hdmi drive the shared HDMI I2C bus (hdmi_scl/hdmi_sda).
//   2. The single decoded HDMI input (vin_*) is fed, unchanged, to four
//      identical helai_video_scale instances, each scaling 1920x1080 down to
//      960x540.
//   3. Each scaler output drives its own write channel (ud_w_0 .. ud_w_3) of
//      design_1_wrapper, all clocked by vin_clk.
//   4. video_timing_control, clocked by clk_hdmi, requests pixels from the
//      wrapper's read channel (ud_r_0) and produces the output sync/data that
//      is forwarded to the HDMI transmitter pins (vout_*).
//
// NOTE(review): per the accompanying article, design_1_wrapper is the block
// design containing the DDR3 controller and the frame-buffer logic; its
// internals are not visible from this file.
// -----------------------------------------------------------------------------
module top(
    //ddr3
    output [14:0] DDR3_0_addr,
    output [2:0]  DDR3_0_ba,
    output        DDR3_0_cas_n,
    output [0:0]  DDR3_0_ck_n,
    output [0:0]  DDR3_0_ck_p,
    output [0:0]  DDR3_0_cke,
    output [0:0]  DDR3_0_cs_n,
    output [3:0]  DDR3_0_dm,
    inout  [31:0] DDR3_0_dq,
    inout  [3:0]  DDR3_0_dqs_n,
    inout  [3:0]  DDR3_0_dqs_p,
    output [0:0]  DDR3_0_odt,
    output        DDR3_0_ras_n,
    output        DDR3_0_reset_n,
    output        DDR3_0_we_n,
    input         CLK_IN1_D_0_clk_n,
    input         CLK_IN1_D_0_clk_p,
    output        ddr3_ok,
    //hdmi_in
    output        hdmi_in_nreset,   //9011/9013 reset
    input         vin_clk,          //clock for 9011/9013
    input         vin_hs,           //horizontal synchronization for 9011/9013 (not used inside this module)
    input         vin_vs,           //vertical synchronization for 9011/9013
    input         vin_de,           //data valid for 9011/9013
    input  [23:0] vin_data,         //data for 9011/9013
    inout         hdmi_scl,         //HDMI I2C clock
    inout         hdmi_sda,         //HDMI I2C data
    output        hdmi_nreset,      //9134 reset
    //hdmi_out
    output        vout_hs,          //horizontal synchronization for 9134
    output        vout_vs,          //vertical synchronization for 9134
    output        vout_de,          //data valid for 9134
    output        vout_clk,         //clock for 9134
    output [23:0] vout_data         //data for 9134
);

    // -------------------------------------------------------------------------
    // Scaler geometry, shared by all four helai_video_scale instances.
    // -------------------------------------------------------------------------
    localparam SRC_W = 1920;    // input picture width
    localparam SRC_H = 1080;    // input picture height
    localparam DST_W = 960;     // scaled picture width
    localparam DST_H = 540;     // scaled picture height

    // Scaling ratios. 32'h4000 is 2^14, i.e. 1.0 with SCALE_FRAC_BITS = 14
    // fractional bits as configured on the scaler instances below.
    localparam [31:0] SCALE_X_RATIO = 32'h4000 * (SRC_W-1) / (DST_W-1) - 1;
    localparam [31:0] SCALE_Y_RATIO = 32'h4000 * (SRC_H-1) / (DST_H-1) - 1;

    // -------------------------------------------------------------------------
    // Clocks and resets coming out of the block design
    // -------------------------------------------------------------------------
    wire        clk_200m;
    wire        clk_hdmi;
    wire        pll_resetn;
    wire [0:0]  resetn;
    wire        ui_clk_100m;
    wire        hdmi_clk_rstn;      // declared but not connected anywhere in this module

    // -------------------------------------------------------------------------
    // Frame-buffer read channel
    // -------------------------------------------------------------------------
    wire        ud_r_0_ud_rclk;
    wire [31:0] ud_r_0_ud_rdata;
    wire        ud_r_0_ud_rde;
    wire        ud_r_0_ud_rvs;
    wire        ud_r_0_ud_rempty;   // only connected to the wrapper; unused here

    // -------------------------------------------------------------------------
    // Frame-buffer write channels 0..3
    // -------------------------------------------------------------------------
    wire        ud_w_0_ud_wclk;
    wire [31:0] ud_w_0_ud_wdata;
    wire        ud_w_0_ud_wde;
    wire        ud_w_0_ud_wvs;
    wire        ud_w_0_ud_wfull;    // only connected to the wrapper; unused here

    wire        ud_w_1_ud_wclk;
    wire [31:0] ud_w_1_ud_wdata;
    wire        ud_w_1_ud_wde;
    wire        ud_w_1_ud_wvs;

    wire        ud_w_2_ud_wclk;
    wire [31:0] ud_w_2_ud_wdata;
    wire        ud_w_2_ud_wde;
    wire        ud_w_2_ud_wvs;

    wire        ud_w_3_ud_wclk;
    wire [31:0] ud_w_3_ud_wdata;
    wire        ud_w_3_ud_wde;
    wire        ud_w_3_ud_wvs;

    // -------------------------------------------------------------------------
    // I2C configuration look-up table interface
    // -------------------------------------------------------------------------
    wire [9:0]  lut_index;
    wire [31:0] lut_data;

    // -------------------------------------------------------------------------
    // Output timing generator signals
    // -------------------------------------------------------------------------
    wire [23:0] i_rgb;
    wire        o_hs;
    wire        o_vs;
    wire        o_de;
    wire [23:0] o_rgb;
    wire        o_data_req;

    // -------------------------------------------------------------------------
    // Scaler outputs
    // -------------------------------------------------------------------------
    wire [23:0] resize0_rgb;
    wire        resize0_vs;
    wire        resize0_de;
    wire [23:0] resize1_rgb;
    wire        resize1_vs;
    wire        resize1_de;
    wire [23:0] resize2_rgb;
    wire        resize2_vs;
    wire        resize2_de;
    wire [23:0] resize3_rgb;
    wire        resize3_vs;
    wire        resize3_de;

    // Both HDMI chips share the reset released by the block design.
    assign hdmi_nreset    = pll_resetn;
    assign hdmi_in_nreset = pll_resetn;

    // Scaler outputs -> frame-buffer write channels. The 24-bit RGB value is
    // zero-extended to the 32-bit write data bus.
    assign ud_w_0_ud_wclk  = vin_clk;
    assign ud_w_0_ud_wvs   = resize0_vs;
    assign ud_w_0_ud_wde   = resize0_de;
    assign ud_w_0_ud_wdata = {8'd0, resize0_rgb};

    assign ud_w_1_ud_wclk  = vin_clk;
    assign ud_w_1_ud_wvs   = resize1_vs;
    assign ud_w_1_ud_wde   = resize1_de;
    assign ud_w_1_ud_wdata = {8'd0, resize1_rgb};

    assign ud_w_2_ud_wclk  = vin_clk;
    assign ud_w_2_ud_wvs   = resize2_vs;
    assign ud_w_2_ud_wde   = resize2_de;
    assign ud_w_2_ud_wdata = {8'd0, resize2_rgb};

    assign ud_w_3_ud_wclk  = vin_clk;
    assign ud_w_3_ud_wvs   = resize3_vs;
    assign ud_w_3_ud_wde   = resize3_de;
    assign ud_w_3_ud_wdata = {8'd0, resize3_rgb};

    // Frame-buffer read channel is driven by the output timing generator.
    assign ud_r_0_ud_rclk = clk_hdmi;
    assign ud_r_0_ud_rvs  = o_vs;
    assign ud_r_0_ud_rde  = o_data_req;
    assign i_rgb          = ud_r_0_ud_rdata[23:0];

    // Output timing -> HDMI transmitter pins.
    assign vout_clk  = clk_hdmi;
    assign vout_hs   = o_hs;
    assign vout_vs   = o_vs;
    assign vout_de   = o_de;
    assign vout_data = o_rgb;

    // -------------------------------------------------------------------------
    // I2C master: walks the lut_hdmi table and writes each entry to the bus.
    // lut_data layout as sliced here: [31:24] device address,
    // [23:8] register address, [7:0] register data.
    // -------------------------------------------------------------------------
    i2c_config i2c_config_m0(
        .rst            (~pll_resetn    ),
        .clk            (clk_200m       ),
        .clk_div_cnt    (16'd500        ),
        .i2c_addr_2byte (1'b0           ),
        .lut_index      (lut_index      ),
        .lut_dev_addr   (lut_data[31:24]),
        .lut_reg_addr   (lut_data[23:8] ),
        .lut_reg_data   (lut_data[7:0]  ),
        .error          (               ),
        .done           (               ),
        .i2c_scl        (hdmi_scl       ),
        .i2c_sda        (hdmi_sda       )
    );

    lut_hdmi lut_hdmi_m0(
        .lut_index (lut_index),
        .lut_data  (lut_data )
    );

    // -------------------------------------------------------------------------
    // Scaler channel 0. Channels 1..3 below are configured identically and
    // differ only in the nets their outputs drive.
    // -------------------------------------------------------------------------
    helai_video_scale #(
        .DATA_WIDTH         (8 ),   //Width of input/output data
        .CHANNELS           (3 ),   //Number of channels of DATA_WIDTH, for color images
        .DISCARD_CNT_WIDTH  (8 ),   //Width of the discard counter input
        .INPUT_X_RES_WIDTH  (11),   //Widths of input/output resolution control signals
        .INPUT_Y_RES_WIDTH  (11),
        .OUTPUT_X_RES_WIDTH (11),
        .OUTPUT_Y_RES_WIDTH (11),
        .FRACTION_BITS      (8 ),   //Number of bits for fractional component of coefficients
        .SCALE_INT_BITS     (8 ),   //Width of integer component of scaling factor
        .SCALE_FRAC_BITS    (14),   //Width of fractional component of scaling factor
        .BUFFER_SIZE        (4 )    //Depth of RFIFO
    ) video_scale0 (
        .clk              (vin_clk      ),
        .rst              (~pll_resetn  ),
        .i_vid_data       (vin_data     ),
        .i_vid_de         (vin_de       ),
        .o_vid_fifo_read  (             ),
        .i_vid_vs         (vin_vs       ),
        .o_vout_data      (resize0_rgb  ),
        .o_vout_de        (resize0_de   ),
        .o_vout_vs        (resize0_vs   ),
        .i_vout_read      (1            ),  //output side is always ready to take data
        .i_discard_cnt    (0            ),  //Number of input pixels to discard before processing data. Used for clipping
        .i_src_image_x    (SRC_W-1      ),  //Resolution of input data minus 1
        .i_src_image_y    (SRC_H-1      ),
        .i_des_image_x    (DST_W-1      ),  //Resolution of output data minus 1
        .i_des_image_y    (DST_H-1      ),
        .i_scaler_x_ratio (SCALE_X_RATIO),  //Scaling factors. Format Q SCALE_INT_BITS.SCALE_FRAC_BITS
        .i_scaler_y_ratio (SCALE_Y_RATIO),
        .i_left_offset    (0            ),  //Horizontal offset of the output data, in input pixels
        .i_top_offset     (0            ),  //Vertical offset of the output data, in input pixels
        .i_scaler_type    (0            )   //Selects nearest-neighbor vs. bilinear; NOTE(review): confirm which algorithm 0 selects in helai_video_scale
    );

    // Scaler channel 1
    helai_video_scale #(
        .DATA_WIDTH         (8 ),
        .CHANNELS           (3 ),
        .DISCARD_CNT_WIDTH  (8 ),
        .INPUT_X_RES_WIDTH  (11),
        .INPUT_Y_RES_WIDTH  (11),
        .OUTPUT_X_RES_WIDTH (11),
        .OUTPUT_Y_RES_WIDTH (11),
        .FRACTION_BITS      (8 ),
        .SCALE_INT_BITS     (8 ),
        .SCALE_FRAC_BITS    (14),
        .BUFFER_SIZE        (4 )
    ) video_scale1 (
        .clk              (vin_clk      ),
        .rst              (~pll_resetn  ),
        .i_vid_data       (vin_data     ),
        .i_vid_de         (vin_de       ),
        .o_vid_fifo_read  (             ),
        .i_vid_vs         (vin_vs       ),
        .o_vout_data      (resize1_rgb  ),
        .o_vout_de        (resize1_de   ),
        .o_vout_vs        (resize1_vs   ),
        .i_vout_read      (1            ),
        .i_discard_cnt    (0            ),
        .i_src_image_x    (SRC_W-1      ),
        .i_src_image_y    (SRC_H-1      ),
        .i_des_image_x    (DST_W-1      ),
        .i_des_image_y    (DST_H-1      ),
        .i_scaler_x_ratio (SCALE_X_RATIO),
        .i_scaler_y_ratio (SCALE_Y_RATIO),
        .i_left_offset    (0            ),
        .i_top_offset     (0            ),
        .i_scaler_type    (0            )
    );

    // Scaler channel 2
    helai_video_scale #(
        .DATA_WIDTH         (8 ),
        .CHANNELS           (3 ),
        .DISCARD_CNT_WIDTH  (8 ),
        .INPUT_X_RES_WIDTH  (11),
        .INPUT_Y_RES_WIDTH  (11),
        .OUTPUT_X_RES_WIDTH (11),
        .OUTPUT_Y_RES_WIDTH (11),
        .FRACTION_BITS      (8 ),
        .SCALE_INT_BITS     (8 ),
        .SCALE_FRAC_BITS    (14),
        .BUFFER_SIZE        (4 )
    ) video_scale2 (
        .clk              (vin_clk      ),
        .rst              (~pll_resetn  ),
        .i_vid_data       (vin_data     ),
        .i_vid_de         (vin_de       ),
        .o_vid_fifo_read  (             ),
        .i_vid_vs         (vin_vs       ),
        .o_vout_data      (resize2_rgb  ),
        .o_vout_de        (resize2_de   ),
        .o_vout_vs        (resize2_vs   ),
        .i_vout_read      (1            ),
        .i_discard_cnt    (0            ),
        .i_src_image_x    (SRC_W-1      ),
        .i_src_image_y    (SRC_H-1      ),
        .i_des_image_x    (DST_W-1      ),
        .i_des_image_y    (DST_H-1      ),
        .i_scaler_x_ratio (SCALE_X_RATIO),
        .i_scaler_y_ratio (SCALE_Y_RATIO),
        .i_left_offset    (0            ),
        .i_top_offset     (0            ),
        .i_scaler_type    (0            )
    );

    // Scaler channel 3
    helai_video_scale #(
        .DATA_WIDTH         (8 ),
        .CHANNELS           (3 ),
        .DISCARD_CNT_WIDTH  (8 ),
        .INPUT_X_RES_WIDTH  (11),
        .INPUT_Y_RES_WIDTH  (11),
        .OUTPUT_X_RES_WIDTH (11),
        .OUTPUT_Y_RES_WIDTH (11),
        .FRACTION_BITS      (8 ),
        .SCALE_INT_BITS     (8 ),
        .SCALE_FRAC_BITS    (14),
        .BUFFER_SIZE        (4 )
    ) video_scale3 (
        .clk              (vin_clk      ),
        .rst              (~pll_resetn  ),
        .i_vid_data       (vin_data     ),
        .i_vid_de         (vin_de       ),
        .o_vid_fifo_read  (             ),
        .i_vid_vs         (vin_vs       ),
        .o_vout_data      (resize3_rgb  ),
        .o_vout_de        (resize3_de   ),
        .o_vout_vs        (resize3_vs   ),
        .i_vout_read      (1            ),
        .i_discard_cnt    (0            ),
        .i_src_image_x    (SRC_W-1      ),
        .i_src_image_y    (SRC_H-1      ),
        .i_des_image_x    (DST_W-1      ),
        .i_des_image_y    (DST_H-1      ),
        .i_scaler_x_ratio (SCALE_X_RATIO),
        .i_scaler_y_ratio (SCALE_Y_RATIO),
        .i_left_offset    (0            ),
        .i_top_offset     (0            ),
        .i_scaler_type    (0            )
    );

    // -------------------------------------------------------------------------
    // Block design wrapper (clocking, DDR3 interface, frame-buffer channels).
    // Each write channel is connected to its own scaler's nets; channels 2 and
    // 3 must not reuse the channel-1 nets, otherwise video_scale2/3 have no
    // load and three of the four stored pictures come from the same scaler.
    // -------------------------------------------------------------------------
    design_1_wrapper u_design_1_wrapper(
        .CLK_IN1_D_0_clk_n (CLK_IN1_D_0_clk_n),
        .CLK_IN1_D_0_clk_p (CLK_IN1_D_0_clk_p),
        .DDR3_0_addr       (DDR3_0_addr      ),
        .DDR3_0_ba         (DDR3_0_ba        ),
        .DDR3_0_cas_n      (DDR3_0_cas_n     ),
        .DDR3_0_ck_n       (DDR3_0_ck_n      ),
        .DDR3_0_ck_p       (DDR3_0_ck_p      ),
        .DDR3_0_cke        (DDR3_0_cke       ),
        .DDR3_0_cs_n       (DDR3_0_cs_n      ),
        .DDR3_0_dm         (DDR3_0_dm        ),
        .DDR3_0_dq         (DDR3_0_dq        ),
        .DDR3_0_dqs_n      (DDR3_0_dqs_n     ),
        .DDR3_0_dqs_p      (DDR3_0_dqs_p     ),
        .DDR3_0_odt        (DDR3_0_odt       ),
        .DDR3_0_ras_n      (DDR3_0_ras_n     ),
        .DDR3_0_reset_n    (DDR3_0_reset_n   ),
        .DDR3_0_we_n       (DDR3_0_we_n      ),
        .clk_200m          (clk_200m         ),
        .clk_hdmi          (clk_hdmi         ),
        .ddr3_ok           (ddr3_ok          ),
        .pll_resetn        (pll_resetn       ),
        .resetn            (resetn           ),
        .ud_r_0_ud_rclk    (ud_r_0_ud_rclk   ),
        .ud_r_0_ud_rdata   (ud_r_0_ud_rdata  ),
        .ud_r_0_ud_rde     (ud_r_0_ud_rde    ),
        .ud_r_0_ud_rempty  (ud_r_0_ud_rempty ),
        .ud_r_0_ud_rvs     (ud_r_0_ud_rvs    ),
        .ud_w_0_ud_wclk    (ud_w_0_ud_wclk   ),
        .ud_w_0_ud_wdata   (ud_w_0_ud_wdata  ),
        .ud_w_0_ud_wde     (ud_w_0_ud_wde    ),
        .ud_w_0_ud_wfull   (ud_w_0_ud_wfull  ),
        .ud_w_0_ud_wvs     (ud_w_0_ud_wvs    ),
        .ud_w_1_ud_wclk    (ud_w_1_ud_wclk   ),
        .ud_w_1_ud_wdata   (ud_w_1_ud_wdata  ),
        .ud_w_1_ud_wde     (ud_w_1_ud_wde    ),
        .ud_w_1_ud_wvs     (ud_w_1_ud_wvs    ),
        .ud_w_2_ud_wclk    (ud_w_2_ud_wclk   ),
        .ud_w_2_ud_wdata   (ud_w_2_ud_wdata  ),
        .ud_w_2_ud_wde     (ud_w_2_ud_wde    ),
        .ud_w_2_ud_wvs     (ud_w_2_ud_wvs    ),
        .ud_w_3_ud_wclk    (ud_w_3_ud_wclk   ),
        .ud_w_3_ud_wdata   (ud_w_3_ud_wdata  ),
        .ud_w_3_ud_wde     (ud_w_3_ud_wde    ),
        .ud_w_3_ud_wvs     (ud_w_3_ud_wvs    ),
        .ui_clk_100m       (ui_clk_100m      )
    );

    // -------------------------------------------------------------------------
    // Output timing generator: 1920x1080 display window starting at (0,0).
    // o_data_req is used above as the read enable of the frame-buffer read
    // channel, and the returned pixel comes back in on i_rgb.
    // -------------------------------------------------------------------------
    video_timing_control vga(
        .i_clk      (clk_hdmi   ),
        .i_rst_n    (pll_resetn ),
        .i_start_x  (0          ),
        .i_start_y  (0          ),
        .i_disp_h   (1920       ),
        .i_disp_v   (1080       ),
        .i_rgb      (i_rgb      ),
        .o_hs       (o_hs       ),
        .o_vs       (o_vs       ),
        .o_de       (o_de       ),
        .o_rgb      (o_rgb      ),
        .o_data_req (o_data_req )
    );

endmodule
7、上板调试验证
开发板:Xilinx Artix7-35T开发板;
开发环境:vivado2019.1;
输入:HDMI,1080P,silicon9011解码;
输出:HDMI,1080P,silicon9134编码;
静态演示如下:
由于采用了懂王的视频,所以审核不过,只能截取点图片了。。。
8、福利:工程源码获取
福利:工程代码的获取
代码太大,无法邮箱发送,以某度网盘链接方式发送;
资料如下:获取方式:私信,或者文章结尾的V名片;