这是对《基于Matlab与FPGA的图像处理教程》的学习笔记,代码和内容摘取自书中。
心得: 使用FPGA进行硬件加速的重点是消除或者减少浮点数运算,转换为定点运算,然后通过pipeline流水设计转为并行实现加速。
原理和方法
RGB与(YUV/YCbCr444)之间的原始公式(基于生物仿真学(Biometric)实验结果的),使用摄像头或者其他传感器进行转换的时候应先了解转换公式(参数会有不同,否则会导致偏色),下面是常用的计算公式。
公式放大256倍(也就是向高位移8位)得到参数
然后忽略小数(fpga中消耗资源较大)进行运算
运算完毕后移位回来
Y = ( R*76 + G*150 + B*29) >>8
Cb = (-R*43 - G*84 + B*128 + 32768) >>8
Cr = ( R*128 - G*107 - B*20 + 32768) >>8
Matlab程序
clc;
% -------------------------------------------------------------------------
% Load the source RGB picture and show it in the top-left tile
IMG1 = imread('../../0_images/Scart.jpg');    % read the JPEG image
h = size(IMG1,1);                             % image height (rows)
w = size(IMG1,2);                             % image width  (columns)
subplot(2,2,1); imshow(IMG1); title('RGB Image');
% -------------------------------------------------------------------------
% Fixed-point RGB -> YCbCr conversion (coefficients pre-scaled by 256):
%   Y  = ( R*76  + G*150 + B*29          ) >> 8
%   Cb = (-R*43  - G*84  + B*128 + 32768 ) >> 8
%   Cr = ( R*128 - G*107 - B*20  + 32768 ) >> 8
% The whole image is processed plane-by-plane instead of pixel-by-pixel;
% bitshift works element-wise, so the result equals the per-pixel loop.
IMG1 = double(IMG1);                          % leave uint8 so the sums cannot saturate
red   = IMG1(:,:,1);
green = IMG1(:,:,2);
blue  = IMG1(:,:,3);
IMG_YCbCr = zeros(h,w,3);
IMG_YCbCr(:,:,1) = bitshift( red*76  + green*150 + blue*29,           -8);
IMG_YCbCr(:,:,2) = bitshift(-red*43  - green*84  + blue*128 + 32768,  -8);
IMG_YCbCr(:,:,3) = bitshift( red*128 - green*107 - blue*20  + 32768,  -8);
% -------------------------------------------------------------------------
% Show the three resulting planes as grayscale images
IMG_YCbCr = uint8(IMG_YCbCr);
subplot(2,2,2); imshow(IMG_YCbCr(:,:,1)); title('Y Channel');
subplot(2,2,3); imshow(IMG_YCbCr(:,:,2)); title('Cb Channel');
subplot(2,2,4); imshow(IMG_YCbCr(:,:,3)); title('Cr Channel');
Verilog程序
这里需要注意的是pipeline,在FPGA中把这套公式拆解成了三个step。
第一步是乘积,会延时1个clk。
第二步是累加,会延时1个clk。
第三步是移位,会延时1个clk。
综上所述,需要使用3个clk的延时来实现pipeline。
`timescale 1ns/1ns
// RGB888 -> YCbCr444 colour-space converter.
//
// The conversion is done in fixed point: every coefficient is pre-scaled by
// 256, the products are summed in 16 bits and the upper byte of the sum is
// taken as the result (equivalent to >> 8). The data path is a three-stage
// pipeline (multiply, accumulate, shift), so vsync/href are delayed by the
// same three clocks to stay aligned with the pixel data.
module VIP_RGB888_YCbCr444
(
    // global clock / reset
    input               clk,            // pixel clock
    input               rst_n,          // active-low reset (only used by the sync delay line)

    // incoming video stream
    input               per_img_vsync,  // input vsync
    input               per_img_href,   // input href (pixel valid)
    input       [7:0]   per_img_red,    // input red component
    input       [7:0]   per_img_green,  // input green component
    input       [7:0]   per_img_blue,   // input blue component

    // outgoing video stream (3 clocks later)
    output              post_img_vsync, // delayed vsync
    output              post_img_href,  // delayed href
    output      [7:0]   post_img_Y,     // luma, forced to 0 while href is low
    output      [7:0]   post_img_Cb,    // blue-difference chroma, forced to 0 while href is low
    output      [7:0]   post_img_Cr     // red-difference chroma, forced to 0 while href is low
);

    /*********************************************
     Full/PC range YCbCr:
        Y  =  R*0.299 + G*0.587 + B*0.114
        Cb = -R*0.169 - G*0.331 + B*0.5   + 128
        Cr =  R*0.5   - G*0.419 - B*0.081 + 128
     Scaled by 256 --->
        Y  = ( 76*R + 150*G +  29*B        ) >> 8
        Cb = (-43*R -  84*G + 128*B + 32768) >> 8
        Cr = (128*R - 107*G -  20*B + 32768) >> 8
    **********************************************/

    // Coefficient magnitudes (x256); signs are applied in the accumulate stage.
    localparam [7:0]  K_Y_R  = 8'd76,  K_Y_G  = 8'd150, K_Y_B  = 8'd29;
    localparam [7:0]  K_CB_R = 8'd43,  K_CB_G = 8'd84,  K_CB_B = 8'd128;
    localparam [7:0]  K_CR_R = 8'd128, K_CR_G = 8'd107, K_CR_B = 8'd20;
    localparam [15:0] CHROMA_BIAS = 16'd32768;   // 128 << 8

    // Number of register stages in the data path.
    localparam LATENCY = 3;

    //--------------------------------------------------
    // Data path registers
    reg [15:0] y_r_mul,  y_g_mul,  y_b_mul;     // stage 1: products for Y
    reg [15:0] cb_r_mul, cb_g_mul, cb_b_mul;    // stage 1: products for Cb
    reg [15:0] cr_r_mul, cr_g_mul, cr_b_mul;    // stage 1: products for Cr
    reg [15:0] y_sum, cb_sum, cr_sum;           // stage 2: 16-bit accumulations
    reg [7:0]  y_q,   cb_q,   cr_q;             // stage 3: upper byte of each sum

    // All three stages use non-blocking assignments, so each stage consumes
    // the value its predecessor held before this clock edge.
    always @(posedge clk)
    begin
        // Stage 1 : 8-bit x 8-bit multiplies, evaluated at 16 bits
        y_r_mul  <= per_img_red   * K_Y_R;
        cb_r_mul <= per_img_red   * K_CB_R;
        cr_r_mul <= per_img_red   * K_CR_R;
        y_g_mul  <= per_img_green * K_Y_G;
        cb_g_mul <= per_img_green * K_CB_G;
        cr_g_mul <= per_img_green * K_CR_G;
        y_b_mul  <= per_img_blue  * K_Y_B;
        cb_b_mul <= per_img_blue  * K_CB_B;
        cr_b_mul <= per_img_blue  * K_CR_B;

        // Stage 2 : accumulate. The arithmetic is unsigned modulo 2^16;
        // intermediate differences may wrap, but every final sum fits in
        // 16 bits, so the wrapped result is the true value.
        y_sum  <= y_r_mul  + y_g_mul  + y_b_mul;
        cb_sum <= cb_b_mul - cb_r_mul - cb_g_mul + CHROMA_BIAS;
        cr_sum <= cr_r_mul - cr_g_mul - cr_b_mul + CHROMA_BIAS;

        // Stage 3 : divide by 256 by keeping the high byte
        y_q  <= y_sum[15:8];
        cb_q <= cb_sum[15:8];
        cr_q <= cr_sum[15:8];
    end

    //--------------------------------------------------
    // Sync signals travel through a LATENCY-deep shift register so they
    // leave the module on the same clock as the matching pixel.
    reg [LATENCY-1:0] vsync_sr;
    reg [LATENCY-1:0] href_sr;

    always @(posedge clk or negedge rst_n)
    begin
        if (!rst_n)
        begin
            vsync_sr <= {LATENCY{1'b0}};
            href_sr  <= {LATENCY{1'b0}};
        end
        else
        begin
            vsync_sr <= {vsync_sr[LATENCY-2:0], per_img_vsync};
            href_sr  <= {href_sr[LATENCY-2:0],  per_img_href};
        end
    end

    assign post_img_vsync = vsync_sr[LATENCY-1];
    assign post_img_href  = href_sr[LATENCY-1];

    // Pixel outputs are blanked to zero outside the active line.
    assign post_img_Y  = post_img_href ? y_q  : 8'd0;
    assign post_img_Cb = post_img_href ? cb_q : 8'd0;
    assign post_img_Cr = post_img_href ? cr_q : 8'd0;

endmodule