c JPEG 1D DCT 优化二（AAN）

news2025/12/28 11:07:54

这两个图可能就是AAN 的数学模型

优化DCT就是用代码实现矩阵9,10

矩阵9和10已经把64个系数缩小到一半，即32个。光从这两图可看出，优化后乘法少了64-32+4=36个（o[0]的4个cos系数都是1，可再省4个乘法），加法少了64-32-8=24个（减去的8应是预先计算z0~z7所需的8次加减法）。估计优化后时间可减少百分之40左右。

实际编码640×480 的图片，程序执行时间缩短为0.13秒。

要想减少DCT时间，就要尽量减少DCT函数的代码量；例如可以不查表，直接把系数写成常量代入算式。

最后一种优化就是像ffmpeg一样用汇编写DCT函数部分了。这种我就搞不定了。

下一个目标，h264!

实现9,10 矩阵代码：


#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#define PI 3.1415926

/*
 * Cosine tables for the 8-point 1D DCT, split by output parity.
 *
 * The direct form computes o[k] = c(k) * sum_n i[n] * cos(PI*(2*n+1)*k/16)
 * over an 8x8 table (64 products). For even k the table row is symmetric
 * about its middle (column 7-n equals column n) and for odd k it is
 * antisymmetric, so only columns n = 0..3 need to be stored once the
 * inputs have been folded into sums / differences.
 *
 * cs1 holds rows k = 0, 2, 4, 6 and cs2 holds rows k = 1, 3, 5, 7, four
 * entries per row, values rounded to two decimals.
 */
static const double cs1[16] = {
	1,     1,     1,     1,
	0.92,  0.38, -0.38, -0.92,
	0.71, -0.71, -0.71,  0.71,
	0.38, -0.92,  0.92, -0.38,
};

static const double cs2[16] = {
	0.98,  0.83,  0.56,  0.20,
	0.83, -0.20, -0.98, -0.56,
	0.56, -0.98,  0.20,  0.83,
	0.20, -0.56,  0.83, -0.98,
};

/* Weighted sum of four values, accumulated strictly left to right. */
static double dot4(const double c[4], const double v[4])
{
	return c[0] * v[0] + c[1] * v[1] + c[2] * v[2] + c[3] * v[3];
}

/*
 * 8-point 1D DCT using the even/odd symmetry split.
 *
 * i: 8 input samples (read only).
 * o: receives the 8 output coefficients; o[0] is scaled by 1/(2*sqrt(2)),
 *    every other coefficient by 1/2.
 * Returns 0 always.
 *
 * The samples are double rather than unsigned char because intermediate
 * values exceed the range of char.
 *
 * Defined at file scope: a function nested inside main() is a GNU C
 * extension and is rejected by other compilers.
 */
static int DCT(const double i[8], double o[8])
{
	const double dc_scale = 1.0 / (2 * sqrt(2));
	const double ac_scale = 1.0 / 2;

	/* Fold mirrored samples: sums feed even outputs, differences odd ones. */
	const double sums[4] = {
		i[0] + i[7], i[1] + i[6], i[2] + i[5], i[3] + i[4]
	};
	const double diffs[4] = {
		i[0] - i[7], i[1] - i[6], i[2] - i[5], i[3] - i[4]
	};

	for (int r = 0; r < 4; r++) {
		o[2 * r]     = dot4(&cs1[4 * r], sums) * (r == 0 ? dc_scale : ac_scale);
		o[2 * r + 1] = dot4(&cs2[4 * r], diffs) * ac_scale;
	}

	return 0;
}

/*
 * Runs a separable 2D DCT on one hard-coded 8x8 block (1D DCT over each
 * row, then over each column of the intermediate result) and prints the
 * 64 coefficients, eight per line.
 */
int main(void){
	/* Sample 8x8 input block, stored row by row. */
	const double i[64] = {
		-76,-73,-67,-62,-58,-67,-64,-55,
		-65,-69,-73,-38,-19,-43,-59,-56,
		-66,-69,-60,-15,16,-24,-62,-55,
		-65,-70,-57,-6,26,-22,-58,-59,
		-61,-67,-60,-24,-2,-40,-60,-58,
		-49,-63,-68,-58,-51,-60,-70,-53,
		-43,-57,-64,-69,-73,-67,-63,-45,
		-41,-49,-59,-60,-63,-52,-50,-34
	};

	/* Pass 1: transform each row; rows are contiguous so no staging copy. */
	double w[64] = {0};
	for (int row = 0; row < 8; row++) {
		DCT(&i[8 * row], &w[8 * row]);
	}

	/* Pass 2: gather each column, transform it, scatter back as a column. */
	double out[64] = {0};
	for (int col = 0; col < 8; col++) {
		double column[8];
		double coeff[8];

		for (int row = 0; row < 8; row++) {
			column[row] = w[8 * row + col];
		}
		DCT(column, coeff);
		for (int row = 0; row < 8; row++) {
			out[8 * row + col] = coeff[row];
		}
	}

	/* Print the 2D DCT coefficients. */
	for (int row = 0; row < 8; row++) {
		for (int col = 0; col < 8; col++) {
			printf("%f ,", out[8 * row + col]);
		}
		puts("");
	}

	return 0;
}