题目:
生成2048*2048个随机单精度实数;
实现两维Real to complex FFT参考代码;
使用OneMKL计算两维Real to complex FFT;
对两维FFT输出数据进行全数据比对;
平均性能数据比对,输出FFT参考代码平均运行时间和oneMKL FFT平均运行时间。
代码:
#include<stdio.h>
#include<stdlib.h>
#include<math.h>
#include<time.h>
#include<fftw3.h>
#include<mkl.h>
#include"mkl_vsl.h"
#include"mkl_service.h"
#include"mkl_dfti.h"
int oneMKL_Plan(float *input,MKL_Complex8 *output,int N)
{
int time;
MKL_LONG sizeN[2]={N,N};
MKL_LONG rs[3]={0,N,1};
MKL_LONG cs[3]={0,N/2+1,1};
DFTI_DESCRIPTOR_HANDLE handle=NULL;
DftiCreateDescriptor(&handle,DFTI_SINGLE,DFTI_REAL,2,sizeN);
DftiSetValue(handle,DFTI_PLACEMENT,DFTI_NOT_INPLACE);
DftiSetValue(handle,DFTI_CONJUGATE_EVEN_STORAGE,DFTI_COMPLEX_COMPLEX);
DftiSetValue(handle,DFTI_INPUT_STRIDES,rs);
DftiSetValue(handle,DFTI_OUTPUT_STRIDES,cs);
DftiCommitDescriptor(handle);
clock_t start,end;
start=clock();
DftiComputeForward(handle,input,output);
end=clock();
printf(" Times==1\n");
printf(" oneMKL use %f\n",(double)(end-start)/CLOCKS_PER_SEC);
start=clock();
for(time=0;time<1000;time++) DftiComputeForward(handle,input,output);
end=clock();
printf(" Times==1000\n");
printf(" oneMKL use %f\n",(double)(end-start)/CLOCKS_PER_SEC);
DftiFreeDescriptor(&handle);
return 0;
}
int FFTW3_Plan(float *input,fftwf_complex *output,int N)
{
int time;
fftw_plan flag;
clock_t start,end;
flag=fftwf_plan_dft_r2c_2d(N,N,input,output,FFTW_MEASURE);
start=clock();
fftwf_execute(flag);
end=clock();
printf(" Times==1\n");
printf(" FFTW3 use %f\n",(double)(end-start)/CLOCKS_PER_SEC);
start=clock();
for(time=0;time<1000;time++) fftwf_execute(flag);
end=clock();
printf(" Times==1000\n");
printf(" FFTW3 use %f\n",(double)(end-start)/CLOCKS_PER_SEC);
//fftwf_destory_plan(plan);
return 0;
}
int main()
{
static const int N=2048;
float *input=(float *)malloc(sizeof(float)*N*N);
VSLStreamStatePtr stream;
vslNewStream(&stream,VSL_BRNG_MT19937,1);
vsRngUniform(VSL_RNG_METHOD_UNIFORM_STD,stream,N*N,input,0.0f,1.0f);
//FFTW
fftwf_complex *outputfftw=(fftwf_complex *)fftwf_malloc(sizeof(fftwf_complex)*N*(N/2+1));
oneMKL_Plan(input,outputfftw,N);
//oneMKL
MKL_Complex8 *outputmkl=(MKL_Complex8 *)malloc(sizeof(MKL_Complex8)*N*(N/2+1));
FFTW3_Plan(input,outputmkl,N);
float part_real,part_imag,flag;
flag=0;
for(int i=0;i<(N+2)/2*N;i++)
{
part_real=fabs(outputfftw[i][0]-outputmkl[i].real);
part_imag=fabs(outputfftw[i][1]-outputmkl[i].imag);
if(part_real>0.00001 || part_imag>0.000001) flag=1;
}
if(flag==0) printf(" Residual < 0.000001 , Match\n\n");
else printf(" Residual < 0.000001 , Don't Match\n\n");
free(input);
free(outputmkl);
fftwf_free(outputfftw);
return 0;
}