#define ROW 256
#define COL 256
int main(int argc, char* argv)
{
/***********************************************************************
***************** 256256 2D FFT **************************
/***********************************************************************/
double idata_h = (double )malloc(ROWCOLsizeof(double));
memset(idata_h,0,ROWCOL*sizeof(double));
double2 *idataC_h = (double2 *)malloc(ROW*COL*sizeof(double2));
memset(idataC_h,0,ROW*COL*sizeof(double2));
double2 *odata_h = (double2 *)malloc(ROW*COL*sizeof(double2));
memset(odata_h,0,ROW*COL*sizeof(double2));
double2 *odataC_h = (double2 *)malloc(ROW*COL*sizeof(double2));
memset(odataC_h,0,ROW*COL*sizeof(double2));
for (int i = 0; i < ROW*COL; i++)
{
idataC_h[i].x=idata_h[i] = rand();
idataC_h[i].y=0;
}
double *idata_d;
CudaSafeCall(cudaMalloc((void **)&idata_d, ROW*COL*sizeof(double)));
CudaSafeCall(cudaMemset((void *)idata_d, 0 ,ROW*COL * sizeof(double)));
double2 *odata_d;
CudaSafeCall(cudaMalloc((void **)&odata_d, ROW*COL*sizeof(double2)));
CudaSafeCall(cudaMemset((void *)odata_d, 0 ,ROW*COL * sizeof(double2)));
double2 *idataC_d;
CudaSafeCall(cudaMalloc((void **)&idataC_d, ROW*COL*sizeof(double2)));
CudaSafeCall(cudaMemset((void *)idataC_d, 0 ,ROW*COL * sizeof(double2)));
double2 *odataC_d;
CudaSafeCall(cudaMalloc((void **)&odataC_d, ROW*COL*sizeof(double2)));
CudaSafeCall(cudaMemset((void *)odataC_d, 0 ,ROW*COL * sizeof(double2)));
//拷贝内存信号到显存
CudaSafeCall(cudaMemcpy(idata_d, idata_h, ROW*COL * sizeof(double), cudaMemcpyHostToDevice));
CudaSafeCall(cudaMemcpy(idataC_d, idataC_h, ROW*COL * sizeof(double2), cudaMemcpyHostToDevice));
printf("2D ROW = %d COL = %d points fft start\n",ROW,COL);
cufftHandle planD2Z; //创建CUFFT D2Z句柄
checkCudaErrors(cufftPlan2d(&planD2Z, ROW,COL, CUFFT_D2Z));
cufftHandle planZ2Z; //创建CUFFT Z2Z句柄
checkCudaErrors(cufftPlan2d(&planZ2Z, ROW,COL, CUFFT_Z2Z));
checkCudaErrors(cufftExecD2Z(planD2Z, idata_d, odata_d));
checkCudaErrors(cufftExecZ2Z(planZ2Z, idataC_d, odataC_d,CUFFT_FORWARD));
//拷贝显存到内存信号
CudaSafeCall(cudaMemcpy(odata_h,odata_d,ROW*COL * sizeof(double2),cudaMemcpyDeviceToHost));
CudaSafeCall(cudaMemcpy(odataC_h,odataC_d,ROW*COL * sizeof(double2),cudaMemcpyDeviceToHost));
//验证结果的正确性
for(int i=0;i<ROW*COL;i++)
{
if ((fabs(odata_h[i].x - odataC_h[i].x)>0.1)||(fabs(odata_h[i].y - odataC_h[i].y)>0.1))
{
printf("fft restult is wrong\n");
}
}
printf("2D ROW = %d COL = %d points fft finishes\n",ROW,COL);
cudaFree(idata_d);
cudaFree(odata_d);
cudaFree(idataC_d);
cudaFree(odataC_d);
free(idata_h);
free(odata_h);
free(idataC_h);
free(odataC_h);
cudaDeviceReset();
return 0;
}
以上代码是想完成idata_h是double型的矩阵,idataC_h是复数数据,其实部与idata_h相等,虚部为0,对idata_h和idataC_h做FFT,计算的结果最终存入odata_h,odataC_h中进行比较发现前128项相等,从129项就不等了?帮我看那看是不是程序哪里出了问题?