各位好!
CUDA多维指针怎么寻址呢?
void print_cpu(double **cv,double &a)
{
a+=cv[2][56];
}
global void print_gpu_1(double *TCV,double *e)
{
int bid = blockDim.x * blockIdx.x + threadIdx.x;
//cuPrintf(“TCV:%lf\n”,TCV[2][bid]);
if(bid == 56)
*e=TCV[bid];
}
global void print_gpu_2(double *TCV[4],double *e)
{
int bid = blockDim.x * blockIdx.x + threadIdx.x;
if(bid == 56)
*e+=TCV[2][bid]+1;
}
int main()
{
size_t memSize;
convert();
double *e;
memSize=100*sizeof(double);
for(int i=0;i<MN;i++){
//##
for(int j=0;j<4;j++){
cudaMalloc((void**)&TCV[i][j],memSize);
}
for(int j=0;j<4;j++){
cudaMemcpy(TCV[i][j],conv_cv[i][j] ,memSize,cudaMemcpyHostToDevice);
}
}
cudaMalloc((void**)&e,sizeof(double));
int GridSize =100/blockSize;
print_gpu_1<<<GridSize,blockSize>>>(TCV[2][2],e);
cudaThreadSynchronize();
double de;
cudaMemcpy(&de, e, sizeof(double), cudaMemcpyDeviceToHost);
printf(“gpu 1 %lf\n\n”,de);
print_gpu_2<<<GridSize,blockSize>>>(TCV[2],e);
cudaThreadSynchronize();
cudaMemcpy(&de, e, sizeof(double), cudaMemcpyDeviceToHost);
printf(“gpu 2 %lf\n\n”,de);
double a=0;
print_cpu(conv_cv[2],a);
printf(“cpu %lf\n”,a);
cudaPrintfDisplay(stdout, true);
return 0;
}
代码中printf_cpu和print_gpu_1 OK,print_gpu_2 not OK
如果希望令printf_gpu_2实现print_cpu的寻址方式,CUDA中应该怎么做呢?