下面是一个 kernel 函数,用 Nsight 调试时出错,显示:error = access violation on load (shared memory)。在调用这个 kernel 的函数中,用 cudaStatus = cudaDeviceSynchronize(); 检查,显示 cudaDeviceSynchronize returned error code 30。
琢磨了很久,没有结果。请问,这是什么错误呢?
__global__ static void ComputeMatrix(BYTE* NewImage, int LocalImageWidth,int LocalImageHeight,float final_sum1,int GPUPMH )
{
int xIndex=__mul24(blockDim.x,blockIdx.x)+threadIdx.x;
int yIndex=__mul24(blockDim.y,blockIdx.y)+threadIdx.y;
// int xIndex=blockDim.x*blockIdx.x+threadIdx.x;
// int yIndex=blockDim.y*blockIdx.y+threadIdx.y;
int tid_in_x=threadIdx.x;
int tid_in_y=threadIdx.y;
int tid_in_block=__mul24(threadIdx.y,blockDim.x)+threadIdx.x;
// int tid_in_block=threadIdx.y*blockDim.x+threadIdx.x;
unsigned int index=yIndex*LocalImageWidth+xIndex;
__shared__ int sin[K][K]; //图像
__shared__ int sgrayCoMatrixRD[G][G];//灰度共生矩阵
sin[tid_in_x][tid_in_y]=0;
__syncthreads();//块内线程同步
sgrayCoMatrixRD[tid_in_x][tid_in_y]=0;
__syncthreads();//块内线程同步
//传值
sin[tid_in_x][tid_in_y]=(unsigned int)NewImage[index]; //停到这不动,sin 不变,tid_in_x 不变,都是0
__syncthreads();//块内线程同步
atomicAdd(&sgrayCoMatrixRD[sin[tid_in_x-D][tid_in_y+D]][sin[tid_in_x][tid_in_y]],1);
atomicAdd(&sgrayCoMatrixRD[sin[tid_in_x][tid_in_y]][sin[tid_in_x-D][tid_in_y+D]],1);
GPUPMH[index]=sgrayCoMatrixRD[tid_in_x][tid_in_y];
__syncthreads();//块内线程同步
哦 我翻了一下,那时候没写错
// Accumulate the symmetric grey-level co-occurrence pair for the neighbour at
// offset (-D, +D), but only when that neighbour lies inside the K x K shared
// tile `sin`. Both atomicAdd calls read sin[nx][ny], so both must sit inside
// the guarded block; an unbraced `if` would protect only the first one and the
// second would still read shared memory out of bounds.
{
    const int nx = tid_in_x - D;   // neighbour row index into sin
    const int ny = tid_in_y + D;   // neighbour column index into sin

    // Index 0 is a valid position, hence `>= 0`. The bound is K because sin is
    // declared as sin[K][K].
    if (nx >= 0 && nx < K && ny >= 0 && ny < K)
    {
        const int neighbour = sin[nx][ny];
        const int centre    = sin[tid_in_x][tid_in_y];

        // NOTE(review): assumes every value stored in sin is in [0, G), since
        // it is used to index sgrayCoMatrixRD[G][G] — confirm against the
        // image's grey-level range.
        atomicAdd(&sgrayCoMatrixRD[neighbour][centre], 1);
        atomicAdd(&sgrayCoMatrixRD[centre][neighbour], 1);
    }
}
也就是先判断 sin 的 x、y 两个方向的下标都落在 [0,G) 范围内,再去访问。