// Build color pallet
// 每个block为 32*16个线程 ,显卡的计算能力2.0
global void buildColorPallet(float *img3f, size_t pitchSrc, int idx1i, size_t pitchDst, int width, int height,
int d_PartialHistograms, int binNum)
{
extern shared int s_Hist[];
unsigned int tid = threadIdx.yblockDim.x,+threadIdx.x;
#pragma unroll 4
for(int i = tid; i < binNum ; i += blockDim.xblockDim.y
s_Hist[i] = 0;
__syncthreads();
unsigned int idx = blockIdx.xblockDim.x+threadIdx.x);
unsigned int idy = blockIdx.yblockDim.y+threadIdx.y);
if( idx >= width || idy >= height)
return;
unsigned int id = idx + idy * pitchDst;
unsigned int imSize = width * height;
int data = __float2int_rn(img3f[id])+ __float2int_rn(img3f[id + imSize]) + __float2int_rn(img3f[id + 2 * imSize ] );
atomicAdd( s_Hist + data, 1);
__syncthreads();
idx1i[id] = data;
d_PartialHistograms[UMAD(blockIdx.y, gridDim.x, blockIdx.x) * binNum + data] = s_Hist[data];
}
问题是,s_Hist中的值全为0,atomicAdd( s_Hist + data, 1)这个操作就好像没有,请问一下,问题出在哪?
[/i]