急求解,请教达人kernel问题……

device int2 kernelFetchPixelState(short pixIndexX , short pixIndexY , short pixZ0 , short pixZ1)
{
long4 accumBits0 , accumBits1;

long accumBit[8];

//从纹理中读取待判断的像素的位列表
accumBits0 = tex2D(tf00, pixIndexX, pixIndexY);
accumBits1 = tex2D(tf01, pixIndexX, pixIndexY);

accumBit[0] = accumBits0.x;
accumBit[1] = accumBits0.y;
accumBit[2] = accumBits0.z;
accumBit[3] = accumBits0.w;

accumBit[4] = accumBits1.x;
accumBit[5] = accumBits1.y;
accumBit[6] = accumBits1.z;
accumBit[7] = accumBits1.w;

short globalPos , localPos;
 
//统计非空像素个数的计数器
short aCounter = 0;

//判断像素是否在目标范围之内
for(int i = pixZ0 ; i <= pixZ1 ; i++)
{
	//global position is i / 32
	globalPos = i>>5;

	//local position is i % 32
	localPos  = i & 31;

	if(accumBit[globalPos]>>localPos & 1)
	{
		aCounter++;
	}
}

return make_int2(aCounter , 0);

}

上面这段代码是设备上的一个kernel,但是如果调用时不知道为什么就会出现驱动重启,然后程序崩掉,花屏。后来进行简化形式的测试:
device int2 kernelFetchPixelState(short pixIndexX , short pixIndexY , short pixZ0 , short pixZ1)
{
long4 accumBits0 , accumBits1;

long accumBit[8];

//从纹理中读取待判断的像素的位列表
accumBits0 = tex2D(tf00, pixIndexX, pixIndexY);
accumBits1 = tex2D(tf01, pixIndexX, pixIndexY);

accumBit[0] = accumBits0.x;
accumBit[1] = accumBits0.y;
accumBit[2] = accumBits0.z;
accumBit[3] = accumBits0.w;

accumBit[4] = accumBits1.x;
accumBit[5] = accumBits1.y;
accumBit[6] = accumBits1.z;
accumBit[7] = accumBits1.w;

short globalPos , localPos;
 
//统计非空像素个数的计数器
short aCounter = 0;

//以下为改动部分+++++++++++++++++++++++//
//判断像素是否在目标范围之内
globalPos = pixZ0>>5;
localPos   = pixZ0 & 31;
aCounter = accumBit[globalPos]>>localPos & 1;
return make_int2(aCounter , 0);

}
后面的代码改为上述形式调用时仍然崩掉,但是直接用下面的形式却可以正常的调用:
device int2 kernelFetchPixelState(short pixIndexX , short pixIndexY , short pixZ0 , short pixZ1)
{
long4 accumBits0 , accumBits1;

long accumBit[8];

//从纹理中读取待判断的像素的位列表
accumBits0 = tex2D(tf00, pixIndexX, pixIndexY);
accumBits1 = tex2D(tf01, pixIndexX, pixIndexY);

accumBit[0] = accumBits0.x;
accumBit[1] = accumBits0.y;
accumBit[2] = accumBits0.z;
accumBit[3] = accumBits0.w;

accumBit[4] = accumBits1.x;
accumBit[5] = accumBits1.y;
accumBit[6] = accumBits1.z;
accumBit[7] = accumBits1.w;

short globalPos , localPos;
 
//统计非空像素个数的计数器
short aCounter = 0;

//以下为改动部分+++++++++++++++++++++++//
//判断像素是否在目标范围之内
globalPos = 0;//或其它合法常数
localPos = 11;//或其它合法常数
aCounter = accumBit[globalPos]>>localPos & 1;
return make_int2(aCounter , 0);

}
这样调用的话就没有问题了。

有没有哪位达人知道为什么??小弟很是迷惑很是着急啊,程序已经调试老长时间了……:cry2:

[ 本帖最后由 BugRunner 于 2010-8-27 23:05 编辑 ]