device int2 kernelFetchPixelState(short pixIndexX , short pixIndexY , short pixZ0 , short pixZ1)
{
long4 accumBits0 , accumBits1;
long accumBit[8];
//从纹理中读取待判断的像素的位列表
accumBits0 = tex2D(tf00, pixIndexX, pixIndexY);
accumBits1 = tex2D(tf01, pixIndexX, pixIndexY);
accumBit[0] = accumBits0.x;
accumBit[1] = accumBits0.y;
accumBit[2] = accumBits0.z;
accumBit[3] = accumBits0.w;
accumBit[4] = accumBits1.x;
accumBit[5] = accumBits1.y;
accumBit[6] = accumBits1.z;
accumBit[7] = accumBits1.w;
short globalPos , localPos;
//统计非空像素个数的计数器
short aCounter = 0;
//判断像素是否在目标范围之内
for(int i = pixZ0 ; i <= pixZ1 ; i++)
{
//global position is i / 32
globalPos = i>>5;
//local position is i % 32
localPos = i & 31;
if(accumBit[globalPos]>>localPos & 1)
{
aCounter++;
}
}
return make_int2(aCounter , 0);
}
上面这段代码是一个在设备端执行的函数(device 函数),但调用它时不知为何会导致显卡驱动重启,随后程序崩溃并出现花屏。后来我把它简化成下面的形式进行测试:
device int2 kernelFetchPixelState(short pixIndexX , short pixIndexY , short pixZ0 , short pixZ1)
{
long4 accumBits0 , accumBits1;
long accumBit[8];
//从纹理中读取待判断的像素的位列表
accumBits0 = tex2D(tf00, pixIndexX, pixIndexY);
accumBits1 = tex2D(tf01, pixIndexX, pixIndexY);
accumBit[0] = accumBits0.x;
accumBit[1] = accumBits0.y;
accumBit[2] = accumBits0.z;
accumBit[3] = accumBits0.w;
accumBit[4] = accumBits1.x;
accumBit[5] = accumBits1.y;
accumBit[6] = accumBits1.z;
accumBit[7] = accumBits1.w;
short globalPos , localPos;
//统计非空像素个数的计数器
short aCounter = 0;
//以下为改动部分+++++++++++++++++++++++//
//判断像素是否在目标范围之内
globalPos = pixZ0>>5;
localPos = pixZ0 & 31;
aCounter = accumBit[globalPos]>>localPos & 1;
return make_int2(aCounter , 0);
}
把函数的后半部分改成上述形式后,调用时仍然会崩溃;但如果改成下面这种直接使用常数下标的形式,却可以正常调用:
// Constant-index test variant: fetches the bit list of pixel
// (pixIndexX, pixIndexY) from the textures tf00 and tf01 and returns one
// fixed bit of it (word 0, bit 11) in the .x component of the result; the .y
// component is always 0. pixZ0 and pixZ1 are not used by this variant.
device int2 kernelFetchPixelState(short pixIndexX , short pixIndexY , short pixZ0 , short pixZ1)
{
    // Read the bit list of the pixel under test: one long4 texel per texture.
    const long4 lowerTexel = tex2D(tf00, pixIndexX, pixIndexY);
    const long4 upperTexel = tex2D(tf01, pixIndexX, pixIndexY);

    // Flatten the two texels into eight consecutive words.
    const long bitWords[8] = {
        lowerTexel.x, lowerTexel.y, lowerTexel.z, lowerTexel.w,
        upperTexel.x, upperTexel.y, upperTexel.z, upperTexel.w
    };

    // Modified part: the position to test is hard-coded instead of being
    // derived from pixZ0.
    const short wordIndex = 0;    // or any other valid constant
    const short bitIndex  = 11;   // or any other valid constant

    // Isolate the selected bit; this is the value reported to the caller.
    const short bitValue = (bitWords[wordIndex] >> bitIndex) & 1;

    return make_int2(bitValue , 0);
}
这样调用的话就没有问题了。
有没有哪位达人知道为什么??小弟很是迷惑很是着急啊,程序已经调试老长时间了……:cry2:
[ 本帖最后由 BugRunner 于 2010-8-27 23:05 编辑 ]