这是我写的一个 kerner,
pd_DEMCellBox, pd_DES_POS, pd_nebrPPTab, pd_nebrTabLen, d_vOff均为全局内存上的变量,确保已经cudaMalloc成功。
global void SearchOverCells(long *pd_DEMCellBox, int demcellsx,
int demcellsy, int demcellsz,
float *pd_DES_POS,const float rrNebr,
int particles, long *pd_nebrPPTab,
int *pd_nebrTabLen, int *d_vOff)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
int ci1, cj1, ci2, cj2;
int m2x, m2y, m2z, m1x, m1y, m1z;
long LL, JJ;
float RelPos[3], DIST;
cj1 = pd_DEMCellBox[i * 81];
if(cj1 == 0)
return;
for(int j=0; j<14; j++)
{
m1x = (i % (demcellsx * demcellsy)) % demcellsx;
m2x = m1x + d_vOff[j*3];
if(m1x == 0)
m1y = (i % (demcellsx * demcellsy)) / demcellsx;
else
m1y = (i % (demcellsx * demcellsy)) / demcellsx + 1;
m2y = + d_vOff[j*3 + 1];
if(i % (demcellsx * demcellsy) == 0)
m1z = i / (demcellsx * demcellsy);
else
m1z = i / (demcellsx * demcellsy) + 1;
m2z = m1z + d_vOff[j*3 + 2];
if(m2x < 0 || m2x >= demcellsx ||
m2y < 0 || m2y >= demcellsy ||
m2z < 0 || m2z >= demcellsz)
continue;
int index = demcellsx * demcellsy * m2z + m2x * m2y + m2x;
cj2 = pd_DEMCellBox[index * 81];
if(cj2 == 0)
continue;
for(ci1=1; ci1<=cj1; ci1++)
{
for(ci2=1; ci2<=cj2; ci2 ++)
{
LL = pd_DEMCellBox[i*81 + ci1];
JJ = pd_DEMCellBox[index*81 + ci2];
if ( m1x!=m2x || m1y!=m2y || m1z!=m2z || LL > JJ)
{
RelPos[0] = pd_DES_POS[JJ] - pd_DES_POS[LL];
RelPos[1] = pd_DES_POS[particles + JJ] - pd_DES_POS[particles + LL];
if(fabs(RelPos[0])>rrNebr || fabs(RelPos[1])>rrNebr)
continue;
RelPos[2] = pd_DES_POS[2particles + JJ] - pd_DES_POS[2particles + LL];
DIST = ABS_V3(RelPos);
if(DIST <= rrNebr )
{
pd_nebrPPTab[(*pd_nebrTabLen) * 2] = LL;
pd_nebrPPTab[(*pd_nebrTabLen) * 2 + 1] = JJ;
(*pd_nebrTabLen)++;
}
}
}
}
}
}
这个kerner老是发射不成,本人做过如下测试。
红色部分代码屏蔽掉后,kerner能发射成功;
将红色部分之外的代码屏蔽掉,kerner内只留红色体代码,kerner也能发射成功运行;
求助!!求解!!