为什么核函数的执行次数会超过规定次数？

// Launch MatMult_SecondLoop once per row i (mbs rows; the original post reports mbs == 100).
// Each launch uses a single block with n threads, so the total number of device
// threads over the whole loop is the sum of n over all rows, not mbs.
for (int i = 0; i < mbs /* reported as 100 */; i++)
{
    // Entry count for row i, taken from two consecutive values of ii.
    // NOTE(review): the original comment assumed n is always 1; the reported
    // 3100 output lines for 100 launches suggest it is not -- confirm against ii.
    n = ii[1] - ii[0];
    ii++;

    // A launch with zero threads is an invalid configuration, so skip empty rows.
    if (n > 0)
    {
        MatMult_SecondLoop<<<1, n>>>(i, index, n, dev_idxbase, dev_x, dev_v, dev_z);

        // Kernel launches do not return a status; configuration errors
        // (e.g. n larger than the maximum block size) only show up here.
        cudaError_t launchErr = cudaGetLastError();
        if (launchErr != cudaSuccess)
        {
            fprintf(stderr, "MatMult_SecondLoop launch failed for row %d (n = %d): %s\n",
                    i, n, cudaGetErrorString(launchErr));
            break;
        }
    }

    // Advance the running offset into dev_idxbase / dev_v past this row's entries.
    index += n;
}

/**
 * Accumulates the contributions of one row's entries into three elements of dev_z.
 *
 * Expected launch: a single block (<<<1, n>>>), one thread per entry. Thread t
 * handles entry k = index + t: it reads the column index j = dev_idxbase[k],
 * the three values dev_x[3*j .. 3*j+2] and the nine values dev_v[9*k .. 9*k+8],
 * and adds its three partial results to dev_z[3*i .. 3*i+2].
 *
 * All threads of the launch add into the same three dev_z elements, so the
 * accumulation uses atomicAdd; a plain `+=` here is a data race that loses
 * updates. atomicAdd on double requires compute capability 6.0 or newer.
 *
 * Debug output: thread 0 prints dev_z[3*i] once per launch, after a block
 * barrier, so the value shown includes every thread's contribution. This is
 * only complete when the kernel is launched with exactly one block.
 *
 * @param i            row whose dev_z elements are updated
 * @param index        offset of this row's first entry in dev_idxbase / dev_v
 * @param n            number of entries in this row; threads with threadIdx.x >= n do no work
 * @param dev_idxbase  device array of column indices, one per entry
 * @param dev_x        device input array, read three values per column index
 * @param dev_v        device array of coefficients, nine values per entry
 * @param dev_z        device output array, updated in place
 */
__global__ void MatMult_SecondLoop(int i, int index, int n, int* dev_idxbase, double* dev_x, double* dev_v, double* dev_z)
{
    const int tid = threadIdx.x;

    // Guard instead of early return so every thread still reaches the barrier below.
    if (tid < n)
    {
        const int k = tid + index;
        const int j = dev_idxbase[k];
        const double* v = dev_v + 9 * k;

        const double x1 = dev_x[3 * j];
        const double x2 = dev_x[3 * j + 1];
        const double x3 = dev_x[3 * j + 2];

        // Several threads target the same outputs, so each update must be atomic.
        atomicAdd(&dev_z[3 * i],     v[0] * x1 + v[3] * x2 + v[6] * x3);
        atomicAdd(&dev_z[3 * i + 1], v[1] * x1 + v[4] * x2 + v[7] * x3);
        atomicAdd(&dev_z[3 * i + 2], v[2] * x1 + v[5] * x2 + v[8] * x3);
    }

    // Wait until every thread of the block has finished its atomic updates.
    __syncthreads();

    // Print once per launch rather than once per thread: with n threads per
    // launch, a per-thread printf emits sum(n) lines (3100 in the original
    // report) instead of one line per launch.
    if (tid == 0)
    {
        printf("%f\n", dev_z[3 * i]);
    }
}