main函数的代码如下:
/****************** host to device *****************************/
{
    // Scoped block so `status` stays local to this section of main.
    cudaError_t status;

    // Report a failed upload right away, so a later call is not blamed for it.
    status = cudaMemcpy(dev_np, np, 2 * sizeof(int), cudaMemcpyHostToDevice);
    if (status != cudaSuccess)
        printf("H2D dev_np: %s\n", cudaGetErrorString(status));

    // NOTE(review): the kernel accumulates into rho with atomicAdd and this upload
    // is disabled; confirm rho is zeroed/initialised somewhere before the launch.
    //cudaMemcpy(rho, rhoh, ng * sizeof(float), cudaMemcpyHostToDevice);

    // cudaMemcpy2D takes (dst, dpitch, src, spitch, widthInBytes, height, kind).
    // `pitch` is the value passed to the kernel together with dev_x, so it is used
    // as the DESTINATION pitch; the source pitch is the host row size.
    // Assumes pitch came from cudaMallocPitch for dev_x and that host x is tightly
    // packed (width1 floats per row) -- TODO confirm against the allocation code.
    status = cudaMemcpy2D(dev_x, pitch, x, width1 * sizeof(float),
                          width1 * sizeof(float), height, cudaMemcpyHostToDevice);
    if (status != cudaSuccess)
        printf("H2D dev_x: %s\n", cudaGetErrorString(status));

    /****************** kernel *****************************/
    setrho<<<100, 1024>>>(dev_x, dev_np, rho, pitch, dev_count_e, dev_count_i);
    // The launch is asynchronous: this only reports launch/configuration errors.
    printf("%s\n", cudaGetErrorString(cudaGetLastError()));
    // Wait for the kernel so that a fault raised while it runs is reported here,
    // attributed to the kernel, instead of surfacing at the next blocking call.
    status = cudaDeviceSynchronize();
    if (status != cudaSuccess)
        printf("setrho execution: %s\n", cudaGetErrorString(status));

    /****************** device to host *****************************/
    status = cudaMemcpy(rhoh, rho, ng * sizeof(float), cudaMemcpyDeviceToHost);
    printf("%s\n", cudaGetErrorString(status));
}
下面是 CUDA 核函数:
/*
 * setrho: adds two weighted contributions per loop iteration into the rho
 * array, at rho[i] and rho[i+1], using atomicAdd.
 *
 * Parameters:
 *   dev_x, dev_np, pitch, dev_count_i - not referenced in the visible part of
 *                                       the body (presumably used in the
 *                                       omitted section).
 *   rho         - float array that receives the atomic accumulations.
 *   dev_count_e - per-k upper bound for tid in the inner loop.
 *
 * The names tid, i, xe, qe, dx and nc are not declared in the visible code;
 * presumably they are set up in the omitted section marked below.
 *
 * NOTE(review): an out-of-range i, i+1 or tid here would only be reported at
 * the next synchronizing host call -- confirm 0 <= i and i+1 < length of rho,
 * and that tid stays within xe.
 */
__global__ void setrho(float* dev_x, int* dev_np, float* rho, size_t pitch, int* dev_count_e, int* dev_count_i)
{
// The next line is the part of the body the author left out of the post.
……
    for (int k = 0; k < nc; k++)
    {
        // NOTE(review): tid is not reset between k iterations and advances by 1
        // per step in every thread, while i does not change inside the loop --
        // confirm this is the intended work distribution.
        while (tid < dev_count_e[k])
        {
            // float literals keep the arithmetic from being promoted to double.
            atomicAdd(&rho[i], (i + 1.0f - xe[tid]) * qe / dx);
            // NOTE(review): the constant 1.0f (rather than i) in this weight looks
            // unusual next to the line above -- verify against the intended formula.
            atomicAdd(&rho[i + 1], (xe[tid] - 1.0f) * qe / dx);
            tid++;
        }
    }
    // Reached by every thread: it sits after the loops, outside any branch.
    __syncthreads();
}
我的问题是:kernel 启动后的检错结果是 no error,可是 device to host 拷贝之后的检错结果却是 unknown error……
它不知道,我也不知道……哪位知道呀?求助各位大知者!