运行错误

为什么在执行 HANDLE_ERROR( cudaMemcpy( Csph_host, dev_Csph, 100 * sizeof( unsigned long long ), cudaMemcpyDeviceToHost ) ); 这一句时,会出现这样的错误:the launch timed out and was terminated in cuda.cu at line 402(第402行就是前面那一句)?

楼主您好,根据提示看,有可能是您的cudaMemcpy上一句是一次kernel调用,该kernel运行时间超过了您当前设置的最大允许时间而导致的。

建议的解决方案:
(1)安装 Nsight Visual Studio Edition,
(2)从开始菜单找到nsight monitor, 并运行
(3)等待5秒钟让它出现。
(4)在任务栏的右下角通知区域找到nsight monitor图标,使用右键点击它,并选择options
(5)稍等1秒等待该选项窗口出现
(6)在该窗口左侧选择general, 然后在右侧选择TDR Delay选项,改成较大的值,例如300,然后点击确定。
(7)重新启动贵计算机。(请一定要重启,谢谢!)
(8)重新运行贵代码。现在你有300秒的时间运行贵kernel了。一般是足够了。

建议尝试如上操作,并反馈结果。

谢版主的解答,问题解决了。就是感觉似乎比以前运行时要慢,运行时间变长了?这个数改成多少最合适?

楼主您好,修改这个值不会影响您的实际运行时间的,这个只是一个最大的超时值。

建议的解决方案:
您可以实际测试下,您的kernel是否在修改TDR delay后真的会变慢(不会的),以排除心理作用因素。

版主,出错的还是第402行(就是上面的那一行)。我只改了程序中的一个运行参数,这一行就报 unknown error in cuda.cu at line 402。我很奇怪,为什么改一个参数就运行不了?但如果改其他参数,都可以运行!

LZ您好,这个报错说明您给的参数不合适,引发了错误。

当然这个回答无法解决任何问题,但是也无法给出进一步的建议了。
您可以检查您修改的参数是否合适。

祝您好运~

版主可这个程序在CPU上是可以运行的,是不是改一下哪方面的设置就可以运行了?我觉得运行不了的原因似乎是由于每个线程内循环次数太多,各个线程没有协调好,而且402的上一句就是核函数

/*
 * transport_kernel
 *
 * Monte Carlo photon transport from an isotropic point source in an
 * infinite homogeneous medium. Each thread launches ONE photon at the
 * origin and repeats HOP / DROP / SPIN / ROULETTE until the photon dies.
 *
 * Parameters
 *   dev_x, dev_a  Per-thread random-number generator state, indexed by the
 *                 flat global thread id and handed to rand_MWC_co() /
 *                 rand_MWC_oc(). NOTE(review): those helpers are defined
 *                 elsewhere; they are presumed to return a float in [0,1)
 *                 and (0,1] respectively -- confirm against their source.
 *   Csph          Histogram of absorbed photon weight versus spherical
 *                 radius. Bins 0..NR-1 cover [0, radial_size); bin NR is the
 *                 overflow bin.
 *
 * Preconditions
 *   - Csph MUST hold at least NR + 1 (= 101) elements, because the overflow
 *     bin Csph[NR] is written. Allocating or copying only NR elements
 *     leaves that write out of bounds / that bin unread.
 *   - There is no bounds check on the thread id: the launch must not create
 *     more threads than there are entries in dev_x / dev_a.
 *   - ALIVE, DEAD, PI, THRESHOLD, CHANCE, ONE_MINUS_COSZERO, SIGN() and
 *     AtomicAddULL() are defined elsewhere in the file.
 *
 * Run time grows with the number of scattering events per photon (i.e.
 * with the albedo mus / (mus + mua)), so a strongly scattering medium can
 * exceed the display watchdog (TDR) limit when many photons are simulated
 * in one launch.
 */
__global__ void transport_kernel( unsigned long long *dev_x, unsigned int *dev_a, unsigned long long *Csph ){

    const int tid = threadIdx.x + blockIdx.x * blockDim.x;

    /* Propagation parameters */
    float x, y, z;          /* photon position */
    float ux, uy, uz;       /* photon trajectory as cosines */
    float uxx, uyy, uzz;    /* temporary values used during SPIN */
    float s;                /* step size. s = -log(RND)/(mua + mus) [cm] */
    float costheta;         /* cos(theta) */
    float sintheta;         /* sin(theta) */
    float cospsi;           /* cos(psi) */
    float sinpsi;           /* sin(psi) */
    float psi;              /* azimuthal angle */

    unsigned int W;         /* photon weight, fixed point: 0xffffffff represents one */
    unsigned int absorb;    /* weight deposited in a step due to absorption */
    short photon_status;    /* flag = ALIVE or DEAD */

    /* dummy variables */
    float rnd;              /* assigned random value */
    float temp;             /* sqrt(1 - uz*uz) during SPIN */

    /**** INPUT
       Optical properties and bin sizes.
       Single-precision literals keep all arithmetic in float.
    *****/
    const float mua = 1.0f;                 /* absorption coefficient [cm^-1] */
    const float mus = 100.0f;               /* scattering coefficient [cm^-1] */
    const float g   = 0.90f;                /* anisotropy [-] */
    const float albedo = mus/(mus + mua);   /* albedo of tissue */

    const float radial_size = 3.0f;         /* cm, total range over which bins extend */
    const int   NR = 100;                   /* number of bins.
                                               IF NR IS ALTERED, THE Csph ARRAY MUST BE
                                               RESIZED TO NR + 1 ELEMENTS. */
    const float dr = radial_size/NR;        /* radial bin size [cm] */

    const float pi = (float)PI;             /* avoid double promotion if PI is a double macro */
    int ir;                                 /* index to radial bin */

    /**** LAUNCH
       Initialize photon position and trajectory.
       Implements an isotropic point source.
    *****/
    W = 0xffffffffu;            /* set photon weight to one */
    photon_status = ALIVE;      /* Launch an ALIVE photon */

    x = 0.0f;                   /* Set photon position to origin. */
    y = 0.0f;
    z = 0.0f;

    /* Randomly set photon trajectory to yield an isotropic source. */
    costheta = 2.0f*rand_MWC_co(&dev_x[tid],&dev_a[tid]) - 1.0f;
    /* Clamp at zero: rounding must not feed a negative value to sqrtf (NaN). */
    sintheta = sqrtf(fmaxf(0.0f, 1.0f - costheta*costheta));   /* sintheta is always positive */
    psi = 2.0f*pi*rand_MWC_co(&dev_x[tid],&dev_a[tid]);
    ux = sintheta*cosf(psi);
    uy = sintheta*sinf(psi);
    uz = costheta;

    /* HOP_DROP_SPIN_CHECK
       Propagate one photon until it dies as determined by ROULETTE.
    *******/
    do {

        /**** HOP
           Take step to new position.
           s = stepsize
           ux, uy, uz are cosines of current photon trajectory
        ****/
        /* Reject non-positive draws so that logf() stays finite, and use the
           accepted draw itself for the step length (the pasted version drew
           it, discarded it, and then drew a second number). */
        do {
            rnd = rand_MWC_co(&dev_x[tid],&dev_a[tid]);
        } while (rnd <= 0.0f);
        s = -logf(rnd)/(mua + mus);     /* Step size. Note: logf() is base e */
        x += s * ux;                    /* Update positions. */
        y += s * uy;
        z += s * uz;

        /**** DROP
           Drop photon weight (W) into local bin.
        ****/
        absorb = (unsigned int)(W*(1.0f - albedo));     /* photon weight absorbed at this step */
        W -= absorb;                                    /* decrement WEIGHT by amount absorbed */

        /* spherical */
        r = 0.0f, r = sqrtf(x*x + y*y + z*z);           /* current spherical radial position */
        /* Select the bin without converting an out-of-range float to an
           integer: anything at or beyond radial_size (or a NaN radius, for
           which the comparison is false) goes to the overflow bin NR. */
        ir = NR;                                        /* last bin is for overflow */
        if (r < radial_size) {
            ir = (int)(r/dr);                           /* ir = index to spatial bin */
            if (ir > NR) ir = NR;
        }
        AtomicAddULL( &Csph[ir], absorb );              /* DROP absorbed weight into bin */

        /**** SPIN
           Scatter photon into new trajectory defined by theta and psi.
           Theta is specified by cos(theta), which is determined
           based on the Henyey-Greenstein scattering function.
           Convert theta and psi into cosines ux, uy, uz.
        ****/
        /* Sample for costheta */
        rnd = rand_MWC_oc(&dev_x[tid],&dev_a[tid]);
        if (g == 0.0f)
            costheta = 2.0f*rnd - 1.0f;
        else {
            const float hg = (1.0f - g*g)/(1.0f - g + 2.0f*g*rnd);
            costheta = (1.0f + g*g - hg*hg)/(2.0f*g);
        }
        /* Clamp: in float the sampled |costheta| can exceed 1 by rounding. */
        sintheta = sqrtf(fmaxf(0.0f, 1.0f - costheta*costheta));

        /* Sample psi. */
        psi = 2.0f*pi*rand_MWC_oc(&dev_x[tid],&dev_a[tid]);
        cospsi = cosf(psi);
        if (psi < pi)
            sinpsi = sqrtf(1.0f - cospsi*cospsi);
        else
            sinpsi = -sqrtf(1.0f - cospsi*cospsi);

        /* New trajectory. */
        if (1.0f - fabsf(uz) <= ONE_MINUS_COSZERO) {    /* close to perpendicular. */
            uxx = sintheta * cospsi;
            uyy = sintheta * sinpsi;
            uzz = costheta * SIGN(uz);
        }
        else {                                          /* usually use this option */
            temp = sqrtf(1.0f - uz * uz);
            uxx = sintheta * (ux * uz * cospsi - uy * sinpsi) / temp + ux * costheta;
            uyy = sintheta * (uy * uz * cospsi + ux * sinpsi) / temp + uy * costheta;
            uzz = -sintheta * cospsi * temp + uz * costheta;
        }

        /* Update trajectory */
        ux = uxx;
        uy = uyy;
        uz = uzz;

        /**** CHECK ROULETTE
           If photon weight below THRESHOLD, then terminate photon using Roulette technique.
           Photon has CHANCE probability of having its weight increased by factor of 1/CHANCE,
           and 1-CHANCE probability of terminating.
        *****/
        if (W < THRESHOLD) {
            if (rand_MWC_co(&dev_x[tid],&dev_a[tid]) <= CHANCE)
                W /= CHANCE;
            else
                photon_status = DEAD;
        }

    } /* end STEP_CHECK_HOP_SPIN */
    while (photon_status == ALIVE);

} /* end of kernel */

其中,mus 这个参数等于零时可以运行;等于其他数值时一般就不行了,会出现我前面说的那种错误。这是为什么?

LZ您好,您这已经超出了CUDA技术讨论的范畴,您这是某个具体数值算法实现的参数讨论,我不懂该方法,亦无法看出这里有或者没有问题。

一般地,您需要自己确定某算法实现中某参数的可行范围。

如果您也不确定该参数的合理范围,那么死办法就是逐步运行调试,检查中间变量的值,看问题在哪里。