今天调试全局同步的函数,如下
157 //GPU lock-free synchronization function
158 device
159 void __gpu_sync(unsigned goalVal, volatile unsigned *Arrayin, volatile unsigned *Arrayout) {
160 // thread ID in a block
161 unsigned tid_in_blk = threadIdx.x * blockDim.y + threadIdx.y;//caculate the thread id in the block.
162 unsigned nBlockNum = gridDim.x * gridDim.y;
163 unsigned bid = blockIdx.x * gridDim.y + blockIdx.y;//caculate the block id in the grid
164 // only thread 0 is used for synchronization
165 if (tid_in_blk == 0) {
166 Arrayin[bid] = goalVal;
167 __threadfence();
168 }
169 if (bid == 1) {
170 if (tid_in_blk < nBlockNum) {//every thread whose id < block number in block 0 is hooked by one block, means to sysnc all threads of all blocks
171 while (Arrayin[tid_in_blk] != goalVal){//wait for accomplishmet of Array[bid] = goalVal from other block
172 //Do nothing here
173 }
174 }
175 __syncthreads();
176 if (tid_in_blk < nBlockNum) {
177 Arrayout[tid_in_blk] = goalVal;
178 __threadfence();
179 }
180 }
181 if (tid_in_blk == 0) {
182 while (Arrayout[bid] != goalVal) {
183 //Do nothing here
184 }
185 }
186 // printf(“waiting for set arrayout[%d] = %d, goalVal = %d, bid: %d, tid_in_blk : %d”, bid, arrayout[bid], goalVal, bid, tid_in_blk);
187 __syncthreads();
188 }
然后在183行死循环,在CUDA-GDB中ctrl+c停止后,无法print变量(没有上下文),printf也没有显示,怎么办,谢谢