// Launch configuration: 8x8 threads per block; the grid holds one block per
// 8x8 tile of the width x height domain.
// NOTE(review): the grid size is computed with a plain division, so if width
// or height is not a multiple of the block dimensions the trailing
// columns/rows get no threads. Switching to a rounded-up grid is only safe if
// transformKernel bounds-checks its indices -- confirm against the kernel.
dim3 dimBlock(8, 8, 1);
dim3 dimGrid(width / dimBlock.x, height / dimBlock.y, 1);

// Warmup: an untimed launch with exactly the same configuration and arguments
// as the timed launch below (presumably so that one-time startup cost is not
// included in the measurement -- confirm intent).
transformKernel<<<dimGrid, dimBlock, 0>>>(dData, width, height, angle);
// A kernel launch statement does not return a status; launch failures (e.g. an
// invalid grid/block configuration) are reported through cudaGetLastError().
// Checking here, before the timer starts, validates the configuration that the
// timed launch reuses without adding anything to the timed region.
checkCudaErrors(cudaGetLastError());
// Block the host until the warmup kernel has finished, so the timer started
// below does not overlap with the warmup run.
checkCudaErrors(cudaDeviceSynchronize());

StopWatchInterface *timer = NULL;
sdkCreateTimer(&timer);
sdkStartTimer(&timer);

// Execute the kernel (this is the launch that falls inside the timed region).
transformKernel<<<dimGrid, dimBlock, 0>>>(dData, width, height, angle);
warmup 之后的 kernel 为什么不会马上执行,而是在下面才执行?
先谢谢版主;P;P