刚刚学习CUDA编程,按照CUDA TOOKIT5.5文档的步骤新建了一个CUDA 5.5的项目,把项目中自动生成的代码编译了一下能够顺利通过,但是为什么把这些代码注释掉后换成了自己的代码编译就通不过呢?出现error MSB3721错误。
代码如下:
#include “cuda_runtime.h”
#include “device_launch_parameters.h”
#include <stdio.h>
#include <Windows.h>
#define TIMES 1000000
//cudaError_t addWithCuda(int c, const int a, const int b, unsigned int size);
global int CudaCalcFloat( void )
{
float fSum = 0;
for( int i=0; i<1024; i++ )
{
fSum += 3.1415926ffloat(i);
}
__syncthreads();
}///
/global void addKernel(int c, const int a, const int b)
{
int i = threadIdx.x;
c[i] = a[i] + b[i];
}///
int main()
{
printf( “Try cuda 1\n” );
printf( “Calculating using cpu…\n” );
double dSum = 0;
DWORD dwCpuTimeBegin = GetTickCount();
for( int i=0; i<TIMES; i++ )
{
dSum = 0.0;
for( int k=0; k<1024; k++ )
{
dSum += 3.1415926double(k);
}
}
DWORD dwCpuTimeTaken = GetTickCount()-dwCpuTimeBegin;
printf( “Sum = %f, Cpu time taken:%d ms\n”, dSum, dwCpuTimeTaken );
int nThreadsPerBlock = 64;
int nBlockNum = TIMES/nThreadsPerBlock + 1;
CudaCalcFloat<<< nBlockNum, nThreadsPerBlock, 0 >>>();///
/const int arraySize = 5;
const int a[arraySize] = { 1, 2, 3, 4, 5 };
const int b[arraySize] = { 10, 20, 30, 40, 50 };
int c[arraySize] = { 0 };
// Add vectors in parallel.
cudaError_t cudaStatus = addWithCuda(c, a, b, arraySize);
if (cudaStatus != cudaSuccess) {
fprintf(stderr, “addWithCuda failed!”);
return 1;
}
printf(“{1,2,3,4,5} + {10,20,30,40,50} = {%d,%d,%d,%d,%d}\n”,
c[0], c[1], c[2], c[3], c[4]);
// cudaDeviceReset must be called before exiting in order for profiling and
// tracing tools such as Nsight and Visual Profiler to show complete traces.
cudaStatus = cudaDeviceReset();
if (cudaStatus != cudaSuccess) {
fprintf(stderr, “cudaDeviceReset failed!”);
return 1;
}///
return 0;
}
/*/ Helper function for using CUDA to add vectors in parallel.
cudaError_t addWithCuda(int c, const int a, const int b, unsigned int size)
{
int dev_a = 0;
int dev_b = 0;
int dev_c = 0;
cudaError_t cudaStatus;
// Choose which GPU to run on, change this on a multi-GPU system.
cudaStatus = cudaSetDevice(0);
if (cudaStatus != cudaSuccess) {
fprintf(stderr, “cudaSetDevice failed! Do you have a CUDA-capable GPU installed?”);
goto Error;
}
// Allocate GPU buffers for three vectors (two input, one output) .
cudaStatus = cudaMalloc((void)&dev_c, size * sizeof(int));
if (cudaStatus != cudaSuccess) {
fprintf(stderr, “cudaMalloc failed!”);
goto Error;
}
cudaStatus = cudaMalloc((void)&dev_a, size * sizeof(int));
if (cudaStatus != cudaSuccess) {
fprintf(stderr, “cudaMalloc failed!”);
goto Error;
}
cudaStatus = cudaMalloc((void)&dev_b, size * sizeof(int));
if (cudaStatus != cudaSuccess) {
fprintf(stderr, “cudaMalloc failed!”);
goto Error;
}
// Copy input vectors from host memory to GPU buffers.
cudaStatus = cudaMemcpy(dev_a, a, size * sizeof(int), cudaMemcpyHostToDevice);
if (cudaStatus != cudaSuccess) {
fprintf(stderr, “cudaMemcpy failed!”);
goto Error;
}
cudaStatus = cudaMemcpy(dev_b, b, size * sizeof(int), cudaMemcpyHostToDevice);
if (cudaStatus != cudaSuccess) {
fprintf(stderr, “cudaMemcpy failed!”);
goto Error;
}
// Launch a kernel on the GPU with one thread for each element.
addKernel<<<1, size>>>(dev_c, dev_a, dev_b);
// Check for any errors launching the kernel
cudaStatus = cudaGetLastError();
if (cudaStatus != cudaSuccess) {
fprintf(stderr, “addKernel launch failed: %s\n”, cudaGetErrorString(cudaStatus));
goto Error;
}
// cudaDeviceSynchronize waits for the kernel to finish, and returns
// any errors encountered during the launch.
cudaStatus = cudaDeviceSynchronize();
if (cudaStatus != cudaSuccess) {
fprintf(stderr, “cudaDeviceSynchronize returned error code %d after launching addKernel!\n”, cudaStatus);
goto Error;
}
// Copy output vector from GPU buffer to host memory.
cudaStatus = cudaMemcpy(c, dev_c, size * sizeof(int), cudaMemcpyDeviceToHost);
if (cudaStatus != cudaSuccess) {
fprintf(stderr, “cudaMemcpy failed!”);
goto Error;
}
Error:
cudaFree(dev_c);
cudaFree(dev_a);
cudaFree(dev_b);
return cudaStatus;
}//*/
编译错误提示如下:
1>C:\Program Files\MSBuild\Microsoft.Cpp\v4.0\BuildCustomizations\CUDA 5.5.targets(592,9): error MSB3721: 命令““C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v5.5\bin\nvcc.exe” -gencode=arch=compute_10,code="sm_10,compute_10" --use-local-env --cl-version 2010 -ccbin “C:\Program Files\Microsoft Visual Studio 10.0\VC\bin” -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v5.5\include" -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v5.5\include" --keep-dir Release -maxrregcount=0 --machine 32 --compile -cudart static -DWIN32 -DNDEBUG -D_CONSOLE -D_MBCS -Xcompiler "/EHsc /W3 /nologo /O2 /Zi /MD " -o Release\kernel.cu.obj “D:\C++ Programs\TryCuda2\TryCuda2\kernel.cu””已退出,返回代码为 2。
1>
1>生成失败。
1>
[/i][/i][/i]