1>C:\Program Files\MSBuild\Microsoft.Cpp\v4.0\BuildCustomizations\CUDA 5.0.targets(592,9): error MSB3721: 命令““C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v5.0\bin\nvcc.exe” -gencode=arch=compute_10,code="sm_10,compute_10" -gencode=arch=compute_20,code="sm_20,compute_20" -gencode=arch=compute_30,code="sm_30,compute_30" -gencode=arch=compute_35,code="sm_35,compute_35" --use-local-env --cl-version 2010 -ccbin “C:\Program Files\Microsoft Visual Studio 10.0\VC\bin” -I"./" -I"…/…/common/inc" -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v5.0\include" -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v5.0\include" -G --keep-dir “Debug” -maxrregcount=0 --machine 32 --compile -g -DWIN32 -D_MBCS -Xcompiler "/EHsc /W3 /nologo /Od /Zi /RTC1 /MTd " -o “Win32/Debug/template.cu.obj” “C:\ProgramData\NVIDIA Corporation\CUDA Samples\my Cuda\template\template.cu””已退出,返回代码为 2。
代码如下:
// includes, system
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include
#define F(x) (x^2-x+1)
// includes CUDA
#include <cuda_runtime.h>
// includes, project
#include <helper_cuda.h>
#include <helper_functions.h> // helper functions for SDK examples
////////////////////////////////////////////////////////////////////////////////
// declaration, forward
void runTest(int argc, char **argv);
extern “C”
void computeGold(float *reference, float *idata, const unsigned int len);
////////////////////////////////////////////////////////////////////////////////
//! Simple test kernel for device functionality
//! @param g_idata input data in global memory
//! @param g_odata output data in global memory
////////////////////////////////////////////////////////////////////////////////
device void FindAandB(float a,floatb)
{
int tid=threadIdx.x;
int bid=blockIdx.x;
float New_a=a+(b-a)/64(tid+bidblockDim.x);
float New_b=a+(b-a)/64(tid+bidblockDim.x+1);
if((New_aNew_a-New_a+1) (New_b*New_b-New_b+1)<0)
{
a[0]=New_a;
b[0]=New_b;
}
}
global void
FindAnswer(float *g_A,float *g_B, float g_C)
{
int tid=blockIdx.xblockDim.x+threadIdx.x;
float a=*g_A;
float b=*g_B;
if(tid==0)
{
do
{
FindAandB(g_A,g_B);
}
while((&g_A-&g_B)<10^6);
if(a==g_A[0]&&b==g_B[0])
{
printf(“在该范围内无解 \n”);
}
else
{
g_C[0]=(*g_A+*g_B)/2;
}
}
}
////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int
main(int argc, char **argv)
{
runTest(argc, argv);
}
////////////////////////////////////////////////////////////////////////////////
//! Run a simple test for CUDA
////////////////////////////////////////////////////////////////////////////////
void
runTest(int argc, char **argv)
{
bool bTestResult = true;
printf(“%s Starting…\n\n”, argv[0]);
// use command-line specified CUDA device, otherwise use device with highest Gflops/s
int devID = findCudaDevice(argc, (const char **)argv);
StopWatchInterface *timer = 0;
sdkCreateTimer(&timer);
sdkStartTimer(&timer);
unsigned int num_threads = 32;
unsigned int mem_size = sizeof(float) *1;
// allocate host memory
float *h_A = (float *) malloc(mem_size);
float *h_B = (float *) malloc(mem_size);
// initalize the memory
h_A[0]=0;
h_B[0]=1;
// allocate device memory
float *d_A;
float *d_B;
checkCudaErrors(cudaMalloc((void **) &d_A, mem_size));
checkCudaErrors(cudaMalloc((void **) &d_B, mem_size));
// copy host memory to device
checkCudaErrors(cudaMemcpy(d_A, h_A, mem_size,
cudaMemcpyHostToDevice));
checkCudaErrors(cudaMemcpy(d_B, h_B, mem_size,
cudaMemcpyHostToDevice));
// allocate device memory for result
float *d_C;
checkCudaErrors(cudaMalloc((void **) &d_C, mem_size));
// setup execution parameters
dim3 grid(2, 1, 1);
dim3 threads(num_threads, 1, 1);
// execute the kernel
FindAnswer<<<grid,thread>>>(d_A, d_B,d_C);
// check if kernel execution generated and error
getLastCudaError(“Kernel execution failed”);
// allocate mem for the result on host side
float *h_C = (float *) malloc(mem_size);
// copy result from device to host
checkCudaErrors(cudaMemcpy(h_C, d_C, sizeof(float) * num_threads,
cudaMemcpyDeviceToHost));
sdkStopTimer(&timer);
printf(“Processing time: %f (ms)\n”, sdkGetTimerValue(&timer));
sdkDeleteTimer(&timer);
printf(“解为: \n”,h_C);
// compute reference solution
//float *reference = (float *) malloc(mem_size);
//computeGold(reference, h_idata, num_threads);
// check result
/* if (checkCmdLineFlag(argc, (const char **) argv, “regression”))
{
// write file for regression test
sdkWriteFile(“./data/regression.dat”, h_odata, num_threads, 0.0f, false);
}
else
{
// custom output handling when no regression test running
// in this case check if the result is equivalent to the expected soluion
bTestResult = compareData(reference, h_odata, num_threads, 0.0f, 0.0f);
}*/
// cleanup memory
free(h_A);
free(h_B);
free(h_C);
//free(reference);
checkCudaErrors(cudaFree(d_A));
checkCudaErrors(cudaFree(d_B));
checkCudaErrors(cudaFree(d_C));
cudaDeviceReset();
exit(bTestResult ? EXIT_SUCCESS : EXIT_FAILURE);
}
我想用并行计算求方程x^2-x+1=0的跟,使用templates模版修改后,就出现了这问题我不知道问题出在哪了