新手问一下版主为什么我的程序不能编译

system · 2013 年10 月 16 日 07:25

刚刚学习CUDA编程，按照CUDA TOOKIT5.5文档的步骤新建了一个CUDA 5.5的项目，把项目中自动生成的代码编译了一下能够顺利通过，但是为什么把这些代码注释掉后换成了自己的代码编译就通不过呢？出现error MSB3721错误。
代码如下：
#include “cuda_runtime.h”
#include “device_launch_parameters.h”
#include <stdio.h>
#include <Windows.h>
#define TIMES 1000000
//cudaError_t addWithCuda(int c, const int a, const int b, unsigned int size);
global int CudaCalcFloat( void )
{
float fSum = 0;
for( int i=0; i<1024; i++ )
{
fSum += 3.1415926ffloat(i);
}
__syncthreads();
}///
/global void addKernel(int c, const int a, const int b)
{
int i = threadIdx.x;
c[i] = a[i] + b[i];
}///
int main()
{
printf( “Try cuda 1\n” );
printf( “Calculating using cpu…\n” );
double dSum = 0;
DWORD dwCpuTimeBegin = GetTickCount();
for( int i=0; i<TIMES; i++ )
{
dSum = 0.0;
for( int k=0; k<1024; k++ )
{
dSum += 3.1415926double(k);
}
}
DWORD dwCpuTimeTaken = GetTickCount()-dwCpuTimeBegin;
printf( “Sum = %f, Cpu time taken:%d ms\n”, dSum, dwCpuTimeTaken );
int nThreadsPerBlock = 64;
int nBlockNum = TIMES/nThreadsPerBlock + 1;
CudaCalcFloat<<< nBlockNum, nThreadsPerBlock, 0 >>>();///
/const int arraySize = 5;
const int a[arraySize] = { 1, 2, 3, 4, 5 };
const int b[arraySize] = { 10, 20, 30, 40, 50 };
int c[arraySize] = { 0 };
// Add vectors in parallel.
cudaError_t cudaStatus = addWithCuda(c, a, b, arraySize);
if (cudaStatus != cudaSuccess) {
fprintf(stderr, “addWithCuda failed!”);
return 1;
}
printf(“{1,2,3,4,5} + {10,20,30,40,50} = {%d,%d,%d,%d,%d}\n”,
c[0], c[1], c[2], c[3], c[4]);
// cudaDeviceReset must be called before exiting in order for profiling and
// tracing tools such as Nsight and Visual Profiler to show complete traces.
cudaStatus = cudaDeviceReset();
if (cudaStatus != cudaSuccess) {
fprintf(stderr, “cudaDeviceReset failed!”);
return 1;
}///
return 0;
}
/*/ Helper function for using CUDA to add vectors in parallel.
cudaError_t addWithCuda(int c, const int a, const int b, unsigned int size)
{
int dev_a = 0;
int dev_b = 0;
int dev_c = 0;
cudaError_t cudaStatus;
// Choose which GPU to run on, change this on a multi-GPU system.
cudaStatus = cudaSetDevice(0);
if (cudaStatus != cudaSuccess) {
fprintf(stderr, “cudaSetDevice failed! Do you have a CUDA-capable GPU installed?”);
goto Error;
}
// Allocate GPU buffers for three vectors (two input, one output) .
cudaStatus = cudaMalloc((void)&dev_c, size * sizeof(int));
if (cudaStatus != cudaSuccess) {
fprintf(stderr, “cudaMalloc failed!”);
goto Error;
}
cudaStatus = cudaMalloc((void)&dev_a, size * sizeof(int));
if (cudaStatus != cudaSuccess) {
fprintf(stderr, “cudaMalloc failed!”);
goto Error;
}
cudaStatus = cudaMalloc((void)&dev_b, size * sizeof(int));
if (cudaStatus != cudaSuccess) {
fprintf(stderr, “cudaMalloc failed!”);
goto Error;
}
// Copy input vectors from host memory to GPU buffers.
cudaStatus = cudaMemcpy(dev_a, a, size * sizeof(int), cudaMemcpyHostToDevice);
if (cudaStatus != cudaSuccess) {
fprintf(stderr, “cudaMemcpy failed!”);
goto Error;
}
cudaStatus = cudaMemcpy(dev_b, b, size * sizeof(int), cudaMemcpyHostToDevice);
if (cudaStatus != cudaSuccess) {
fprintf(stderr, “cudaMemcpy failed!”);
goto Error;
}
// Launch a kernel on the GPU with one thread for each element.
addKernel<<<1, size>>>(dev_c, dev_a, dev_b);
// Check for any errors launching the kernel
cudaStatus = cudaGetLastError();
if (cudaStatus != cudaSuccess) {
fprintf(stderr, “addKernel launch failed: %s\n”, cudaGetErrorString(cudaStatus));
goto Error;
}

// cudaDeviceSynchronize waits for the kernel to finish, and returns
// any errors encountered during the launch.
cudaStatus = cudaDeviceSynchronize();
if (cudaStatus != cudaSuccess) {
fprintf(stderr, “cudaDeviceSynchronize returned error code %d after launching addKernel!\n”, cudaStatus);
goto Error;
}
// Copy output vector from GPU buffer to host memory.
cudaStatus = cudaMemcpy(c, dev_c, size * sizeof(int), cudaMemcpyDeviceToHost);
if (cudaStatus != cudaSuccess) {
fprintf(stderr, “cudaMemcpy failed!”);
goto Error;
}
Error:
cudaFree(dev_c);
cudaFree(dev_a);
cudaFree(dev_b);

return cudaStatus;
}//*/
编译错误提示如下：
1>C:\Program Files\MSBuild\Microsoft.Cpp\v4.0\BuildCustomizations\CUDA 5.5.targets(592,9): error MSB3721: 命令““C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v5.5\bin\nvcc.exe” -gencode=arch=compute_10,code="sm_10,compute_10" --use-local-env --cl-version 2010 -ccbin “C:\Program Files\Microsoft Visual Studio 10.0\VC\bin” -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v5.5\include" -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v5.5\include" --keep-dir Release -maxrregcount=0 --machine 32 --compile -cudart static -DWIN32 -DNDEBUG -D_CONSOLE -D_MBCS -Xcompiler "/EHsc /W3 /nologo /O2 /Zi /MD " -o Release\kernel.cu.obj “D:\C++ Programs\TryCuda2\TryCuda2\kernel.cu””已退出，返回代码为 2。
1>
1>生成失败。
1>

[/i][/i][/i]

system · 2013 年10 月 16 日 07:30

其实我自己编的代码如下：
#include “cuda_runtime.h”
#include “device_launch_parameters.h”

#include <stdio.h>
#include <Windows.h>

#define TIMES 1000000

global int CudaCalcFloat( void )
{
float fSum = 0;
for( int i=0; i<1024; i++ )
{
fSum += 3.1415926ffloat(i);
}
__syncthreads();
}///

int main()
{
printf( “Try cuda 1\n” );
printf( “Calculating using cpu…\n” );
double dSum = 0;
DWORD dwCpuTimeBegin = GetTickCount();
for( int i=0; i<TIMES; i++ )
{
dSum = 0.0;
for( int k=0; k<1024; k++ )
{
dSum += 3.1415926*double(k);
}
}
DWORD dwCpuTimeTaken = GetTickCount()-dwCpuTimeBegin;
printf( “Sum = %f, Cpu time taken:%d ms\n”, dSum, dwCpuTimeTaken );

int nThreadsPerBlock = 64;
int nBlockNum = TIMES/nThreadsPerBlock + 1;
CudaCalcFloat<<< nBlockNum, nThreadsPerBlock, 0 >>>();//*/

return 0;
}
多谢大侠给予解答。

system · 2013 年10 月 16 日 07:32

楼主您好，

请发build log，版主们不是人肉编译器，无法为您人肉编译，请谅解。

（请在VS的debug菜单的选项菜单的“项目和解决方案”里的“构建和运行”里的“构建记录”里将build log从“最小”改成“详细”，然后将build log复制上来）

谢谢。

system · 2013 年10 月 16 日 07:48

老大您好！我已经按照您的要求改成详细的并编译了一下，但由于我不知道哪个是你要的文件，我把所有的都压缩上传了，您打开挑一下行吗？

system · 2013 年10 月 16 日 07:50

敢问楼主，

你的“压缩包”在哪里？

谢谢。

system · 2013 年10 月 16 日 07:54

是不是这个文件？刚才的不知为什么没上传上去，是不是太大了？16M多。

system · 2013 年10 月 16 日 07:57

楼主您好，

您的错误报告是：
D:/C++ Programs/TryCuda2/TryCuda2/kernel.cu(12):
error : a “global” function must have a void return type
我想你看到这里已经明白自己哪里错了。

这提醒了楼主，如果以后有问题，请直接上来就发完整的报告。
甚至有的时候，当你自己拿到报告后，都不需要自己问就知道问题的所在了。
您说是吧。

感谢来访。

system · 2013 年10 月 16 日 09:23

谢谢老大！我是新手，不知道呀，耽误了您的时间。以后有问题还请多指教。

system · 2013 年10 月 16 日 09:26

楼主您好，贵kernel不能直接返回一个int的，请修正。
（您可以给它传递一个结果缓冲区，让它在这个缓冲区内写入一个int作为结果，如果真的需要的话）

希望您下次能直接提供完整记录。
感谢来访。

system · 2013 年10 月 16 日 10:04

谢谢，已经改好并编译运行成功。

system · 2013 年10 月 16 日 10:05

感谢您的来访，祝您夜晚愉快。