新手求助调试程序

在编译自己的程序时出现了下面的错误,想请教一下该怎么解决:
nvcc -o first first.cu -I/root/NVIDIA_GPU_Computing_SDK/CUDALibraries/common/inc
/tmp/tmpxft_000050e7_00000000-13_first.o: In function `main':
tmpxft_000050e7_00000000-1_first.cudafe1.cpp:(.text+0x32): undefined reference to `cutCheckCmdLineFlag'
/tmp/tmpxft_000050e7_00000000-13_first.o: In function `runTest(int, char**)':
tmpxft_000050e7_00000000-1_first.cudafe1.cpp:(.text+0x142): undefined reference to `cutGetCmdLineArgumenti'
tmpxft_000050e7_00000000-1_first.cudafe1.cpp:(.text+0x1f1): undefined reference to `cutCheckCmdLineFlag'
collect2: ld 返回 1

程序为:first.cu
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <cutil.h>
#include "first_kernel.cu"

void runTest(int argc,char** argv);

/*
 * Program entry point.
 *
 * Passes the command-line arguments to runTest() and then to the CUT_EXIT
 * macro. The link log in this post shows main() referencing
 * cutCheckCmdLineFlag, so the program has to be linked against the CUTIL
 * library for this to build.
 */
int main(int argc, char** argv)
{
    runTest(argc, argv);

    CUT_EXIT(argc, argv);
}

void runTest(int argc,char** argv){
CUT_DEVICE_INIT(argc,argv);
unsigned int num_blocks=4;
unsigned int num_threads=4;

unsigned int mem_size=sizeof(float)*num_threads*num_blocks;
float* h_idata=(float*)malloc(mem_size);
float* h_odata=(float*)malloc(mem_size);

“first.cu” 53L, 1273C 已写入
[root@lab519 test_gpu]# ls
first.cu first_kernel.cu
[root@lab519 test_gpu]# nvcc -o first first.cu -I/root/NVIDIA_GPU_Computing_SDK/CUDALibraries/common/inc
/tmp/tmpxft_000050a6_00000000-13_first.o: In function `main':
tmpxft_000050a6_00000000-1_first.cudafe1.cpp:(.text+0x32): undefined reference to `cutCheckCmdLineFlag'
/tmp/tmpxft_000050a6_00000000-13_first.o: In function `runTest(int, char**)':
tmpxft_000050a6_00000000-1_first.cudafe1.cpp:(.text+0x142): undefined reference to `cutGetCmdLineArgumenti'
tmpxft_000050a6_00000000-1_first.cudafe1.cpp:(.text+0x1f1): undefined reference to `cutCheckCmdLineFlag'
collect2: ld 返回 1
[root@lab519 test_gpu]# find -name "cutil32.so"
[root@lab519 test_gpu]# find -name "cutil32.lib"
[root@lab519 test_gpu]# whereis cutil32
cutil32:
[root@lab519 test_gpu]# echo PATH
PATH
[root@lab519 test_gpu]# nvcc -o first first.cu -I/root/NVIDIA_GPU_Computing_SDK/CUDALibraries/common/inc
/tmp/tmpxft_000050e7_00000000-13_first.o: In function `main':
tmpxft_000050e7_00000000-1_first.cudafe1.cpp:(.text+0x32): undefined reference to `cutCheckCmdLineFlag'
/tmp/tmpxft_000050e7_00000000-13_first.o: In function `runTest(int, char**)':
tmpxft_000050e7_00000000-1_first.cudafe1.cpp:(.text+0x142): undefined reference to `cutGetCmdLineArgumenti'
tmpxft_000050e7_00000000-1_first.cudafe1.cpp:(.text+0x1f1): undefined reference to `cutCheckCmdLineFlag'
collect2: ld 返回 1
[root@lab519 test_gpu]# vim first.cu
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <cutil.h>
#include "first_kernel.cu"

void runTest(int argc,char** argv);

/*
 * Entry point of first.cu: runs the test driver, then invokes the CUTIL
 * exit macro with the same command-line arguments.
 */
int main(int argc, char** argv)
{
    runTest(argc, argv);

    /* Last statement of main(); no explicit return follows it. */
    CUT_EXIT(argc, argv);
}

/*
 * Host-side driver for testKernel.
 *
 * Fills a host buffer of num_blocks * num_threads floats with 1.0f, copies
 * it to the device, launches testKernel with one thread per element, copies
 * the result back and prints it as one text row per block.
 *
 * argc/argv are forwarded to CUT_DEVICE_INIT. The link log in this post
 * shows runTest() referencing cutGetCmdLineArgumenti and
 * cutCheckCmdLineFlag, so the program must be linked against the CUTIL
 * library.
 *
 * Terminates the process with EXIT_FAILURE if a host buffer cannot be
 * allocated.
 */
void runTest(int argc, char** argv)
{
    CUT_DEVICE_INIT(argc, argv);

    const unsigned int num_blocks   = 4;
    const unsigned int num_threads  = 4;
    const unsigned int num_elements = num_threads * num_blocks;

    // Bytes for the whole data set (host and device buffers).
    const unsigned int mem_size = sizeof(float) * num_elements;
    // Dynamic shared memory per block: the kernel indexes sdata[] with
    // threadIdx.x only, so one float per thread of the block is enough.
    const unsigned int smem_size = sizeof(float) * num_threads;

    float* h_idata = (float*)malloc(mem_size);
    float* h_odata = (float*)malloc(mem_size);
    if (h_idata == NULL || h_odata == NULL) {
        fprintf(stderr, "runTest: failed to allocate %u bytes of host memory\n",
                mem_size);
        free(h_idata);   // free(NULL) is a no-op
        free(h_odata);
        exit(EXIT_FAILURE);
    }

    float* d_idata;
    CUDA_SAFE_CALL(cudaMalloc((void**)&d_idata, mem_size));
    float* d_odata;
    CUDA_SAFE_CALL(cudaMalloc((void**)&d_odata, mem_size));

    for (unsigned int i = 0; i < num_elements; i++)
        h_idata[i] = 1.0f;
    CUDA_SAFE_CALL(cudaMemcpy(d_idata, h_idata, mem_size, cudaMemcpyHostToDevice));

    dim3 grid(num_blocks, 1, 1);
    dim3 threads(num_threads, 1, 1);

    // grid.x * threads.x == num_elements, so every thread maps to a valid
    // element; the kernel itself performs no bounds check.
    testKernel<<<grid, threads, smem_size>>>(d_idata, d_odata);

    CUT_CHECK_ERROR("Kernel execution failed");

    // Blocking copy: the results are complete on the host once it returns.
    CUDA_SAFE_CALL(cudaMemcpy(h_odata, d_odata, mem_size, cudaMemcpyDeviceToHost));

    // One output row per block, one column per thread of that block.
    for (unsigned int i = 0; i < num_blocks; i++) {
        for (unsigned int j = 0; j < num_threads; j++) {
            printf("%5.0f", h_odata[i * num_threads + j]);
        }
        printf("\n");
    }

    free(h_idata);
    free(h_odata);
    CUDA_SAFE_CALL(cudaFree(d_idata));
    CUDA_SAFE_CALL(cudaFree(d_odata));
}

程序first_kernel.cu为:
#ifndef FIRST_KERNEL_H
#define FIRST_KERNEL_H

/*
 * Multiplies every input element by the index of the block that processes
 * it: g_odata[i] = g_idata[i] * blockIdx.x, staged through shared memory.
 *
 * Launch requirements:
 *   - 1D grid of 1D blocks, one thread per element. There is no bounds
 *     check, so gridDim.x * blockDim.x must not exceed the number of floats
 *     in g_idata / g_odata.
 *   - Dynamic shared memory of at least blockDim.x * sizeof(float) bytes
 *     (third launch-configuration argument).
 *
 * g_idata: device pointer to the input floats (read only).
 * g_odata: device pointer to the output floats (written).
 */
__global__ void
testKernel(float* g_idata, float* g_odata)
{
    // Dynamically sized shared array; its size comes from the launch.
    extern __shared__ float sdata[];

    const unsigned int bid          = blockIdx.x;
    const unsigned int tid_in_block = threadIdx.x;
    const unsigned int tid_in_grid  = blockDim.x * blockIdx.x + threadIdx.x;

    // Stage this thread's element in shared memory.
    sdata[tid_in_block] = g_idata[tid_in_grid];
    __syncthreads();

    // Each thread touches only its own shared-memory slot.
    sdata[tid_in_block] *= (float)bid;
    __syncthreads();

    g_odata[tid_in_grid] = sdata[tid_in_block];
}

#endif

求大家帮忙解决。

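暂时的解决方案是:不使用 CUTIL,把用到它的语句都注释掉即可。需要说明的是,上面的报错(undefined reference、collect2: ld 返回 1)出现在链接阶段,说明头文件已经找到,只是没有链接 CUTIL 库;如果仍想使用 CUTIL,需要先编译 SDK 中的 cutil 库,再在 nvcc 命令中用 -L 指定库所在目录、用 -l 指定库名(具体库名以 SDK 的 lib 目录中实际生成的文件为准)。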

你可以改用 <cutil_inline.h> 头文件。