在编译自己的程序时出现下面的错误,想问下怎么解决这个问题:
nvcc -o first first.cu -I/root/NVIDIA_GPU_Computing_SDK/CUDALibraries/common/inc
/tmp/tmpxft_000050e7_00000000-13_first.o: In function `main':
tmpxft_000050e7_00000000-1_first.cudafe1.cpp:(.text+0x32): undefined reference to `cutCheckCmdLineFlag'
/tmp/tmpxft_000050e7_00000000-13_first.o: In function `runTest(int, char**)':
tmpxft_000050e7_00000000-1_first.cudafe1.cpp:(.text+0x142): undefined reference to `cutGetCmdLineArgumenti'
tmpxft_000050e7_00000000-1_first.cudafe1.cpp:(.text+0x1f1): undefined reference to `cutCheckCmdLineFlag'
collect2: ld 返回 1
程序为:first.cu
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <cutil.h>
#include “first_kernel.cu”
void runTest(int argc,char** argv);
// Program entry point: runs the test, then invokes the cutil exit macro.
// NOTE(review): the linker log in this post reports an undefined reference to
// cutCheckCmdLineFlag from main, so CUT_EXIT presumably expands to a call into
// the cutil library; the nvcc command would then also have to link that library
// (a -L/-l pair for the SDK's cutil build; exact name and path depend on the
// SDK install, TODO confirm).
int main(int argc,char** argv){
runTest(argc,argv);
CUT_EXIT(argc,argv);
}
void runTest(int argc,char** argv){
CUT_DEVICE_INIT(argc,argv);
unsigned int num_blocks=4;
unsigned int num_threads=4;
unsigned int mem_size=sizeof(float)num_threadsnum_blocks;
float* h_idata=(float*)malloc(mem_size);
float* h_odata=(float*)malloc(mem_size);
“first.cu” 53L, 1273C 已写入
[root@lab519 test_gpu]# ls
first.cu first_kernel.cu
[root@lab519 test_gpu]# nvcc -o first first.cu -I/root/NVIDIA_GPU_Computing_SDK/CUDALibraries/common/inc
/tmp/tmpxft_000050a6_00000000-13_first.o: In function `main':
tmpxft_000050a6_00000000-1_first.cudafe1.cpp:(.text+0x32): undefined reference to `cutCheckCmdLineFlag'
/tmp/tmpxft_000050a6_00000000-13_first.o: In function `runTest(int, char**)':
tmpxft_000050a6_00000000-1_first.cudafe1.cpp:(.text+0x142): undefined reference to `cutGetCmdLineArgumenti'
tmpxft_000050a6_00000000-1_first.cudafe1.cpp:(.text+0x1f1): undefined reference to `cutCheckCmdLineFlag'
collect2: ld 返回 1
[root@lab519 test_gpu]# find -name "cutil32.so"
[root@lab519 test_gpu]# find -name "cutil32.lib"
[root@lab519 test_gpu]# whereis cutil32
cutil32:
[root@lab519 test_gpu]# echo PATH
PATH
[root@lab519 test_gpu]# nvcc -o first first.cu -I/root/NVIDIA_GPU_Computing_SDK/CUDALibraries/common/inc
/tmp/tmpxft_000050e7_00000000-13_first.o: In function `main':
tmpxft_000050e7_00000000-1_first.cudafe1.cpp:(.text+0x32): undefined reference to `cutCheckCmdLineFlag'
/tmp/tmpxft_000050e7_00000000-13_first.o: In function `runTest(int, char**)':
tmpxft_000050e7_00000000-1_first.cudafe1.cpp:(.text+0x142): undefined reference to `cutGetCmdLineArgumenti'
tmpxft_000050e7_00000000-1_first.cudafe1.cpp:(.text+0x1f1): undefined reference to `cutCheckCmdLineFlag'
collect2: ld 返回 1
[root@lab519 test_gpu]# vim first.cu
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <cutil.h>
#include “first_kernel.cu”
void runTest(int argc,char** argv);
// Program entry point: runs the test, then invokes the cutil exit macro.
// NOTE(review): the linker log in this post reports an undefined reference to
// cutCheckCmdLineFlag from main, so CUT_EXIT presumably expands to a call into
// the cutil library; the nvcc command would then also have to link that library
// (a -L/-l pair for the SDK's cutil build; exact name and path depend on the
// SDK install, TODO confirm).
int main(int argc,char** argv){
runTest(argc,argv);
CUT_EXIT(argc,argv);
}
/*
 * Host-side driver for testKernel.
 *
 * Allocates host and device buffers holding num_blocks * num_threads floats,
 * fills the input with 1.0f, copies it to the device, launches testKernel,
 * copies the result back and prints it as num_blocks rows of num_threads
 * values each.
 *
 * argc/argv are forwarded unchanged to CUT_DEVICE_INIT.
 *
 * NOTE(review): CUT_DEVICE_INIT, CUDA_SAFE_CALL and CUT_CHECK_ERROR come from
 * <cutil.h>; their exact behaviour is not visible in this file (presumably
 * device selection and error checking). TODO confirm against the SDK header.
 */
void runTest(int argc,char** argv){
    CUT_DEVICE_INIT(argc,argv);

    unsigned int num_blocks=4;
    unsigned int num_threads=4;
    // Byte size of one buffer: one float per thread across the whole grid.
    // (The '*' operators had been lost from this expression.)
    unsigned int mem_size=sizeof(float)*num_threads*num_blocks;

    // Host-side input and output buffers.
    float* h_idata=(float*)malloc(mem_size);
    float* h_odata=(float*)malloc(mem_size);

    // Device-side input and output buffers.
    float* d_idata;
    CUDA_SAFE_CALL(cudaMalloc((void**)&d_idata,mem_size));
    float* d_odata;
    CUDA_SAFE_CALL(cudaMalloc((void**)&d_odata,mem_size));

    // Every input element is 1.0f.
    for(unsigned int i=0;i<num_threads*num_blocks;i++)
        h_idata[i]=1.0f;
    CUDA_SAFE_CALL(cudaMemcpy(d_idata,h_idata,mem_size,cudaMemcpyHostToDevice));

    dim3 grid(num_blocks,1,1);
    dim3 threads(num_threads,1,1);
    // The third launch argument is the dynamic shared-memory size in bytes.
    // NOTE(review): testKernel indexes sdata only by threadIdx.x, so
    // sizeof(float)*num_threads would appear to be enough; mem_size is kept
    // as originally written.
    testKernel<<<grid,threads,mem_size>>>(d_idata,d_odata);
    CUT_CHECK_ERROR("Kernel execution failed");

    CUDA_SAFE_CALL(cudaMemcpy(h_odata,d_odata,mem_size,cudaMemcpyDeviceToHost));

    // Print one row per block, one column per thread in that block.
    for(unsigned int i=0;i<num_blocks;i++){
        for(unsigned int j=0;j<num_threads;j++){
            printf("%5.0f",h_odata[i*num_threads+j]);
        }
        printf("\n");
    }

    free(h_idata);
    free(h_odata);
    CUDA_SAFE_CALL(cudaFree(d_idata));
    CUDA_SAFE_CALL(cudaFree(d_odata));
}
程序first_kernel.cu为:
#ifndef FIRST_KERNEL_H
#define FIRST_KERNEL_H
/*
 * testKernel: each thread copies one element of g_idata into shared memory,
 * multiplies it by the index of its block, and writes the result to the same
 * position in g_odata.
 *
 * Launch requirements:
 *   - 1-D grid of 1-D blocks (only the .x components are used).
 *   - Dynamic shared memory of at least blockDim.x * sizeof(float) bytes,
 *     passed as the third launch argument.
 *   - g_idata and g_odata must each hold at least gridDim.x * blockDim.x
 *     floats; there is no bounds check.
 */
__global__ void
testKernel(float* g_idata,float* g_odata){
    // Dynamically sized shared array; its size comes from the launch configuration.
    extern __shared__ float sdata[];

    const unsigned int bid=blockIdx.x;
    const unsigned int tid_in_block=threadIdx.x;
    const unsigned int tid_in_grid=blockDim.x*blockIdx.x+threadIdx.x;

    // Stage this thread's input element in its own shared-memory slot.
    sdata[tid_in_block]=g_idata[tid_in_grid];
    __syncthreads();

    // Scale by the block index, so every element handled by block b becomes input * b.
    sdata[tid_in_block]*=(float)bid;
    __syncthreads();

    g_odata[tid_in_grid]=sdata[tid_in_block];
}
#endif
求大家帮忙解决。