__global__ void siKernel(float *c, float *a,float *b)
{
const int i = threadIdx.x;
__shared__ float d[10];
d[i]=0;
d[i] = a[i] - b[i];
d[i]=d[i]*d[i];
__syncthreads();
d[i]=d[i]+d[i+1];
c[i]=sqrtf(d[i]);
}
int main()
{
clock_t st,et;
float a[10],b[10],res[10];
for(int i=0;i!=10;i++)
{
a[i]=i+1;
b[i]=i;
}
int n=sizeof(float)*10;
int *ad,*bd,*cd;
st=clock();
cudaMalloc(&ad,n);
cudaMalloc(&bd,n);
cudaMemcpy(ad,a,n,cudaMemcpyHostToDevice);
cudaMemcpy(bd,b,n,cudaMemcpyHostToDevice);
cudaMalloc(&cd,n);
siKernel<<<1,10,sizeof(float)*10>>>(cd,ad,bd);
cudaMemcpy(res,cd,n,cudaMemcpyDeviceToHost);
et=clock();
for(int i=0;i!=10;i++)
printf("%f",res[i]);
printf(",%d",et-st);
int i=0;
scanf("%d",&i);
}
不知道为什么,如果把里面的数组什么的设置成INT就不会出问题,但是像这样设置成FLOAT就会编译出错
具体错误:1>d:/my documents/visual studio 2010/Projects/ses/ses/kernel.cu(44): error : argument of type “int *” is incompatible with parameter of type “float *”
1>
1>d:/my documents/visual studio 2010/Projects/ses/ses/kernel.cu(44): error : argument of type “int *” is incompatible with parameter of type “float *”
1>
1>d:/my documents/visual studio 2010/Projects/ses/ses/kernel.cu(44): error : argument of type “int *” is incompatible with parameter of type “float *”
1>
1> 3 errors detected in the compilation of “C:/Users/ADMINI~1/AppData/Local/Temp/tmpxft_00000a5c_00000000-5_kernel.cpp1.ii”.
1>C:\Program Files (x86)\MSBuild\Microsoft.Cpp\v4.0\BuildCustomizations\CUDA 5.0.targets(592,9): error MSB3721: 命令““C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v5.0\bin\nvcc.exe” -gencode=arch=compute_10,code="sm_10,compute_10" --use-local-env --cl-version 2010 -ccbin “C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\bin” -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v5.0\include" -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v5.0\include" -G --keep-dir “Debug” -maxrregcount=0 --machine 32 --compile -g -D_MBCS -Xcompiler "/EHsc /W3 /nologo /Od /Zi /RTC1 /MDd " -o “Debug\kernel.cu.obj” “d:\my documents\visual studio 2010\Projects\ses\ses\kernel.cu””已退出,返回代码为 2。
1>已完成执行任务“CudaCompile”的操作 - 失败。
1>已完成在项目“ses.vcxproj”中生成目标“CudaBuild”的操作 - 失败。
错误中的44行是这里代码的35行
谢谢各位大大了!!