数组求和的CU程序

小弟刚学cuda,做了一个数组元素求和的程序,源程序如下。不知道那里有错误,请各位大侠不吝指点。。。拜谢。。。

#include<stdlib.h>
#include<string.h>
#include<math.h>
#include<stdio.h>
#include<cutil.h>

global void
testKernel( float* g_idata,float* g_odata )
{
extern shared float sdata;

const unsigned int tid = threadIdx.x;

const unsigned int num_data = blockDim.x;

sdata[tid]=g_idata[tid];

unsigned int time=(int)(log((float)num_data)/log(2.0f));

unsigned int run_thread=num_data/2;

for(int i=time;i>0;–i)
{

if(tid<run_threads)
sdata[tid]=sdata[tid]+sdata[tid+run_thread];
__syncthreads();
run_thread /=2;
}
__syncthreads();
g_odata[tid]=sdata[tid];
}

int main(int argc,char **argv)
{
CUT_DEVICE_INIT(argc,argv);

float a[8]={1,2,3,4,5,6,7,8};

int f=sizeof(float);
int mem_size=8*f;

float g_idata;
CUDA_SAFE_CALL(cudaMalloc((void
*)&g_idata,mem_size));
float g_odata;
CUDA_SAFE_CALL(cudaMalloc((void
*)&g_odata,mem_size));

CUDA_SAFE_CALL(cudaMemcpy(g_idata,a,mem_size,cudaMemcpyHostToDevice));

dim3 grid(1,1,1);
dim3 block(8,1,1);
testKernel <<<grid,block,mem_size>>>(g_idata,g_odata);

CUT_CHECK_ERROR(“kernel executin failed”);
CUDA_SAFE_CALL(cudaMemcpy(g_odata,a,f,cudaMemcpyDeviceToHost));

printf(“%lf”,a[0]);
free(a);

CUDA_SAFE_CALL(cudaFree(g_idata));
CUDA_SAFE_CALL(cudaFree(g_odata));

CUT_EXIT(argc,argv);

}

没什么

看reduction代码,另外最好说明一下错误信息