初始化的问题

大家好,我有个问题:我写了一个小程序,先初始化CPU数组,不调用内核,直接从CPU拷到GPU,再从GPU拷回来。但输出显示,最后一部分数据会出现溢出越界。请问是怎么回事啊?难道是我的数组太大了?不会吧,才四千多个元素…各位大大帮个忙

#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>
#include <string.h>
#include <math.h>
#include <cutil.h>

// NX and NY: their product (NX*NY) is the number of float elements that
// RunLBM loops over and prints.
#define NX 64
#define NY 64
// NOTE(review): NUM_THREADS is not referenced anywhere in the visible code.
#define NUM_THREADS 4
/*
 * Round-trips a host buffer through device memory without launching a kernel.
 *
 * Steps: initialise the device via cutil, fill a host array of NX*NY floats
 * with 1.0f, copy it host -> device, copy it back device -> host, print every
 * element as "<index> <value>", then release both buffers.
 *
 * Parameters:
 *   argc, argv - the program's command-line arguments, forwarded unchanged to
 *                CUT_DEVICE_INIT.
 *
 * On host allocation failure the function reports the error on stderr and
 * terminates the process. CUDA failures are handled by CUDA_SAFE_CALL.
 * NOTE(review): some cutil versions only check errors in CUDA_SAFE_CALL for
 * debug builds - confirm against the cutil.h in use.
 */
void RunLBM(int argc, char** argv)
{
    CUT_DEVICE_INIT(argc, argv);

    // Element count and byte size are computed once and reused for the
    // allocations, both copies and both loops, so they cannot disagree.
    const unsigned int num_elems = NX * NY;
    const size_t mem_size = sizeof(float) * num_elems;

    float* hr = (float*) malloc(mem_size);
    if (hr == NULL)
    {
        fprintf(stderr, "RunLBM: failed to allocate %lu bytes of host memory\n",
                (unsigned long) mem_size);
        exit(EXIT_FAILURE);
    }

    for (unsigned int i = 0; i < num_elems; i++)
    {
        hr[i] = 1.0f;
    }

    // frNew must be a pointer: cudaMalloc stores the device address in it.
    float* frNew = NULL;
    CUDA_SAFE_CALL(cudaMalloc((void**) &frNew, mem_size));

    // Copy host memory to device.
    CUDA_SAFE_CALL(cudaMemcpy(frNew, hr, mem_size,
                              cudaMemcpyHostToDevice));

    // Copy the same data back from device to host.
    CUDA_SAFE_CALL(cudaMemcpy(hr, frNew, mem_size,
                              cudaMemcpyDeviceToHost));

    // %u matches the unsigned index type.
    for (unsigned int i = 0; i < num_elems; i++)
        printf("%u %f\n", i, hr[i]);

    free(hr);
    CUDA_SAFE_CALL(cudaFree(frNew));
    // NOTE(review): cudaThreadExit is deprecated in later CUDA toolkits in
    // favour of cudaDeviceReset; kept here to match the toolkit this file
    // targets (it uses cutil.h).
    cudaThreadExit();
}
/*
 * Program entry point: prints a banner, runs the host/device copy round trip
 * in RunLBM, then hands control to cutil's CUT_EXIT macro.
 *
 * Parameters:
 *   argc, argv - command-line arguments, forwarded to RunLBM and CUT_EXIT.
 */
int main(int argc, char** argv)
{
    // Plain ASCII quotes: typographic quotes are not valid string delimiters.
    printf("\n you win\n");
    RunLBM(argc, argv);

    CUT_EXIT(argc, argv);
}

[ 本帖最后由 zhangarbu 于 2010-7-20 09:41 编辑 ]