linux下运行错误free(): invalid next size (fast)

在linux编译时出现如题所述的问题,我将错误贴一部分在下面,麻烦大家看看啊

*** glibc detected *** ./myfirst: free(): invalid next size (fast): 0x000000000af4c0f0 ***
======= Backtrace: =========
/lib64/libc.so.6[0x3233271cec]
/lib64/libc.so.6(cfree+0x8c)[0x323327590c]
/usr/lib64/libcuda.so[0x2ab323359add]
/home/user/bin/cuda/lib64/libcudart.so.2(cudaMalloc+0x44)[0x2ab323094e34]
./myfirst[0x403974]
/lib64/libc.so.6(__libc_start_main+0xf4)[0x323321d974]
./myfirst(__gxx_personality_v0+0x139)[0x4032e9]
======= Memory map: ========
00400000-00418000 r-xp 00000000 08:08 458761 /home/user/NVIDIA_GPU_Computing_SDK/C/bin/linux/release/myfirst
2ab324189000-2ab324289000 rw-s 11d099000 00:11 11518 /dev/nvidia0
2ab324289000-2ab324389000 rw-s 11e80d000 00:11 11518 /dev/nvidia0
2ab328000000-2ab328021000 rw-p 2ab328000000 00:00 0
2ab328021000-2ab32c000000 —p 2ab328021000 00:00 0

7fff15d1d000-7fff15d32000 rw-p 7ffffffea000 00:00 0 [stack]
ffffffffff600000-ffffffffffe00000 —p 00000000 00:00 0 [vdso]
已放弃

大概就是这样啊
现在越来越晕了啊,感觉自己不是在CUDA,反倒是在使劲学习C啊,哎,C基础不好啊,但是这类问题还是第一次碰到,麻烦大家 啊。

没看懂,能否把代码放上来?

下面是代码,我感觉是不是我分配的空间太多了啊??
显存一个G,对于这个来说,应该是足够的了啊,我觉得,溢出的话怎么解决啊?


#include "myfirst_kernel.cu"
#include "cutil.h"
#include <cstdio>
#include <cstdlib>
#include <stdlib.h>
#include <stdio.h>
#include <time.h>
#include <string.h>
#include <math.h>
 int main( int argc,char** argv)  //the main program added by zhoulin 2010.3.4
   {
	CUT_DEVICE_INIT(argc, argv);
	
	// 4 floats each for alignment reasons
	unsigned int memSize = sizeof( float) * numBodies;

	//host端分配内存
	clock_t* timer=(clock_t*)malloc(32*sizeof(clock_t)); //我们这里并非是计算一个block的时间,而是总共的运算时间
	if(timer==NULL) {printf("memory of timer is fault");exit(0);}
	float4* h_pos=(float4*)malloc(numBodies*sizeof(float));
	if(h_pos==NULL) {printf("memory of h_pos is fault");exit(0);}
	float4* h_vel=(float4*)malloc(numBodies*sizeof(float));
	if(h_vel==NULL) {printf("memory of h_vel is fault");exit(0);}
	
	//生成初试数据  bodysystemcpu.cpp
   float alat=1.5496f;
   float disp=0.5f;
   int index=0;
   float rcell[3][4]={0.0,0.0,0.0,0.5,0.5,0.0,0.0,0.5,0.5,0.5,0.0,0.5};
   srand(int(time(NULL)/2));
   for(int k=0;k<2;k++)  //8
   { 
   for(int j=0;j<2;j++)//8
   {      
   for(int i=0;i<4;i++)
   {    
   for(int L=0;L<4;L++)
   {        
	h_pos[index].x=alat*(i+rcell[1][L])+2.0f*disp*(rand()/(float)RAND_MAX-0.5f);
	h_pos[index].y=alat*(i+rcell[2][L])+2.0f*disp*(rand()/(float)RAND_MAX-0.5f);
	h_pos[index].z=alat*(i+rcell[3][L])+2.0f*disp*(rand()/(float)RAND_MAX-0.5f);
	h_pos[index].w=1.0f;
	h_vel[index].x=0.0f;
	h_vel[index].y=0.0f;
	h_vel[index].z=0.0f;
	h_vel[index].w=0.0f;
   index++;
   }
   }
   }
   }
	//device端分配内存
   clock_t* dtimer;
	CUDA_SAFE_CALL(cudaMalloc((void**)&dtimer, sizeof(clock_t)*16*2));
	float4* d_vel;
	CUDA_SAFE_CALL(cudaMalloc((float4**)&d_vel,numBodies*sizeof(float));
	float4* d_pos;
	CUDA_SAFE_CALL(cudaMalloc((float3**)&pos, numBodies*sizeof(float));
	
	//向显存拷入数据
   CUDA_SAFE_CALL(cudaMemcpy(d_pos, h_pos, memSize,cudaMemcpyHostToDevice));
   CUDA_SAFE_CALL(cudaMemcpy(d_vel, h_vel, memSize,cudaMemcpyHostToDevice));
	//运行核函数
	dim3 grid(16, 1, 1);
	   // execute the kernel: we set q=1 here.-----zhoulin 2010.3.4
	integrateBodies<<< grid, threads, memSize >>>(d_pos, d_vel,dtimer);

   	// check if kernel invocation generated an error
	 CUT_CHECK_ERROR("Kernel execution failed");
	
   //将数据拷回主机内存
   CUDA_SAFE_CALL(cudaMemcpy(timer, dtimer,sizeof(clock_t)*32, cudaMemcpyDeviceToHost));
   CUDA_SAFE_CALL(cudaMemcpy(h_pos, d_pos, memSize, cudaMemcpyDeviceToHost));
	CUDA_SAFE_CALL(cudaMemcpy(h_vel, d_vel, memSize, cudaMemcpyDeviceToHost));
	//释放存储器,但是运行时不管有没有以下的free语句,错误提示相同
	free(h_pos);
   free(h_acc);
	free(h_vel);
	CUDA_SAFE_CALL(cudaFree(d_pos));
	CUDA_SAFE_CALL(cudaFree(d_acc));
	CUDA_SAFE_CALL(cudaFree(d_vel));
	CUDA_SAFE_CALL(cudaFree(dtimer));

	//时间测试
   clock_t minStart = timer[0];
   clock_t maxEnd = timer[16];
   for (int i = 1; i < 16; i++)
   { 
   minStart = timer[i] < minStart ? timer[i] : minStart; 
   maxEnd = timer[16+i] > maxEnd ? timer[16+i] : maxEnd;
   }
   printf("time = %d\n", maxEnd - minStart);
	CUT_EXIT(argc, argv);  //exit CUDA
   }
//}

[ 本帖最后由 hnuzhoulin 于 2010-4-5 16:52 编辑 ]

你用malloc,然后用cudaFreeHost,这两个不搭配啊

呵呵,没注意,应该用注释掉的free,但是还是同样的问题,那个分配内存的时候,有一个地方没有改过来,现在改了

你用CUDA-gdb看一下,看它提示那里有错,呵呵!光眼睛看实在太麻烦!呵呵

这个CUDA-GDB不会用啊
找到手册了,先看看,谢谢提醒啊

不用谢,对了,找到错误后,帖上来看看,呵呵!