cublas 多次运行出现 segmentation fault

我用循环多次运行cublas,大概总数10000,但是当运行到200次左右的时候,出现segmentation fault, 之前的运行都没有任何的错误,为何突然出现这个错误?

楼主您好,看到您的帖子了。

您的代码在linux下出现Segmentation Fault,基本上属于您host代码部分的问题(例如,您使用了不正确的指针)。而此部分代码最大可能是您编写的,因为虽然cublas里也含有host部分代码,但cublas是成熟的库,因为它的原因出现问题的可能性不大。

那么基于这种对您的代码的考虑,我们来假设一下为何会运行到200左右才出错。我来假设一种情况:例如您的host代码有如下片段:p = (…)malloc(…); *(p+…) = …; 但却没有检查p是否合法,和/或free(p)的过程。那么当执行到一定时间,导致malloc失败的时候,p实际上的值是NULL, 从而导致segmentation fault.

因为您没有提供代码,只有对现象的描述,我只能大致推测一下错误的所在和可能的原因之一,供您参考。

如果您方便提供您的代码,不妨跟帖到论坛,这样会让诸位会员们(和版主们), 更好的帮您看看。

祝您调试顺利!

我用了gdb调试,
Program received signal SIGSEGV, Segmentation fault.
0x03c7e0ad in ?? () from /usr/lib/libcuda.so.1
我在malloc前面设置了status检查,可是那个也没有报错啊

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#include <cuda_runtime.h>
#include <cublas_v2.h>

#include <gsl/gsl_cblas.h>
#include <gsl/gsl_math.h>
#include <gsl/gsl_randist.h>
#include <gsl/gsl_rng.h>
#include <gsl/gsl_statistics.h>

int GenerateKey(int m, int n, int l, int q, double alpha, FILE *fc, FILE *fg)
{

// initialization
int i;
int j;
int t;
int po;
int repetition = 10;
int error=0;
double erate;


clock_t start ;
clock_t end;
cudaEvent_t begin, stop;
float timer;

cudaEventCreate(&begin);
cudaEventCreate(&stop);




double ti;
double ta;
double tc;
double tg;
double *Tc;
double *Tg;

// double *Te;

double *A;
double *S;
double *E;
double *B;
double *B1;

double *d_A;
double *d_S;
double *d_E;

double a =1.0;
double b =1.0;

int lda = m;
int lds = n;
int lde = m;	


A=(double*)malloc(sizeof(double)*(m*n)); 
S=(double*)malloc(sizeof(double)*(n*l));
E=(double*)malloc(sizeof(double)*(m*l));
B1=(double*)malloc(sizeof(double)*(m*l));
B=(double*)malloc(sizeof(double)*(m*l));
Tc = (double*)malloc(sizeof(double)*repetition);
Tg = (double*)malloc(sizeof(double)*repetition);

// Te = (double*)malloc(sizeof(double)*repetition);

start = clock();
double sigma = (alpha * q ) / sqrt(2 * M_PI);
// generator initialization
const gsl_rng_type *T;   // type of generator
gsl_rng *gen;

gsl_rng_env_setup();

T = gsl_rng_default;
gsl_rng_default_seed = ((unsigned long)(time(NULL)));
gen = gsl_rng_alloc(T);

printf("Data initialization:");	

// init A
for(i = 0; i < m*n; i++)
{
	
	
		A[i]=(double)gsl_rng_uniform_int(gen,q);

}

// init S
for(i = 0; i < n*l; i++)
{

		S[i]=(double)gsl_rng_uniform_int(gen,q);
}

// init E
for(i = 0; i < m*l; i++)
{
		po =fmod(round(gsl_ran_gaussian(gen,sigma)), q);
		if(po < 0)
		{
			E[i] = po + q;
		}
		else
		{
			E[i] = po;
		}
		

}
end = clock();
ti = (double)(end - start) / CLOCKS_PER_SEC;
ti = ti * 1000;


printf("\n");
for(j = 0; j < repetition; j++)
{
	
	printf("cublas begin\n");

// start = clock();
cudaEventRecord(begin, 0);
cublasHandle_t handle;
cublasCreate(&handle);
cublasStatus_t status;

	status =cudaMalloc((void**)&d_A, m*n*sizeof(double));
	if(status !=CUBLAS_STATUS_SUCCESS){
		fprintf(stderr,"!!!!CUBLAS initialization error\n");
		return EXIT_FAILURE;
	}
	status =cudaMalloc((void**)&d_S, n*l*sizeof(double));
	if(status !=CUBLAS_STATUS_SUCCESS){
		fprintf(stderr,"!!!!CUBLAS initialization error\n");
		return EXIT_FAILURE;
	}
	status =cudaMalloc((void**)&d_E, m*l*sizeof(double));
	if(status !=CUBLAS_STATUS_SUCCESS){
		fprintf(stderr,"!!!!CUBLAS initialization error\n");
		return EXIT_FAILURE;
	}

	cublasSetVector(m*n,sizeof(double),A,1,d_A,1);
	cublasSetVector(n*l,sizeof(double),S,1,d_S,1);
	cublasSetVector(m*l,sizeof(double),E,1,d_E,1);


	cublasDgemm(handle,CUBLAS_OP_N,CUBLAS_OP_N, m,l,n,&a,d_A,lda,d_S,lds,&b,d_E,lde);


	cublasGetVector(m*l,sizeof(double),d_E,1,B1,1);


	for(i = 0; i < m*l; i++)
	{
			po = fmod(B1[i], q);
			if(po < 0)
			{
				B1[i] = po + q;
			}
			else
			{
				B1[i] = po;
			}

	}


	cudaFree(d_A);
	cudaFree(d_S);
	cudaFree(d_E);



	cublasDestroy(handle);

	end = clock();
//	tg = (double)(end - start) / CLOCKS_PER_SEC;
//	tg = tg * 1000;
	cudaEventRecord(stop,0);
	cudaEventSynchronize(stop);
	cudaEventElapsedTime(&timer, begin, stop);
	cudaEventDestroy(begin);
	cudaEventDestroy(stop);

	printf("cublas end!\n");

	printf("gsl cblas begins!\n");
	start = clock();
	cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, l, n,
			1.0, A, lda, S, lds, 1.0, E, lde);

	for(i = 0; i < m*l; i++)
	{
			po = fmod(E[i], q);
			if(po < 0)
			{
				B[i] = po + q;
			}
			else
			{
				B[i] = po;
			}
	}
	end = clock();
	ta = (double)(end - start) / CLOCKS_PER_SEC;
	ta = ta * 1000;
	printf("gsl cblas end!\n");

	// time 
	tc = ti +ta;
	tg =  ti + timer;
	printf("CPU:%f, GPU:%f",tc,tg);

	Tc[j] = tc;
	Tg[j] = tg;
	error = 0;
	for(i = 0; i< m*l; i++)
	{
		if(gsl_fcmp(B[i],B1[i],0.00000001)!=0)
		{
			error += 1;
		}
	}
	erate = (double)error / (m*l);
	printf("error rate:%f\n", erate);

// Te[j] = erate;

}


fwrite(Tc,sizeof(double),repetition,fc);
fwrite(Tg,sizeof(double),repetition,fg);

// fwrite(Te,sizeof(double),repetition,fe);

gsl_rng_free(gen);
free(A);
free(S);
free(E);
free(B1);
free(B);


return 0;

}

/* One benchmark configuration: output file names and GenerateKey arguments. */
struct BenchmarkCase {
	const char *cpu_path;   /* file receiving the CPU timings */
	const char *gpu_path;   /* file receiving the GPU timings */
	int m;
	int n;
	int l;
	int q;
	double alpha;
};

/* Mean and variance of the CPU and GPU timings of one configuration. */
struct BenchmarkStats {
	double meanc;
	double meang;
	double variancec;
	double varianceg;
};

/* Read exactly count doubles from the binary file at path; 0 on success. */
static int ReadSamples(const char *path, double *dst, size_t count)
{
	FILE *fp = fopen(path, "rb");
	size_t got;

	if (fp == NULL) {
		perror(path);
		return -1;
	}
	got = fread(dst, sizeof(double), count, fp);
	fclose(fp);
	if (got != count) {
		fprintf(stderr, "%s: expected %lu samples, read %lu\n",
				path, (unsigned long)count, (unsigned long)got);
		return -1;
	}
	return 0;
}

/*
 * Run GenerateKey `calls` times for one configuration, then read the timing
 * files back into tc/tg (each holding `samples` doubles) and compute their
 * mean and variance. Returns 0 on success, -1 on any failure.
 */
static int RunBenchmarkCase(const struct BenchmarkCase *bc, int calls, size_t samples,
		double *tc, double *tg, struct BenchmarkStats *out)
{
	FILE *fc = fopen(bc->cpu_path, "wb");
	FILE *fg = fopen(bc->gpu_path, "wb");
	int failed = 0;
	int i;

	if (fc == NULL || fg == NULL) {
		perror(fc == NULL ? bc->cpu_path : bc->gpu_path);
		failed = 1;
	}

	for (i = 0; !failed && i < calls; i++) {
		if (GenerateKey(bc->m, bc->n, bc->l, bc->q, bc->alpha, fc, fg) != 0) {
			fprintf(stderr, "GenerateKey failed on call %d for %s\n", i, bc->cpu_path);
			failed = 1;
		}
	}

	/* Close before reading back so the buffered timings reach the files. */
	if (fc != NULL && fclose(fc) != 0)
		failed = 1;
	if (fg != NULL && fclose(fg) != 0)
		failed = 1;
	if (failed)
		return -1;

	if (ReadSamples(bc->cpu_path, tc, samples) != 0 ||
	    ReadSamples(bc->gpu_path, tg, samples) != 0)
		return -1;

	out->meanc     = gsl_stats_mean(tc, 1, samples);
	out->variancec = gsl_stats_variance(tc, 1, samples);
	out->meang     = gsl_stats_mean(tg, 1, samples);
	out->varianceg = gsl_stats_variance(tg, 1, samples);
	return 0;
}

/*
 * Run the CPU-vs-GPU benchmark for six parameter sets and print, for each,
 * the mean and variance of the CPU and GPU timings and the resulting speedup.
 * Returns 0 on success and EXIT_FAILURE if any configuration fails.
 */
int main()
{
	/*
	 * kSamplesPerCall must equal the number of timings GenerateKey writes to
	 * each file per call (its internal repetition count, 10).
	 */
	enum { kCalls = 1000, kSamplesPerCall = 10 };
	static const struct BenchmarkCase cases[] = {
		{ "233bc", "233bg", 4536, 233, 233, 32749, 0.000217  },
		{ "233ac", "233ag", 1042, 233, 233, 32749, 0.000217  },
		{ "214c",  "214g",  1333, 214, 214, 16381, 0.00045   },
		{ "192c",  "192g",  1500, 192, 192, 8191,  0.0009959 },
		{ "166c",  "166g",  1319, 166, 166, 4093,  0.0024    },
		{ "136c",  "136g",  2008, 136, 136, 2003,  0.0065    }
	};
	enum { kNumCases = sizeof(cases) / sizeof(cases[0]) };

	struct BenchmarkStats stats[kNumCases];
	const size_t samples = (size_t)kCalls * kSamplesPerCall;
	double *tc = (double*)malloc(sizeof(double) * samples);
	double *tg = (double*)malloc(sizeof(double) * samples);
	int status = 0;
	int i;

	if (tc == NULL || tg == NULL) {
		fprintf(stderr, "main: host memory allocation failed\n");
		status = EXIT_FAILURE;
	}

	for (i = 0; status == 0 && i < kNumCases; i++) {
		if (RunBenchmarkCase(&cases[i], kCalls, samples, tc, tg, &stats[i]) != 0)
			status = EXIT_FAILURE;
	}

	/* The summary is printed only when every configuration completed. */
	for (i = 0; status == 0 && i < kNumCases; i++) {
		printf("meanc:%f, meang:%f, variancac:%f, varianceg:%f, speedup:%f\n",
				stats[i].meanc, stats[i].meang, stats[i].variancec, stats[i].varianceg,
				stats[i].meanc / stats[i].meang);
	}

	free(tg);
	free(tc);
	return status;
}

看到了楼主您的回帖。

在您的帖子里,我看到了Tc, Tg的多次分配,但似乎却没有找到对他们指向的内存的释放过程. 建议修正?
不过,这似乎只是一个小问题。。。

此外,既然贵代码是挂在了libcuda.so的领空里,再加上其中有很多gsl之类的函数调用,这些我并不熟悉。

所以可能下文继续的我的表达是不正确的或者对您没有帮助性。

如果方便,能否发送一下出错时候的stack trace信息?(输入backtrace回车)。这可能对我,对其他会员提供宝贵的参考意见。

lz的程序至少有一个明显会导致seg fault的问题:

这两行
cudaEventCreate(&begin);
cudaEventCreate(&stop);
在for循环外面。

cudaEventDestroy(begin);
cudaEventDestroy(stop);
在for循环里面。这样从第二次循环开始,使用的就是已经被销毁的event了。所以需要把 cudaEventDestroy 这两行移到for循环后面去。