我用循环多次运行cublas,大概总数10000,但是当运行到200次左右的时候,出现segmentation fault, 之前的运行都没有任何的错误,为何突然出现这个错误?
楼主您好,看到您的帖子了。
您的代码在linux下出现Segmentation Fault,基本上属于您host代码部分的问题(例如,您使用了不正确的指针)。而此部分代码最大可能是您编写的,因为虽然cublas里也含有host部分代码,但cublas是成熟的库,因为它的原因出现问题的可能性不大。
那么基于这种对您的代码的考虑,我们来假设一下为何会运行到200左右才出错。我来假设一种情况:例如您的host代码有如下片段:p = (…)malloc(…); *(p+…) = …; 但却没有检查p是否合法,和/或free(p)的过程。那么当执行到一定时间,导致malloc失败的时候,p实际上的值是NULL, 从而导致segmentation fault.
因为您没有提供代码,只有对现象的描述,我只能大致推测一下错误的所在和可能的原因之一。供您参考。
如果您方便提供您的代码,不妨跟帖到论坛,这样会让诸位会员们(和版主们), 更好的帮您看看。
祝您调试顺利!
我用了gdb调试,
Program received signal SIGSEGV, Segmentation fault.
0x03c7e0ad in ?? () from /usr/lib/libcuda.so.1
我在malloc前面设置了status检查,可是那个也没有报错啊
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <cuda_runtime.h>
#include <cublas_v2.h>
#include <gsl/gsl_cblas.h>
#include <gsl/gsl_math.h>
#include <gsl/gsl_randist.h>
#include <gsl/gsl_rng.h>
#include <gsl/gsl_statistics.h>
int GenerateKey(int m, int n, int l, int q, double alpha, FILE *fc, FILE *fg)
{
// initialization
int i;
int j;
int t;
int po;
int repetition = 10;
int error=0;
double erate;
clock_t start ;
clock_t end;
cudaEvent_t begin, stop;
float timer;
cudaEventCreate(&begin);
cudaEventCreate(&stop);
double ti;
double ta;
double tc;
double tg;
double *Tc;
double *Tg;
// double *Te;
double *A;
double *S;
double *E;
double *B;
double *B1;
double *d_A;
double *d_S;
double *d_E;
double a =1.0;
double b =1.0;
int lda = m;
int lds = n;
int lde = m;
A=(double*)malloc(sizeof(double)*(m*n));
S=(double*)malloc(sizeof(double)*(n*l));
E=(double*)malloc(sizeof(double)*(m*l));
B1=(double*)malloc(sizeof(double)*(m*l));
B=(double*)malloc(sizeof(double)*(m*l));
Tc = (double*)malloc(sizeof(double)*repetition);
Tg = (double*)malloc(sizeof(double)*repetition);
// Te = (double*)malloc(sizeof(double)*repetition);
start = clock();
double sigma = (alpha * q ) / sqrt(2 * M_PI);
// generator initialization
const gsl_rng_type *T; // type of generator
gsl_rng *gen;
gsl_rng_env_setup();
T = gsl_rng_default;
gsl_rng_default_seed = ((unsigned long)(time(NULL)));
gen = gsl_rng_alloc(T);
printf("Data initialization:");
// init A
for(i = 0; i < m*n; i++)
{
A[i]=(double)gsl_rng_uniform_int(gen,q);
}
// init S
for(i = 0; i < n*l; i++)
{
S[i]=(double)gsl_rng_uniform_int(gen,q);
}
// init E
for(i = 0; i < m*l; i++)
{
po =fmod(round(gsl_ran_gaussian(gen,sigma)), q);
if(po < 0)
{
E[i] = po + q;
}
else
{
E[i] = po;
}
}
end = clock();
ti = (double)(end - start) / CLOCKS_PER_SEC;
ti = ti * 1000;
printf("\n");
for(j = 0; j < repetition; j++)
{
printf("cublas begin\n");
// start = clock();
cudaEventRecord(begin, 0);
cublasHandle_t handle;
cublasCreate(&handle);
cublasStatus_t status;
status =cudaMalloc((void**)&d_A, m*n*sizeof(double));
if(status !=CUBLAS_STATUS_SUCCESS){
fprintf(stderr,"!!!!CUBLAS initialization error\n");
return EXIT_FAILURE;
}
status =cudaMalloc((void**)&d_S, n*l*sizeof(double));
if(status !=CUBLAS_STATUS_SUCCESS){
fprintf(stderr,"!!!!CUBLAS initialization error\n");
return EXIT_FAILURE;
}
status =cudaMalloc((void**)&d_E, m*l*sizeof(double));
if(status !=CUBLAS_STATUS_SUCCESS){
fprintf(stderr,"!!!!CUBLAS initialization error\n");
return EXIT_FAILURE;
}
cublasSetVector(m*n,sizeof(double),A,1,d_A,1);
cublasSetVector(n*l,sizeof(double),S,1,d_S,1);
cublasSetVector(m*l,sizeof(double),E,1,d_E,1);
cublasDgemm(handle,CUBLAS_OP_N,CUBLAS_OP_N, m,l,n,&a,d_A,lda,d_S,lds,&b,d_E,lde);
cublasGetVector(m*l,sizeof(double),d_E,1,B1,1);
for(i = 0; i < m*l; i++)
{
po = fmod(B1[i], q);
if(po < 0)
{
B1[i] = po + q;
}
else
{
B1[i] = po;
}
}
cudaFree(d_A);
cudaFree(d_S);
cudaFree(d_E);
cublasDestroy(handle);
end = clock();
// tg = (double)(end - start) / CLOCKS_PER_SEC;
// tg = tg * 1000;
cudaEventRecord(stop,0);
cudaEventSynchronize(stop);
cudaEventElapsedTime(&timer, begin, stop);
cudaEventDestroy(begin);
cudaEventDestroy(stop);
printf("cublas end!\n");
printf("gsl cblas begins!\n");
start = clock();
cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, l, n,
1.0, A, lda, S, lds, 1.0, E, lde);
for(i = 0; i < m*l; i++)
{
po = fmod(E[i], q);
if(po < 0)
{
B[i] = po + q;
}
else
{
B[i] = po;
}
}
end = clock();
ta = (double)(end - start) / CLOCKS_PER_SEC;
ta = ta * 1000;
printf("gsl cblas end!\n");
// time
tc = ti +ta;
tg = ti + timer;
printf("CPU:%f, GPU:%f",tc,tg);
Tc[j] = tc;
Tg[j] = tg;
error = 0;
for(i = 0; i< m*l; i++)
{
if(gsl_fcmp(B[i],B1[i],0.00000001)!=0)
{
error += 1;
}
}
erate = (double)error / (m*l);
printf("error rate:%f\n", erate);
// Te[j] = erate;
}
fwrite(Tc,sizeof(double),repetition,fc);
fwrite(Tg,sizeof(double),repetition,fg);
// fwrite(Te,sizeof(double),repetition,fe);
gsl_rng_free(gen);
free(A);
free(S);
free(E);
free(B1);
free(B);
return 0;
}
/* Number of timings GenerateKey appends to each output file per call. */
enum { TIMINGS_PER_CALL = 10 };

/* Parameters and output file names for one benchmark configuration. */
typedef struct {
    int m;
    int n;
    int l;
    int q;
    double alpha;
    const char *cpuFile; /* receives the CPU timings */
    const char *gpuFile; /* receives the GPU timings */
} BenchmarkConfig;

/* Read exactly count doubles from the binary file at path; returns 1 on success, 0 on failure. */
static int ReadTimings(const char *path, double *dst, size_t count)
{
    size_t got;
    FILE *fp = fopen(path, "rb");
    if (fp == NULL) {
        perror(path);
        return 0;
    }
    got = fread(dst, sizeof(double), count, fp);
    fclose(fp);
    if (got != count) {
        fprintf(stderr, "%s: expected %lu timings, read %lu\n",
                path, (unsigned long)count, (unsigned long)got);
        return 0;
    }
    return 1;
}

/*
 * Run GenerateKey `calls` times for one configuration, then read the recorded
 * timings back into tc/tg (each must hold calls * TIMINGS_PER_CALL doubles)
 * and store their mean and variance. Returns 1 on success, 0 on failure.
 */
static int RunBenchmark(const BenchmarkConfig *cfg, int calls,
                        double *tc, double *tg,
                        double *meanC, double *varC,
                        double *meanG, double *varG)
{
    const size_t samples = (size_t)calls * TIMINGS_PER_CALL;
    int ok = 1;
    int i;
    FILE *fc = fopen(cfg->cpuFile, "wb");
    FILE *fg = fopen(cfg->gpuFile, "wb");

    if (fc == NULL || fg == NULL) {
        perror(fc == NULL ? cfg->cpuFile : cfg->gpuFile);
        ok = 0;
    }
    for (i = 0; ok && i < calls; i++) {
        if (GenerateKey(cfg->m, cfg->n, cfg->l, cfg->q, cfg->alpha, fc, fg) != 0) {
            fprintf(stderr, "GenerateKey failed on call %d (m=%d, n=%d)\n", i, cfg->m, cfg->n);
            ok = 0;
        }
    }
    /* fclose flushes buffered data, so a failure here means the file is incomplete. */
    if (fc != NULL && fclose(fc) != 0) {
        perror(cfg->cpuFile);
        ok = 0;
    }
    if (fg != NULL && fclose(fg) != 0) {
        perror(cfg->gpuFile);
        ok = 0;
    }
    if (!ok) {
        return 0;
    }

    if (!ReadTimings(cfg->cpuFile, tc, samples) || !ReadTimings(cfg->gpuFile, tg, samples)) {
        return 0;
    }
    *meanC = gsl_stats_mean(tc, 1, samples);
    *varC = gsl_stats_variance(tc, 1, samples);
    *meanG = gsl_stats_mean(tg, 1, samples);
    *varG = gsl_stats_variance(tg, 1, samples);
    return 1;
}

/*
 * Benchmark driver: for each parameter set, call GenerateKey 1000 times,
 * then print the mean and variance of the CPU and GPU timings and the
 * CPU/GPU speedup. Returns 0 on success, EXIT_FAILURE if any step fails.
 */
int main()
{
    static const BenchmarkConfig configs[] = {
        {4536, 233, 233, 32749, 0.000217, "233bc", "233bg"},
        {1042, 233, 233, 32749, 0.000217, "233ac", "233ag"},
        {1333, 214, 214, 16381, 0.00045, "214c", "214g"},
        {1500, 192, 192, 8191, 0.0009959, "192c", "192g"},
        {1319, 166, 166, 4093, 0.0024, "166c", "166g"},
        {2008, 136, 136, 2003, 0.0065, "136c", "136g"},
    };
    const int configCount = (int)(sizeof(configs) / sizeof(configs[0]));
    const int repetition = 1000;
    double meanc[sizeof(configs) / sizeof(configs[0])];
    double meang[sizeof(configs) / sizeof(configs[0])];
    double variancec[sizeof(configs) / sizeof(configs[0])];
    double varianceg[sizeof(configs) / sizeof(configs[0])];
    double *tc;
    double *tg;
    int status = 0;
    int i;

    tc = (double *)malloc(sizeof(double) * (size_t)repetition * TIMINGS_PER_CALL);
    tg = (double *)malloc(sizeof(double) * (size_t)repetition * TIMINGS_PER_CALL);
    if (tc == NULL || tg == NULL) {
        fprintf(stderr, "main: out of memory\n");
        free(tc);
        free(tg);
        return EXIT_FAILURE;
    }

    for (i = 0; i < configCount; i++) {
        if (!RunBenchmark(&configs[i], repetition, tc, tg,
                          &meanc[i], &variancec[i], &meang[i], &varianceg[i])) {
            status = EXIT_FAILURE;
            break;
        }
    }

    if (status == 0) {
        for (i = 0; i < configCount; i++) {
            /* The "variancac" spelling is left unchanged so existing output consumers keep working. */
            printf("meanc:%f, meang:%f, variancac:%f, varianceg:%f, speedup:%f\n", meanc[i], meang[i], variancec[i], varianceg[i], meanc[i] / meang[i]);
        }
    }

    free(tg);
    free(tc);
    return status;
}
看到了楼主您的回帖。
在您的帖子里,我看到了Tc, Tg的多次分配,但似乎却没有找到对他们指向的内存的释放过程. 建议修正?
不过,这似乎只是一个小问题。。。
此外,贵代码是挂在了libcuda.so的领空里,再加上其中有很多gsl之类的函数调用,这些我并不熟悉。
所以可能下文继续的我的表达是不正确的或者对您没有帮助性。
如果方便,能否发送一下出错时候的stack trace信息?(输入backtrace回车)。这可能对我,对其他会员提供宝贵的参考意见。
lz的程序至少有一个明显会导致seg fault的问题:
这两行
cudaEventCreate(&begin);
cudaEventCreate(&stop);
在for循环外面。
而
cudaEventDestroy(begin);
cudaEventDestroy(stop);
在for循环里面。所以需要把这两行移到for循环后面去。