照着网上的例子运行了一下,想计算一下GPU的运行时间。用了cudaEventElapsedTime()来算时间,可是最后costtime的值似乎没有存上,还是初始值0.
请问大大这是咋回事?
#include <stdio.h>
#include <stdlib.h>
#include “cuda_runtime_api.h”
#define DATA_SIZE 1048576
int data[DATA_SIZE];
//this function is to generate 0-9 digital number randomly.
void GenerateNumber(int* number, int size) {
for (int i=0; i<size;i++)
{
number[i]=rand()%10;
}
}
global static void sumOfSquares(int *num, int *result)
{
int sum = 0;
int i;
//clock_t start = clock();//
for (i=0; i<DATA_SIZE; i++)
{
sum+=num[i]*num[i];
}
*result = sum;
//*time = clock()-start;
}
int main() {
GenerateNumber(data,DATA_SIZE);
int *gpudata, *result;
cudaMalloc((void**)&gpudata,sizeof(int)*DATA_SIZE);
cudaMalloc((void**)&result,sizeof(int));
cudaMemcpy(gpudata,data,sizeof(int)*DATA_SIZE,cudaMemcpyHostToDevice);
cudaEvent_t start,stop;
cudaEventCreate(&start);
cudaEventCreate(&stop);
cudaEventRecord(start,0);
sumOfSquares<<<1,1,0>>>(gpudata,result);//input and output and time-return
cudaEventRecord(stop,0);
float costtime = 0;
cudaEventElapsedTime(&costtime,start,stop);
printf("time: %3.1f",costtime);
cudaEventDestroy(start);
cudaEventDestroy(stop);
int sum;
cudaMemcpy(&sum,result,sizeof(int),cudaMemcpyDeviceToHost);
cudaFree(gpudata);
cudaFree(result);
printf("sum=%d\n", sum);
//verify the result of GPU and CPU
sum = 0;
for(int i=0; i<DATA_SIZE; i++)
sum +=data[i]*data[i];
printf("sum(CPU):%d\n",sum);
}
[attach]3370[/attach]