又出现了一个新的问题:
#include <stdio.h>
#include <stdlib.h>
#include “cuda_runtime.h”
#include <time.h>
#include “device_launch_parameters.h”
#define DATA_SIZE 1048567
#define THREAD_NUM 256
int data[DATA_SIZE];
bool InitCUDA(){
int count;
cudaGetDeviceCount(&count) ;
if ( count == 0)
{
fprintf(stderr, "There is no device.\n") ;
return false;
}
int i;
for ( i = 0 ;i < count ; i++)
{
cudaDeviceProp prop ;
if ( cudaGetDeviceProperties(& prop ,i) == cudaSuccess)
{
if ( prop.major >= 1)
{
break;
}
}
}
if ( i == count)
{
fprintf(stderr ,"There is no device supporting CUDA1.x.\n");
return false;
}
cudaSetDevice( i );
return true;
}
void generateNumbers(int* data, int size){
for (int i = 0; i < size; i++)
{
data[i] = rand()%10;
}
}
global static void sumofSquares(int* num, int* result, clock_t* gpu_time){
int sum = 0;
// const int tid = threadIdx.x;
//
// const int size = DATA_SIZE/THREAD_NUM;
clock_t start = clock();
for(int i = 0 ;i < DATA_SIZE; i++){
sum += num[i]*num[i];
}
*result = sum;
*gpu_time = clock()-start;
}
int main(){
if (! InitCUDA())
{
return 0;
}
printf("Hello word ,cuda has been initialized.\n");
generateNumbers(data,DATA_SIZE);
int* gpudata,*gpu_result;
clock_t* gpu_time = NULL;
clock_t gpu_time_used;
clock_t cpu_time;
clock_t cpu_time_used;
cudaMalloc((void**)&gpudata, sizeof(int)*DATA_SIZE);
cudaMalloc((void**)&gpu_result, sizeof(int));
cudaMalloc((void**)gpu_time, sizeof(clock_t));
cudaMemcpy(gpudata, data,sizeof(int)*DATA_SIZE,cudaMemcpyHostToDevice);
sumofSquares<<<1, 1, 0>>>(gpudata,gpu_result, gpu_time);
cudaMemcpy(&gpu_time_used, gpu_time, sizeof(clock_t),cudaMemcpyDeviceToHost);
int sum ;
cudaMemcpy(&sum ,gpu_result,sizeof(int),cudaMemcpyDeviceToHost);
printf("sum(gpu)=%d,gpu_time_used=%d\n", sum, gpu_time_used );
// sum = 0;
// clock_t cpu_start = clock();
// for (int i = 0 ;i < DATA_SIZE; i++)
// {
// sum += data[i]*data[i];
//
// }
// cpu_time_used = clock() - cpu_start;
//
// printf(“sum(cpu)=%d,cpu_time_used=%d\n”,sum, cpu_time_used);
return 0;
}
上面代码中,cudaMalloc((void**)gpu_time, sizeof(clock_t)); gpu_time 缺了个取地址符,导致结果:sum(cpu) 和 cpu_time_used均不正确。
后修正为&gpu_time 后,sum(cpu)和cpu_time_used 均运算正确。
想问的问题是:sumofSquares<<<1, 1, 0>>>(gpudata,gpu_result, gpu_time); gpu_time 和gpu_result 不存在任何关系,为什么会影响结果正确性?