换成sizeof(int) 不行 我的程序是这样的 有没有高手帮看看能不能调过
/**
- Copyright 1993-2012 NVIDIA Corporation. All rights reserved.
- Please refer to the NVIDIA end user license agreement (EULA) associated
- with this source code for terms and conditions that govern your use of
- this software. Any use, reproduction, disclosure, or distribution of
- this software and related documentation outside the terms of the EULA
- is strictly prohibited.
*/
#include <stdio.h>
#include <stdlib.h>
#include <cuda.h>
define N 3
global void add(int *a,int *b,int *c,int *dev_a,int dev_b,int dev_c){
int tid = blockIdx.xblockDim.x +threadIdx.x;
printf(“%d \n”,tid);
/
*dev_a = *a;
*dev_b = *b;
*dev_c = *c;
printf(" tid : %d a : %d b: %d c: %d \n",tid,*a,*b,*c);
/
}
/
global void add(int *a,int *b,int c){
int tid = blockIdx.xblockDim.x +threadIdx.x;
printf(" tid : %d a : %d b: %d c: %d \n",tid,*a,*b,*c);
// if(tid <N) c[tid]= a[tid]+b[tid];
}
*/
/**
- Host function that prepares data array and passes it to the CUDA kernel.
*/
int main(void) {
int T =32,B =1;
int *a = new int(1);
int *b = new int(2);
int *c = new int(3);
int *dev_a,*dev_b,*dev_c;
cudaEvent_t start,stop;
float elapsed_time_ms;
size_t size =3;
*a = 1;
*b = 2;
*c = 3;
//cudaHostAlloc( (void**)&a, size, cudaHostAllocMapped || cudaHostAllocWriteCombined );
cudaHostAlloc( &a, 3sizeof(int), cudaHostAllocMapped); // word checking
printf("size : %d ",size);
// cudaHostAlloc( (void**)&b, sizesizeof(int), cudaHostAllocMapped || cudaHostAllocWriteCombined );
cudaHostAlloc( &b, size*sizeof(int), cudaHostAllocMapped);
cudaHostAlloc(&c, size, cudaHostAllocMapped );
// load arrays with some numbers
cudaHostGetDevicePointer(&dev_a, a, 0); // mem. copy to device not need now, but ptrs needed instead
cudaError_t cudaerr = cudaDeviceSynchronize();
if (cudaerr != CUDA_SUCCESS)printf(“Kernel launch filed with error 1 "%s".\n”,cudaGetErrorString(cudaerr));
cudaHostGetDevicePointer(&dev_b, b, 0);
cudaerr = cudaDeviceSynchronize();
if (cudaerr != CUDA_SUCCESS)printf(“Kernel launch filed with error 2 "%s".\n”,cudaGetErrorString(cudaerr));
cudaHostGetDevicePointer(&dev_c ,c, 0);
cudaerr = cudaDeviceSynchronize();
if (cudaerr != CUDA_SUCCESS)printf(“Kernel launch filed with error 3"%s".\n”,cudaGetErrorString(cudaerr));
// start time
cudaEventCreate(&start);
add<<<B,T>>>(a,b,c,dev_a,dev_b,dev_c);
cudaerr = cudaDeviceSynchronize();
if (cudaerr != CUDA_SUCCESS)printf(“Kernel launch filed with error 4"%s".\n”,cudaGetErrorString(cudaerr));
/*
cudaThreadSynchronize(); // copy back not needed but now need thread synchronization
cudaEventCreate(&stop);
// end time
// print results
cudaEventElapsedTime(&elapsed_time_ms,start,stop);
printf(“Time to calculate results: %f ms.\n”, elapsed_time_ms); // print out execution time
cudaFreeHost(a); // clean up
cudaFreeHost(b);
cudaFreeHost(c);
cudaEventDestroy(start);
cudaEventDestroy(stop);
*/
[i][i][i]return 0;
}
[/i][/i][/i]