程序就是两个二维数组相加,cudamemcpy2D与cudaMallocPitch好像在处理数据的时候不对,调了一天了结果始终不对,跪求前辈指点,代码如下:
#include “cuda_runtime.h”
#include “device_launch_parameters.h”
#include <stdio.h>
global void addKernel(int **c, int **a, int **b)
{
int i = threadIdx.x;
int j = threadIdx.y;
c[i][j] = a[i][j] + b[i][j];
}
int main()
{
int **a;
int **b;
int **c;
int row,col;
// Add vectors in parallel.
int **dev_a;
int **dev_b;
int **dev_c;
int width=5,height=5;
size_t size=sizeof(int)*width;
size_t pitcha,pitchb,pitchc;
a= new int *[height];
b=new int *[height];
c=new int *[height];
for (row=0;row<height;row++)
{
a[row]=new int[width];
b[row]=new int[width];
c[row]=new int[width];
for (col=0;col<width;col++)
{
a[row][col]=(row+1)*(col+1);
b[row][col]=10*(row+1)*(col+1);
c[row][col]=0;
}
}
for(int i=0;i<5;i++){
for(int j=0;j<5;j++)
printf("%4d",a[i][j]);
printf("\n");
}
for(int i=0;i<5;i++){
for(int j=0;j<5;j++)
printf("%4d",b[i][j]);
printf("\n");
}
cudaMallocPitch((void**)&dev_a,&pitcha,size,height);
cudaMallocPitch((void**)&dev_b,&pitchb,size,height);
cudaMallocPitch((void**)&dev_c,&pitchc,size,height);
cudaMemset2D(dev_a, pitcha, 0, size, height);
cudaMemset2D(dev_b, pitchb, 0, size, height);
cudaMemset2D(dev_c, pitchc, 0, size, height);
cudaMemcpy2D(dev_a,pitcha,a,size,size,height,cudaMemcpyHostToDevice);
cudaMemcpy2D(dev_b,pitchb,b,size,size,height,cudaMemcpyHostToDevice);
dim3 dimBlock(5,5);
addKernel<<<1,dimBlock>>>(dev_c,dev_a,dev_b);
cudaMemcpy2D(c,size,dev_c,pitchc,size,height,cudaMemcpyDeviceToHost);
for(int i=0;i<5;i++){
for(int j=0;j<5;j++)
printf("%4d",c[i][j]);
printf("\n");
}
cudaFree(dev_a);
cudaFree(dev_b);
cudaFree(dev_c);
// cudaDeviceReset must be called before exiting in order for profiling and
// tracing tools such as Nsight and Visual Profiler to show complete traces.
return 0;
}