#include <stdio.h>
#include <cutil.h>
#include <cuda_runtime.h>
#define Width 2
#define Height 2
#define Depth 2
int bmp1[Width][Height][Depth]={11,22,33,44,55,66,77,88};
int bmp2[Width][Height][Depth];
int main(){
cudaPitchedPtr devPitchedPtr;
cudaExtent extent = make_cudaExtent(2, 2, 2);
cudaMalloc3D(&devPitchedPtr, extent);
cudaMemset3D( devPitchedPtr, 0, extent);
cudaError status;
cudaMemcpy3DParms HostToDev = { 0 };
HostToDev.srcPtr = make_cudaPitchedPtr((void*)bmp1, sizeof(int), 2, 2);
HostToDev.dstPtr = devPitchedPtr;
HostToDev.extent = extent;
HostToDev.kind = cudaMemcpyHostToDevice;
status = cudaMemcpy3D(&HostToDev);
if(status != cudaSuccess){
fprintf(stderr, "MemcpyHtD: %s\n", cudaGetErrorString(status));
}
cudaMemcpy3DParms DevToHost = {0};
DevToHost.srcPtr = devPitchedPtr;
DevToHost.dstPtr = make_cudaPitchedPtr((void*)bmp2, sizeof(int), 2, 2);
DevToHost.extent = extent;
DevToHost.kind = cudaMemcpyDeviceToHost;
status = cudaMemcpy3D(&DevToHost);
if(status != cudaSuccess){
fprintf(stderr, "MemcpyHtD: %s\n", cudaGetErrorString(status));
}
cudaFree(&devPitchedPtr);
int i, j, k;
for(i=0; i<2; i++)
for(j=0; j<2; j++)
for(k=0; k<2; k++)
printf("bmp2[%d][%d][%d]=%d\n", i, j, k, bmp2[i][j][k]);
return 0;
}
问题1:cudaError_t cudaMalloc3D (struct cudaPitchedPtr * pitchedDevPtr, struct cudaExtent extent)
在设备上分配至少width * height * depth bytes的内存, 返回的pitchedDevPtr包含有xsize和ysize,也就是extent中的width和height,请问对于没有出现的depth怎么理解?
问题2:结果为
bmp2[0][0][0]=11
bmp2[0][0][1]=22
bmp2[0][1][0]=33
bmp2[0][1][1]=44
bmp2[1][0][0]=0
bmp2[1][0][1]=0
bmp2[1][1][0]=0
bmp2[1][1][1]=0
请按任意键继续. . .
按自己的理解结果应该为
bmp2[0][0][0]=11
bmp2[0][0][1]=22
bmp2[0][1][0]=33
bmp2[0][1][1]=44
bmp2[1][0][0]=55
bmp2[1][0][1]=66
bmp2[1][1][0]=77
bmp2[1][1][1]=88
知道问题出在make_cudaPitchedPtr的内存对齐上,对这点不是很理解,请大家帮忙指导一下,谢谢。
如果是3维数据,那么数据的寻址方式是:
Host Code:
…
cudaPitchedPtr data;
extent = make_cudaExtent(width* sizeof(float4), height, depth);
cudaMalloc3D(&(d_data), extent);
…
Kernel Code:
global kernel(char* data, size_t pitch, …) {
…
float4 element = ((float4) (data + (xsizeof(float4) + ypitch + zpitchheight)));
…
}
也就是pitch 指的是实际分配的图像宽,它一般是大于原始的图像宽的
没看懂。不好意思