最近在学习Layered Textures,自己尝试编写代码,但是得到的结果却跟预想的不一样,请大家帮忙看一下是什么问题,代码如下
#include <cuda_runtime.h>
texture<int, cudaTextureType2DLayered> texref;
cudaArray_t cuarray;
__global__ void kernel(int *e)
{
int x = threadIdx.x;
int y = threadIdx.y;
int z = threadIdx.z;
int temp = tex2DLayered(texref, (float)x, (float)y, z);
e[z*4*6+y*4+x] = temp;
}
int main()
{
int *a, *b, *c;
a = (int*)malloc(4*6*sizeof(int));
b = (int*)malloc(4*6*sizeof(int));
c = (int*)malloc(4*6*sizeof(int));
for(int i=0; i<4; i++)
{
for(int j=0; j<6; j++)
{
a[i*6+j] = i + j;
b[i*6+j] = i + j + 1;
c[i*6+j] = i + j + 2;
}
}
int *d;
d = (int*)malloc(4*6*sizeof(int)*3);
memcpy(d, a, 4*6*sizeof(int));
memcpy(d+4*6, b, 4*6*sizeof(int));
memcpy(d+2*4*6, c, 4*6*sizeof(int));
int *e;
e = (int*)malloc(4*6*sizeof(int)*3);
cudaChannelFormatDesc cudesc = cudaCreateChannelDesc<int>();
cudaExtent ext = make_cudaExtent(4, 6, 3);
cudaMalloc3DArray(&cuarray, &cudesc, ext, cudaArrayLayered);
cudaMemcpy3DParms pam = {0};
pam.dstArray = cuarray;
pam.extent = ext;
pam.srcPtr = make_cudaPitchedPtr(d, ext.width*sizeof(int), ext.width, ext.height);
cudaMemcpy3D(&pam);
cudaBindTextureToArray(&texref, cuarray, &cudesc);
int *dev_e;
cudaMalloc(&dev_e, 4*6*sizeof(int)*3);
dim3 blk(4,6,3);
kernel<<<1, blk>>>(dev_e);
cudaMemcpy(e, dev_e, 4*6*3*sizeof(int), cudaMemcpyDeviceToHost);
}
计算完后host端的e中都是0,而不是d中的内容。请大家帮帮忙看一下,谢谢了。