想请教一个问题，麻烦解答一下，非常感谢！
我之前用一维 float3 数组来表示三维 float3 数组，那样写的程序没有问题。
后来改用绑定到 3D CUDA 数组的纹理之后，程序就一直在运算，始终不结束。
不知道程序不正常是否与使用这个纹理有关？
我仿照的是下面这段代码：
// Element type used for the host volume, the 3D CUDA array and the kernel
// output; the code below reads and writes its x, y, z and w members.
#define my_point float4
// File-scope texture reference over a 3-dimensional array of my_point.
// d_render samples it with tex3D and main binds a cudaArray to it.
texture<my_point, 3, cudaReadModeElementType> tex; // 3D texture ElementType
/**
 * Copies the bound 3D texture `tex` into a flat output buffer, one texel per
 * thread, clamping small z values.
 *
 * Launch layout: a single 3D thread block whose dimensions equal the volume
 * dimensions (x = width, y = height, z = depth). Each thread handles the texel
 * at (threadIdx.x, threadIdx.y, threadIdx.z) and writes element
 *   threadIdx.z * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x
 * of d_output.
 *
 * Preconditions: `tex` is bound to a 3D array with unnormalized coordinates,
 * and d_output holds at least blockDim.x * blockDim.y * blockDim.z elements
 * (the kernel is not given a size, so it cannot bounds-check).
 *
 * @param d_output device buffer; x and y receive the texel's x and y, z
 *                 receives 8 when the texel's z is below 9 and the texel's z
 *                 otherwise. The w component is left untouched.
 */
__global__ void d_render(my_point *d_output)
{
    // Row-major flat index of this thread inside the block.
    const unsigned int idx = threadIdx.z * blockDim.x * blockDim.y
                           + threadIdx.y * blockDim.x
                           + threadIdx.x;

    // Fetch the texel once. The +0.5f addresses the texel centre, which with
    // unnormalized coordinates and point filtering selects the same texel as
    // the bare integer coordinate but does not sit on a texel boundary.
    const my_point texel = tex3D(tex,
                                 threadIdx.x + 0.5f,
                                 threadIdx.y + 0.5f,
                                 threadIdx.z + 0.5f);

    d_output[idx].x = texel.x;
    d_output[idx].y = texel.y;
    d_output[idx].z = (texel.z < 9.0f) ? 8.0f : texel.z;
}
/**
 * Prints the z component of every element of a width*height*depth volume
 * stored row-major (x fastest, then y, then z), one row per line and one
 * blank-line-separated group per depth slice.
 */
static void print_z_slices(const my_point *volume, int width, int height, int depth)
{
    for (int i = 0; i < depth; ++i)
    {
        for (int j = 0; j < height; ++j)
        {
            for (int k = 0; k < width; ++k)
            {
                printf("%f ", volume[k + j * width + i * width * height].z);
            }
            printf("\n");
        }
        printf("\n\n");
    }
}

/**
 * Builds a 3x5x7 volume of my_point on the host, uploads it into a 3D CUDA
 * array, binds that array to the texture reference `tex`, runs d_render with
 * one 3x5x7 thread block and prints the z components before and after.
 *
 * @return 0 on success, EXIT_FAILURE if a host allocation or the texture
 *         binding fails (other CUDA errors are reported by cutilSafeCall).
 */
int main( )
{
    const int width = 3;
    const int height = 5;
    const int depth = 7;
    const size_t count = (size_t)width * height * depth;
    const size_t bytes = count * sizeof(my_point);

    my_point *h_volume = (my_point *)malloc(bytes);
    my_point *h_output = (my_point *)malloc(bytes);
    if (h_volume == NULL || h_output == NULL)
    {
        printf("could not allocate host memory!\n");
        free(h_volume);
        free(h_output);
        return EXIT_FAILURE;
    }

    // Every component of element n (row-major, x fastest) is set to n + 1.
    memset(h_volume, 0, bytes);
    for (int i = 0; i < depth; ++i)
        for (int j = 0; j < height; ++j)
            for (int k = 0; k < width; ++k)
            {
                const int n = k + j * width + i * width * height;
                const float value = (float)(n + 1);
                h_volume[n].x = value;
                h_volume[n].y = value;
                h_volume[n].z = value;
                h_volume[n].w = value;
            }
    print_z_slices(h_volume, width, height, depth);

    cudaArray *d_volumeArray = NULL;
    // create 3D array
    cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<my_point>();
    const cudaExtent volumeSize = make_cudaExtent(width, height, depth);
    cutilSafeCall( cudaMalloc3DArray(&d_volumeArray, &channelDesc, volumeSize) );

    // copy data to 3D array
    cudaMemcpy3DParms copyParams = {0};
    copyParams.srcPtr = make_cudaPitchedPtr((void*)h_volume, volumeSize.width*sizeof(my_point), volumeSize.width, volumeSize.height);
    copyParams.dstArray = d_volumeArray;
    copyParams.extent = volumeSize;
    copyParams.kind = cudaMemcpyHostToDevice;
    cutilSafeCall( cudaMemcpy3D(&copyParams) );

    // set texture parameters
    tex.normalized = false;               // access with unnormalized (texel) coordinates
    tex.filterMode = cudaFilterModePoint; // nearest-texel lookup, no interpolation
    tex.channelDesc = channelDesc;
    // The address mode is left at its default: wrap addressing is only
    // supported together with normalized coordinates.

    // bind array to 3D texture (runtime API, so compare against cudaSuccess)
    if (cudaBindTextureToArray(tex, d_volumeArray, channelDesc) != cudaSuccess)
    {
        printf("could not bind texture!");
        cudaFreeArray(d_volumeArray);
        free(h_volume);
        free(h_output);
        return EXIT_FAILURE;
    }

    my_point *d_output = NULL;
    cutilSafeCall( cudaMalloc((void **)&d_output, bytes) );
    // Zero the output so the w component, which the kernel never writes, is defined.
    cutilSafeCall( cudaMemset(d_output, 0, bytes) );

    // One block with one thread per texel, as d_render expects.
    dim3 gridSize(1, 1, 1);
    dim3 blockSize(width, height, depth);
    d_render<<<gridSize, blockSize>>>(d_output);
    cutilSafeCall( cudaGetLastError() ); // surfaces launch-configuration errors

    // Pre-fill with a non-zero byte pattern so a failed copy is visible in the output.
    memset(h_output, 2, bytes);
    // Blocking copy: waits for the kernel and reports any execution error.
    cutilSafeCall( cudaMemcpy(h_output, d_output, bytes, cudaMemcpyDeviceToHost) );
    print_z_slices(h_output, width, height, depth);

    cutilSafeCall( cudaUnbindTexture(tex) );
    cutilSafeCall( cudaFree(d_output) );
    cutilSafeCall( cudaFreeArray(d_volumeArray) );
    free(h_output);
    free(h_volume);

    // Keeps the console window open; the process does not exit until a key
    // is pressed, so the program appears to "keep running" after the output.
    system("pause");
    return 0;
}