有关CUDA编程的纹理存储器...为什么处理后图片为纯黑，图片为bmp格式

system · 2012 年1 月 17 日 12:03

有关CUDA编程的纹理存储器…为什么处理后图片为纯黑，图片为bmp格式

代码如下：

代码如下#include <stdlib.h>

#include <stdio.h>

#include <string.h>

#include <math.h>

#include “readandsave.h”

// includes, project

#include <cutil_inline.h>

// includes, kernels

#include <simpleTexture_kernel.cu>

char *image_filename = “test.bmp”;

char out_filename = “lena_out.bmp”;

float angle = 0.5f; // angle to rotate image by (in radians)

#define MIN_EPSILON_ERROR 5e-3f

////////////////////////////////////////////////////////////////////////////////

// declaration, forward

void runTest( int argc, char** argv);

extern “C”

void computeGold( float* reference, float* idata, const unsigned int len);

////////////////////////////////////////////////////////////////////////////////

// Program main

////////////////////////////////////////////////////////////////////////////////

int

main( int argc, char** argv)

{

runTest( argc, argv);

}

////////////////////////////////////////////////////////////////////////////////

//! Run a simple test for CUDA

////////////////////////////////////////////////////////////////////////////////

void

runTest( int argc, char** argv)

{

// load image from disk

readBmp(image_filename);

int j;

printf("source_data:\n");

for (j = 0; j<20;j++)

{

    printf("%d ",pBmpBuf[j]);

    printf("%f\n",sinf(j));

}

printf("\nwidth:%d,height:%d\n",width,height);

cutilDeviceSynchronize();

clock_t first, second;

first=clock();        //开始计时

unsigned int size_unsignedchar = height * width * sizeof(unsigned char);

unsigned char * d_odata;

cudaMalloc( (void**) &d_odata, size_unsignedchar);

unsigned char * d_idata;

cudaMalloc( (void**) &d_idata, size_unsignedchar);

// allocate array and copy image data

cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindUnsigned);

cudaArray* cu_array;

cudaMallocArray( &cu_array, &channelDesc, width, height );

if(cudaMemcpyToArray( cu_array, 0, 0, pBmpBuf, size_unsignedchar, cudaMemcpyHostToDevice) == cudaSuccess)

{

    printf("cudaMemcpyToArray Success\n");

}

// set texture parameters

tex.addressMode[0] = cudaAddressModeWrap;

tex.addressMode[1] = cudaAddressModeWrap;

tex.filterMode = cudaFilterModeLinear;

tex.normalized = true; // access with normalized texture coordinates

// Bind the array to the texture

if (cudaBindTextureToArray( tex, cu_array, channelDesc) == cudaSuccess)

{

    printf("cudaBindTexToArray Success\n");

}

dim3 dimBlock(8, 8, 1);

dim3 dimGrid(width / dimBlock.x, height / dimBlock.y, 1);

// warmup

transformKernel<<< dimGrid, dimBlock, 0 >>>( d_odata, width, height, angle);

transformKernel<<< dimGrid, dimBlock, 0 >>>( d_odata, width, height, angle);

printf("kernel done!\n");





cutilDeviceSynchronize() ;

// allocate mem for the result on host side

unsigned char* h_odata = (unsigned char*) malloc( size_unsignedchar);

// copy result from device to host

if(cudaMemcpy( h_odata, d_odata, size_unsignedchar, cudaMemcpyDeviceToHost) == cudaSuccess)

{

    printf("cudaMemcpyDeviceToHost Success!\n");

}

second=clock();            //停止计时

printf("The time cost is: %.0f ms\n",(double)second-(double)first);

for (j = 0; j<20;j++)

{

    printf("%d ",h_odata[j]);

}

// write result to file

saveBmp(out_filename,h_odata,width,height,biBitCount,pColorTable);

cudaFree(d_odata);

cudaFreeArray(cu_array);

cutilDeviceReset();

getchar();

getchar();

}

system · 2012 年1 月 18 日 05:56

unsigned int size_unsignedchar = height * width * sizeof(unsigned char);
…
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindUnsigned);
cudaMallocArray( &cu_array, &channelDesc, width, height );
…
cudaMemcpyToArray( cu_array, 0, 0, pBmpBuf, size_unsignedchar, cudaMemcpyHostToDevice)

eg: height=10,width = 10
size_unsignedchar = 1010;
而cu_array的大小是：1010*4