关于 CUDA 纹理存储器的问题:用纹理存储器处理 BMP 格式的图片后,输出的图片是纯黑的,这是为什么?
代码如下:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <time.h>
#include "readandsave.h"
// includes, project
#include <cutil_inline.h>
// includes, kernels
#include <simpleTexture_kernel.cu>
// Path of the input bitmap handed to readBmp().
char *image_filename = "test.bmp";
// Path of the output bitmap handed to saveBmp().
char *out_filename = "lena_out.bmp";
float angle = 0.5f; // angle to rotate image by (in radians)
// Not referenced in the visible part of this file.
#define MIN_EPSILON_ERROR 5e-3f
////////////////////////////////////////////////////////////////////////////////
// declaration, forward

// Loads the input bitmap, runs the texture kernel and writes the result.
void runTest( int argc, char** argv);

// Declared with C linkage; not called in the visible part of this file.
// NOTE(review): presumably the CPU reference from the SDK sample - confirm.
extern "C"
void computeGold( float* reference, float* idata, const unsigned int len);
////////////////////////////////////////////////////////////////////////////////
// Program entry point: forwards the command line to runTest()
////////////////////////////////////////////////////////////////////////////////
int main(int argc, char** argv)
{
    runTest(argc, argv);
    return 0; // same exit status C++ gives when main falls off the end
}
////////////////////////////////////////////////////////////////////////////////
//! Abort with a diagnostic when a CUDA runtime call did not succeed
//! @param status  value returned by the CUDA runtime call
//! @param what    short label of the call, used in the error message
////////////////////////////////////////////////////////////////////////////////
static void
checkCuda( cudaError_t status, const char* what)
{
    if (status != cudaSuccess)
    {
        fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

////////////////////////////////////////////////////////////////////////////////
//! Run a simple test for CUDA
//!
//! Reads image_filename, copies its pixels into a CUDA array bound to the
//! texture reference `tex`, launches transformKernel twice (warm-up + run),
//! copies the result back and writes it to out_filename.
//!
//! The CUDA array uses an 8-bit unsigned channel so that one texel is one byte
//! of pBmpBuf; the amount copied into it is width * height bytes.
//!
//! NOTE(review): the code treats the bitmap as one byte per pixel with no row
//! padding - confirm against readBmp() and biBitCount.
//! NOTE(review): `tex` and transformKernel are defined in
//! simpleTexture_kernel.cu (not visible here). Linear filtering needs a
//! texture that returns floats, i.e. presumably
//! texture<unsigned char, 2, cudaReadModeNormalizedFloat>, and the kernel then
//! has to scale the [0,1] result back to 0..255 before storing it, otherwise
//! the output stays black - confirm.
//!
//! @param argc  unused
//! @param argv  unused
////////////////////////////////////////////////////////////////////////////////
void
runTest( int argc, char** argv)
{
    (void) argc;
    (void) argv;

    // load image from disk
    // (pBmpBuf, width, height, biBitCount and pColorTable are declared outside
    //  this file; presumably readBmp() fills them - confirm)
    readBmp(image_filename);
    if (pBmpBuf == NULL || width <= 0 || height <= 0)
    {
        fprintf(stderr, "failed to load %s\n", image_filename);
        exit(EXIT_FAILURE);
    }

    const size_t imageBytes = (size_t) width * (size_t) height * sizeof(unsigned char);
    // never preview more bytes than the image actually holds
    const size_t previewCount = imageBytes < 20 ? imageBytes : 20;

    printf("source_data:\n");
    for (size_t j = 0; j < previewCount; j++)
    {
        printf("%d ", pBmpBuf[j]);
        printf("%f\n", sinf((float) j));
    }
    printf("\nwidth:%d,height:%d\n", width, height);

    cutilDeviceSynchronize();
    clock_t first, second;
    first = clock(); // start timing

    // output buffer on the device, cleared so that pixels outside the launched
    // grid (see dimGrid below) have a defined value
    unsigned char* d_odata = NULL;
    checkCuda(cudaMalloc((void**) &d_odata, imageBytes), "cudaMalloc(d_odata)");
    checkCuda(cudaMemset(d_odata, 0, imageBytes), "cudaMemset(d_odata)");

    // allocate array and copy image data
    // one 8-bit unsigned channel per texel, matching the unsigned char pixels
    cudaChannelFormatDesc channelDesc =
        cudaCreateChannelDesc(8, 0, 0, 0, cudaChannelFormatKindUnsigned);
    cudaArray* cu_array = NULL;
    checkCuda(cudaMallocArray(&cu_array, &channelDesc, width, height),
              "cudaMallocArray");
    checkCuda(cudaMemcpyToArray(cu_array, 0, 0, pBmpBuf, imageBytes,
                                cudaMemcpyHostToDevice),
              "cudaMemcpyToArray");
    printf("cudaMemcpyToArray Success\n");

    // set texture parameters
    tex.addressMode[0] = cudaAddressModeWrap;
    tex.addressMode[1] = cudaAddressModeWrap;
    tex.filterMode = cudaFilterModeLinear;
    tex.normalized = true; // access with normalized texture coordinates

    // Bind the array to the texture
    checkCuda(cudaBindTextureToArray(tex, cu_array, channelDesc),
              "cudaBindTextureToArray");
    printf("cudaBindTexToArray Success\n");

    dim3 dimBlock(8, 8, 1);
    // Integer division: a trailing strip narrower than one block is not
    // covered. Kept as is because the kernel's bounds handling is not visible
    // here; rounding up would need a guard inside transformKernel.
    dim3 dimGrid(width / dimBlock.x, height / dimBlock.y, 1);

    // warmup
    transformKernel<<< dimGrid, dimBlock, 0 >>>( d_odata, width, height, angle);
    checkCuda(cudaGetLastError(), "transformKernel launch (warmup)");
    // actual run
    transformKernel<<< dimGrid, dimBlock, 0 >>>( d_odata, width, height, angle);
    checkCuda(cudaGetLastError(), "transformKernel launch");
    cutilDeviceSynchronize();
    printf("kernel done!\n");

    // allocate mem for the result on host side
    unsigned char* h_odata = (unsigned char*) malloc(imageBytes);
    if (h_odata == NULL)
    {
        fprintf(stderr, "out of host memory (%lu bytes)\n",
                (unsigned long) imageBytes);
        exit(EXIT_FAILURE);
    }

    // copy result from device to host
    checkCuda(cudaMemcpy(h_odata, d_odata, imageBytes, cudaMemcpyDeviceToHost),
              "cudaMemcpy(device to host)");
    printf("cudaMemcpyDeviceToHost Success!\n");

    second = clock(); // stop timing
    // clock() counts ticks; convert to milliseconds before printing
    printf("The time cost is: %.0f ms\n",
           1000.0 * (double) (second - first) / CLOCKS_PER_SEC);

    for (size_t j = 0; j < previewCount; j++)
    {
        printf("%d ", h_odata[j]);
    }

    // write result to file
    saveBmp(out_filename, h_odata, width, height, biBitCount, pColorTable);

    // release host and device resources
    free(h_odata);
    checkCuda(cudaFree(d_odata), "cudaFree(d_odata)");
    checkCuda(cudaFreeArray(cu_array), "cudaFreeArray");
    cutilDeviceReset();

    // wait for key presses before returning
    getchar();
    getchar();
}