struct Prop
{
float CPUTakeTime;
float GPUTakeTime;
char CPUName[255];
char GPUName[255];
char GPUVer[10];
char FileType[10];
unsigned int PixelHeight;
unsigned int PixelWidth;
unsigned int FileBits;
DWORD FileSize;
DWORD dataSize;
size_t num;
};
bool BMP_HOST_LOAD(char * filePath,BITMAPFILEHEADER * &header,Prop &prop);
void BMP_HOST_SAVE(char * filePath,BITMAPFILEHEADER * &buffer,Prop &prop);
void global GPU_BMP_8bit_MedianFilter( BYTE * srcbuffer,BYTE * dstbuffer,int width,int height)
{
int x=threadIdx.x+blockIdx.xblockDim.x;
int y=threadIdx.y+blockIdx.yblockDim.y;
int offent=x+yblockDim.xgridDim.x;
if(x<width && y<height)
dstbuffer[offent]=srcbuffer[offent];
}
int main()
{
char name[256]=“3.bmp”;
//CPU
BITMAPFILEHEADER * bmpbuffer;
Prop prop;
if(BMP_HOST_LOAD(name,(&bmpbuffer),prop))
{
return 0;
}
BITMAPINFOHEADER * src_info=(BITMAPINFOHEADER)(bmpbuffer+1);
BYTE * src_data =(BYTE*)(bmpbuffer)+bmpbuffer->bfOffBits;
prop.PixelHeight=src_info->biHeight;
prop.PixelWidth=src_info->biWidth;
prop.FileSize=bmpbuffer->bfSize;
prop.dataSize=bmpbuffer->bfSize-bmpbuffer->bfOffBits;
BITMAPFILEHEADER * bmpnew=(BITMAPFILEHEADER*)malloc(prop.FileSize);
memcpy(bmpnew,bmpbuffer,prop.FileSize);
BITMAPINFOHEADER * dst_info=(BITMAPINFOHEADER*)(bmpnew+1);
BYTE * dst_data =(BYTE*)(bmpnew)+bmpnew->bfOffBits;
BYTE * gpu_bmp,*gpu_bmp_new;
Prop * gpuprop;
dim3 block((prop.PixelHeight)/50,(prop.PixelWidth)/50);
dim3 thread(50,50);
cudaMalloc((void**)&gpu_bmp,prop.dataSize);
cudaMemcpy(gpu_bmp,src_data,prop.dataSize,cudaMemcpyHostToDevice);
cudaMalloc((void**)&gpu_bmp_new,prop.dataSize);
cudaMalloc((void**)&gpuprop,sizeof(Prop));
GPU_BMP_8bit_MedianFilter<<<block,thread>>>(gpu_bmp_new,gpu_bmp,prop.PixelWidth,prop.PixelHeight);
cudaMemcpy(dst_data,gpu_bmp_new,prop.dataSize,cudaMemcpyDeviceToHost);
cout<<“gpu comlete”<<endl;
BMP_HOST_SAVE(“54.bmp”,*(&bmpnew),prop);
cudaFree(gpu_bmp);
cudaFree(gpu_bmp_new);
cudaFree(gpuprop);
free(bmpbuffer);
free(bmpnew);
return 0;
}
在kernel函数GPU_BMP_8bit_MedianFilter中,不知道为什么总是无法得到数据,求帮忙