要调用一个内核函数实现矩阵的扩边,即把矩阵A(nx,nz)扩充成B(nx+2npad,nz+2npad),B中间部分是A的值,扩边部分的值等于相邻A的边上的值,程序实现时单纯复制A的部分没有问题,扩边部分出了问题,如图
代码如下
__global__ void pad_float_2d(float *out, unsigned long pitch_out, float *in, unsigned long pitch_in,int xrange, int zrange, int npadded ){
int ix=blockIdx.y*blockDim.y+threadIdx.y;
int jz=blockIdx.x*blockDim.x+threadIdx.x;
if(ix>=npadded && ix<xrange-npadded && jz>=npadded && jz<zrange-npadded )
{
out[ix*pitch_out+jz]=in[(ix-npadded)*pitch_in+(jz-npadded)];
}
__syncthreads();
if(ix<npadded && jz>=npadded && jz<zrange-npadded )
{
out[ix*pitch_out+jz]=out[npadded*pitch_out+jz];
out[(xrange-ix-1)*pitch_out+jz]=out[(xrange-npadded-1)*pitch_out+jz];
}
__syncthreads();
if(ix<xrange && jz<npadded )
{
out[ix*pitch_out+jz]=out[ix*pitch_out+npadded];
out[ix*pitch_out+(zrange-jz-1)]=out[ix*pitch_out+(zrange-npadded-1)];
}
return;
}
[ 本帖最后由 xiaomasddp 于 2010-7-16 20:27 编辑 ]