__device__ void PointOfImg(char* grayData1,char* grayData2,int width1,int height1,int width2,int height2,cPoint* corner1,cPoint* corner2,int* reslut,int threadNum)
{
int i,j,k;
int sum1,sum2;
int size1,size2;
size1=width1*height1;
size2=width2*height2;
for (i=0;i<size1;i++)
{
corner1[i].x=0;
corner1[i].y=0;
corner1[i].data=0;
}
for (i=0;i<size2;i++)
{
corner2[i].x=0;
corner2[i].y=0;
corner2[i].data=0;
}
for (i=0;i<THREAD_NUM*4;i++)
{
reslut[i]=i*i;
}
}
__device__ int res[THREAD_NUM*4];
__device__ void qiu(int* reslut,int tid)
{
int width1=5;
int height1=10;
// cPoint** corner1=(cPoint**)malloc(sizeof(cPoint*)*width1);
// (*corner1)->data=(int)malloc(sizeof(int)*width1);
// (*corner1)->x=(int)malloc(sizeof(int)*width1);
// (*corner1)->y=(int)malloc(sizeof(int)*width1);
// for (int i=0;i<width1;i++)
// {
// corner1[i]=(cPoint*)malloc(sizeof(cPoint)*height1);
// corner1[i]->data=(int)malloc(sizeof(int)*height1);
// corner1[i]->x=(int)malloc(sizeof(int)*height1);
// corner1[i]->y=(int)malloc(sizeof(int)*height1);
// }
for (int i=0;i<THREAD_NUM*4;i++)
{
reslut[i]=i;
}
}
__global__ static void CudaPlite(char* gray1,char* gray2,int width1,int height1,int width2,int height2,cPoint* corner1,cPoint* corner2,int* reslut)
{
__shared__ int xyz[THREAD_NUM*4];
const int tid=threadIdx.x;
// int i;
int n=THREAD_NUM;
int len=height1/n;
int ew1,ew2;
ew1=(width1+3)/4*4;
ew2=(width2+3)/4*4;
int size1,size2;
size1=ew1*len;
size2=ew2*len;
char* data1=(char*)malloc(sizeof(char)*size1);
char* data2=(char*)malloc(sizeof(char)*size2);
// for(i=tid;i<=n;i+n)
// {
memcpy(data1,gray1+tid*size1,sizeof(char)*size1);
memcpy(data2,gray2+tid*size2,sizeof(char)*size2);
// qiu(reslut,tid);
PointOfImg(data1,data2,width1,len,width2,len,corner1,corner2,reslut,tid);
// }
free(data1);
free(data2);
//
// for (int i=0;i<THREAD_NUM*4;i++)
// {
//
// res[i]=i;
// }
// memcpy(reslut,xyz,sizeof(int)*THREAD_NUM*4);
// memcpy(reslut,res,sizeof(int)*THREAD_NUM*4);
}
inline void __checkCudaErrors(cudaError err, const char *file, const int line )
{
if(cudaSuccess != err)
{
fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n",file, line, (int)err, cudaGetErrorString( err ) );
return ;
}
}
void CutGrayImg(char* grayData11,char* grayData22,int width1,int height1,int width2,int height2)
{
int n=THREAD_NUM;
int len=height1/n;
int imageSize1,imageSize2;
imageSize1=(width1+3)/4*4*height1;
imageSize2=(width2+3)/4*4*height2;
cudaSetDevice(0); //CUDA begin
cudaDeviceSynchronize();
cudaThreadSynchronize();
char* gpuImg1,* gpuImg2;
cPoint* corner1,* corner2;
int* reslut;
int xyz[THREAD_NUM*4];
int ew1,ew2;
ew1=(width1+3)/4*4;
ew2=(width2+3)/4*4;
int size1,size2;
size1=ew1*len;
size2=ew2*len;
checkCudaErrors(cudaMalloc((void**)&gpuImg1,sizeof(char)*imageSize1));
checkCudaErrors(cudaMalloc((void**)&gpuImg2,sizeof(char)*imageSize2));
checkCudaErrors(cudaMalloc((void**)&reslut,sizeof(int)*n*4));
checkCudaErrors(cudaMalloc((void**)&corner1,sizeof(cPoint)*size1));
checkCudaErrors(cudaMalloc((void**)&corner2,sizeof(cPoint)*size2));
checkCudaErrors(cudaMemset(reslut,0,sizeof(int)*n*4));
checkCudaErrors(cudaMemcpy(gpuImg1,grayData11,sizeof(char)*imageSize1,cudaMemcpyHostToDevice));
checkCudaErrors(cudaMemcpy(gpuImg2,grayData22,sizeof(char)*imageSize2,cudaMemcpyHostToDevice));
CudaPlite<<<1,THREAD_NUM,THREAD_NUM*4*sizeof(int)>>>(gpuImg1,gpuImg2,width1,height1,width2,height2,corner1,corner2,reslut);
checkCudaErrors(cudaMemcpy(xyz,reslut,sizeof(int)*n*4,cudaMemcpyDeviceToHost));
int flag=cudaDeviceSynchronize();
cudaFree(gpuImg1);
cudaFree(gpuImg2);
cudaFree(reslut);
cudaFree(corner1);
cudaFree(corner2);
int i,j,k;
int w,h;
int x,y;
int re[THREAD_NUM];
for (j=0;j<THREAD_NUM;j++)
{
i=j*4;
w=xyz[i+1]-xyz[i+3];
h=xyz[i]-xyz[i+2];
re[j]=0;
for (k=0;k<THREAD_NUM;k++)
{
i=k*4;
x=xyz[i+1]-xyz[i+3];
y=xyz[i]-xyz[i+2];
if (x==w&&y==h)
{
re[j]++;
}
}
}
int max;
max=i=0;
for (j=0;j<THREAD_NUM;j++)
{
if (max<re[j])
{
max=re[j];
i=j;
}
}
i=i*4;
xy[0]=xyz[i];
xy[1]=xyz[i+1];
xy[2]=xyz[i+2];
xy[3]=xyz[i+3];
}
以上是我的代码,在CutGrayImg函数中我想将reslut数组的值复制给xyz,但不能成功,不知道为什么?希望有人能指出错误点,谢谢!