/*
 * MatrixSeg: per-block maximum of a pitched 2D int matrix.
 *
 * Every thread loads one element of `a`; the threads of a block cooperate to
 * find the largest element of the blockDim.y x blockDim.x tile they cover and
 * the result is stored at b[blockIdx.y * gridDim.x + blockIdx.x].
 *
 * Parameters:
 *   a     - device pointer to the input matrix, row-major, consecutive rows
 *           `width` bytes apart.
 *   b     - device pointer to at least gridDim.x * gridDim.y ints (output).
 *   width - row pitch of `a` in BYTES (e.g. the pitch returned by
 *           cudaMallocPitch), not the number of columns.
 *
 * Launch layout: 2D grid of 2D blocks. The matrix dimensions are not passed
 * in, so no bounds check is possible here: the grid must tile the matrix
 * exactly (rows == gridDim.y * blockDim.y, cols == gridDim.x * blockDim.x).
 * Shared memory: one statically allocated int per block.
 *
 * Why the earlier version did not return the block maximum: every thread
 * reset the shared value to 0 and then performed an unsynchronised
 * "if (value < x) value = x" on it. That is a data race - whichever thread
 * stored last won. The maximum is now seeded by one thread, published with a
 * barrier, and updated with atomicMax.
 */
__global__ void MatrixSeg(int *a, int *b, int width)
{
    const int col = blockIdx.x * blockDim.x + threadIdx.x;
    const int row = blockIdx.y * blockDim.y + threadIdx.y;

    // Pitched addressing: step to the row in bytes, then index ints in it.
    const int *rowPtr = (const int *)((const char *)a + (size_t)row * width);
    const int element = rowPtr[col];

    __shared__ int tileMax;
    const bool isLeader =
        (threadIdx.x == 0 && threadIdx.y == 0 && threadIdx.z == 0);

    // Seed with a real element of the tile so all-negative tiles work too.
    if (isLeader)
    {
        tileMax = element;
    }
    __syncthreads();  // seed must be visible before anyone updates it

    atomicMax(&tileMax, element);
    __syncthreads();  // all updates done before the result is read

    // One store per block; row stride of the output is the grid width.
    if (isLeader)
    {
        b[blockIdx.y * gridDim.x + blockIdx.x] = tileMax;
    }
}
/* Reports a failed CUDA runtime call on stderr and terminates the program. */
static void checkCudaStatus(cudaError_t status, const char *what)
{
    if (status != cudaSuccess)
    {
        fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

/*
 * Demo driver: fills a 4x6 int matrix with random values in [0, 99], copies
 * it to pitched device memory, launches MatrixSeg with one 2x2 thread block
 * per 2x2 tile (3x2 grid) and prints the six per-tile maxima, followed by the
 * same maxima computed on the host for comparison.
 *
 * Returns 0 on success; exits with EXIT_FAILURE if an allocation or a CUDA
 * call fails.
 */
int main()
{
    const int rows = 4;
    const int cols = 6;
    const int tileRows = 2;
    const int tileCols = 2;
    const int tilesX = cols / tileCols;   // grid width  (3)
    const int tilesY = rows / tileRows;   // grid height (2)
    const int numElems = rows * cols;     // 24
    const int numTiles = tilesX * tilesY; // 6

    int *h_a = (int *)malloc(numElems * sizeof(int));
    int *h_b = (int *)malloc(numTiles * sizeof(int));
    if (h_a == NULL || h_b == NULL)
    {
        fprintf(stderr, "host allocation failed\n");
        free(h_a);
        free(h_b);
        return EXIT_FAILURE;
    }

    for (int i = 0; i < numElems; i++)
    {
        h_a[i] = rand() % 100;
    }
    // Print the input, one matrix row per line.
    for (int i = 0; i < numElems; i++)
    {
        printf("%d ", h_a[i]);
        if ((i + 1) % cols == 0)
        {
            printf("\n");
        }
    }
    for (int i = 0; i < numTiles; i++)
    {
        h_b[i] = 0;
    }

    int *d_a = NULL;
    int *d_b = NULL;
    size_t pitch = 0;
    checkCudaStatus(cudaMallocPitch((void **)&d_a, &pitch, sizeof(int) * cols, rows),
                    "cudaMallocPitch(d_a)");
    checkCudaStatus(cudaMemcpy2D(d_a, pitch, h_a, sizeof(int) * cols,
                                 sizeof(int) * cols, rows, cudaMemcpyHostToDevice),
                    "cudaMemcpy2D(d_a <- h_a)");
    // pitch is a size_t; cast so the format specifier matches on every host.
    printf("The pitch is: %lu\n", (unsigned long)pitch);

    checkCudaStatus(cudaMalloc((void **)&d_b, sizeof(int) * numTiles),
                    "cudaMalloc(d_b)");
    checkCudaStatus(cudaMemcpy(d_b, h_b, sizeof(int) * numTiles, cudaMemcpyHostToDevice),
                    "cudaMemcpy(d_b <- h_b)");

    dim3 grid(tilesX, tilesY, 1);
    dim3 thread(tileCols, tileRows, 1);
    // The kernel takes the row pitch in bytes as an int.
    MatrixSeg<<<grid, thread>>>(d_a, d_b, (int)pitch);
    // A launch returns no status itself; configuration errors surface here.
    checkCudaStatus(cudaGetLastError(), "MatrixSeg launch");

    // Blocking copy: also waits for the kernel and reports execution errors.
    checkCudaStatus(cudaMemcpy(h_b, d_b, numTiles * sizeof(int), cudaMemcpyDeviceToHost),
                    "cudaMemcpy(h_b <- d_b)");

    printf("GetSubMatrixMax:\n");
    for (int i = 0; i < numTiles; i++)
    {
        printf("%d ", h_b[i]);
    }
    printf("\n");

    checkCudaStatus(cudaFree(d_a), "cudaFree(d_a)");
    checkCudaStatus(cudaFree(d_b), "cudaFree(d_b)");

    // Host reference: maximum of each tile, in the same order as the GPU output.
    printf("SubMatrixMax For Test:\n");
    for (int ty = 0; ty < tilesY; ty++)
    {
        for (int tx = 0; tx < tilesX; tx++)
        {
            int best = h_a[(ty * tileRows) * cols + tx * tileCols];
            for (int r = 0; r < tileRows; r++)
            {
                for (int c = 0; c < tileCols; c++)
                {
                    const int v = h_a[(ty * tileRows + r) * cols + tx * tileCols + c];
                    if (v > best)
                    {
                        best = v;
                    }
                }
            }
            printf("%d ", best);
        }
    }
    printf("\n");

    free(h_a);
    free(h_b);
    getchar(); // keep the console window open until a key is pressed
    return 0;
}