__device__ 函数如何申请大数组

// Device helper: builds a per-thread 10000 x 20 int scratch table, sets
// a[1][0] = 1, and stores a[1][0] * 100 into C[1][1].
//
// Parameters:
//   A, B - not used by this function.
//   C    - row-pointer table; C[1] must point to at least two writable ints
//          in device-accessible memory.
//
// The scratch table is about 800 KB, which is too large for a per-thread
// automatic (local-memory) array, so it is taken from the device heap with
// in-kernel malloc/free. The device heap is small by default; the host
// should raise it with cudaDeviceSetLimit(cudaLimitMallocHeapSize, bytes)
// before the launch, sized for the number of threads that allocate at once.
__device__ void GPUTestTest(int** A, int** B, int** C)
{
    const int kRows = 10000;
    const int kCols = 20;

    // Pointer-to-row type keeps the a[i][j] indexing of a real 2D array.
    int (*a)[kCols] =
        static_cast<int (*)[kCols]>(malloc(sizeof(int) * kRows * kCols));
    if (!a) {
        // Device heap exhausted: skip the work instead of writing through
        // a null pointer.
        return;
    }

    a[1][0] = 1;
    C[1][1] = a[1][0] * 100;

    // Memory from in-kernel malloc lives until freed, so release it here.
    free(a);
}

// Kernel: every thread whose 2D global index (x, y) satisfies
// x < row and y < col calls GPUTestTest(A, B, C).
//
// Launch layout: 2D grid of 2D blocks; the x dimension is compared against
// `row` and the y dimension against `col`. The grid may over-cover the
// range, since out-of-range threads do nothing.
//
// Parameters:
//   A, B, C  - forwarded unchanged to GPUTestTest.
//   row, col - extents used for the bounds check.
__global__ void GPUkernelFunction(int** A, int** B, int** C, int row, int col) {
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;

    // Guard the grid tail: the launch rarely divides the range evenly.
    if (x >= row || y >= col) {
        return;
    }

    // The helper defined in this file is GPUTestTest; there is no
    // GPUFunctionTest.
    GPUTestTest(A, B, C);
}

// Host entry point: launches GPUkernelFunction over a row x col index range
// and reports through the exit status whether the launch and the kernel
// execution succeeded (0 on success, 1 on any CUDA error).
int main() {
    int row = 20000, col = 20, h = 32, w = 32;

    // NOTE(review): grid, block, d_A, d_B and d_C are not declared in the
    // visible code; they are assumed to be set up in the part of the file
    // that was omitted (h and w look like the intended block dimensions -
    // confirm).
    GPUkernelFunction<<<grid, block>>>(d_A, d_B, d_C, row, col);

    // A kernel launch returns no status itself: configuration errors show up
    // in cudaGetLastError(), execution faults at the next synchronizing call.
    cudaError_t err = cudaGetLastError();
    if (err == cudaSuccess) {
        // Also keeps the process alive until the kernel has finished.
        err = cudaDeviceSynchronize();
    }

    return err == cudaSuccess ? 0 : 1;
}