如何从gst-pipeline中取出GPU帧数据来进行图像增强等相关操作

请使用下面的模版提问(创建话题后勾选相应的选项):
Jetson 模组
Jetson AGX Orin
Jetson Orin NX
Jetson Orin Nano
Jetson AGX Xavier
Jetson Xavier NX
Jetson TX 系列
Jetson Nano

Jetson 软件
JetPack 5.1.3
JetPack 5.1.4
JetPack 6.0
JetPack 6.1
DeepStream SDK
NVIDIA Isaac

SDK Manager 管理工具版本
2.1.0
其他

问题描述
我需要使用GStreamer来实现RTSP视频增强操作,需要尽可能缩短处理延时。我想了解如何从gst-pipeline中取出GPU帧数据,作为图像增强的输入。

错误码
把这里替换为错误码(无需其他信息)

错误日志
把这里替换,粘贴错误日志文本(尽量粘贴错误文本,不要只上传截图)
如果有多个日志,请使用多个代码格式化文本

找到一些英文的参考

https://stackoverflow.com/questions/49284547/gstreamer-rtsp-creating-a-pipeline-to-reduce-latency-due-to-rtpjitterbuffer

https://gstreamer.freedesktop.org/documentation/tutorials/playback/hardware-accelerated-video-decoding.html?gi-language=c

https://github.com/NVIDIA-AI-IOT/deepstream_python_apps

参考代码

低延迟RTSP处理管道

# Low-latency RTSP -> NVMM RGBA pipeline ending in an appsink.
# The caps string is single-quoted because an unquoted "(" is shell syntax
# and would make bash reject the command before gst-launch-1.0 ever runs.
gst-launch-1.0 -v \
rtspsrc location=rtsp://your_stream protocols=tcp latency=100 buffer-mode=slave ! \
queue max-size-bytes=0 ! \
rtph264depay ! \
h264parse ! \
nvv4l2decoder enable-max-performance=1 ! \
nvvideoconvert output-buffers=1 ! \
'video/x-raw(memory:NVMM),format=RGBA' ! \
appsink name=mysink emit-signals=true max-buffers=1 drop=true

优化管道

omxh264enc control-rate=2 preset-level=3 bitrate=4000000 
  iframeinterval=30 insert-sps-pps=1 insert-vui=1 
  enable-twopasscbr=1

CUgraphicsResource资源创建与释放异常

error输出为Unmap failed: 999,具体的现象为图像处理函数apply_image_processing每一次调用时输入为上一次的处理结果

我想得到代码debug的帮助,是否存在相关资源在每一帧的处理前没有被正确的初始化?

回调函数handoff_handler:

static void handoff_handler(GstElement* identity, GstBuffer* buffer, GstPad* pad, gpointer user_data) {
    // 确保 CUDA 设备已初始化
    static bool cuda_initialized = false;
    if (!cuda_initialized) {
        cudaSetDevice(0);
        // cudaFree(0); // 初始化上下文
        cuda_initialized = true;
    }
    
    // 打印内存信息
    size_t free_memory, total_memory;
    cudaMemGetInfo(&free_memory, &total_memory);
    printf("GPU Memory: %zu MB free, %zu MB total\n", free_memory / (1024 * 1024), total_memory / (1024 * 1024));

    frame_count++;
    gint64 current_time = g_get_monotonic_time();
    
    // FPS计算
    if (current_time - last_print_time > G_USEC_PER_SEC) {
        gdouble fps = (gdouble)frame_count * G_USEC_PER_SEC / (current_time - last_print_time);
        printf("FPS: %.2f\n", fps);
        frame_count = 0;
        last_print_time = current_time;
    }

    // 从 GstBuffer 获取 NvBufSurface
    NvBufSurface* surf = nullptr;
    // 获取 NvBufSurface
    GstMapInfo map_info;
    if (!gst_buffer_map(buffer, &map_info, GST_MAP_READWRITE)) {
        printf("Failed to map buffer\n");
        return;
    }
    
    // 获取 NvBufSurface 指针
    surf = (NvBufSurface*)map_info.data;
    if (!surf || surf->numFilled <= 0) {
        printf("Failed to get NvBufSurface\n");
        gst_buffer_unmap(buffer, &map_info);
        return;
    }  
    // 打印bufer信息
    int p;
    printf("identity->surf : \n");
    printf("  batchSize = %d\n", surf->batchSize);
    printf("  numFilled = %d\n", surf->numFilled);
    printf("  memType = %d\n", surf->memType);
    for (int bs = 0; bs < surf->batchSize; bs++) {
        NvBufSurfaceParams *sur = &(surf->surfaceList[bs]);
        if (sur == NULL)
            break;
        printf("  surfaceList[%d]:\n", bs);
        printf("    width = %d\n", sur->width);
        printf("    height = %d\n", sur->height);
        printf("    pitch = %d\n", sur->pitch);
        printf("    layout = %d\n", sur->layout);
        printf("    colorFormat = %d\n", sur->colorFormat);
        printf("    bufferDesc = %lu\n", sur->bufferDesc);
        printf("    dataSize = %d\n", sur->dataSize);
        printf("    dataPtr = %p\n", sur->dataPtr);
        if(sur->colorFormat == NVBUF_COLOR_FORMAT_NV12_ER)
            printf("it's NVBUF_COLOR_FORMAT_NV12_ER!\n");
        else
            printf("it's not NVBUF_COLOR_FORMAT_NV12_ER(%d)\n", NVBUF_COLOR_FORMAT_NV12_ER);
        NvBufSurfacePlaneParams *planeParams = &(sur->planeParams);
        printf("    NvBufSurfaceParams->planeParams:\n");
        printf("      num_planes = %d\n", planeParams->num_planes);
        for (p = 0; p < planeParams->num_planes; p++) {
            printf("      width[%d] = %u\n", p, planeParams->width[p]);
            printf("      height[%d] = %u\n", p, planeParams->height[p]);
            printf("      pitch[%d] = %u\n", p, planeParams->pitch[p]);
            printf("      offset[%d] = %u\n", p, planeParams->offset[p]);
            printf("      psize[%d] = %u\n", p, planeParams->psize[p]);
            printf("      bytesPerPix[%d] = %u\n", p, planeParams->bytesPerPix[p]);
        }

        NvBufSurfaceMappedAddr *mAddr = &(sur->mappedAddr);
        printf("    NvBufSurfaceParams->mappedAddr:\n");
        for (p = 0; p < NVBUF_MAX_PLANES; p++) {
            if (!mAddr->addr) {
            printf("      mappedAddr->addr[%d] is NULL\n", p);
            break;
            }
            printf("      mappedAddr->addr[%d] = %p\n", p, mAddr->addr[p]);
        }

        if (mAddr->eglImage == NULL)
            printf("      mappedAddr->eglImage is NULL\n");
        else
            printf("      mappedAddr->eglImage is %p\n", mAddr->eglImage);
    }
    // ---------------------------------------------------------------------------- CUDA 版本 ---------------------------------------------------------------------------------
    // map EGLImage
    if (NvBufSurfaceMapEglImage(surf, 0) != 0) {
        printf("Failed to map EGLImage");
        gst_buffer_unmap(buffer, &map_info);
        return;
    }
    
    // 修改:从 surf 结构中获取Y平面 EGLImage
    EGLImageKHR egl_image_y = surf->surfaceList[0].mappedAddr.eglImage;
    if (egl_image_y == EGL_NO_IMAGE_KHR) {
        printf("EGLImage is NULL after mapping\n");
        NvBufSurfaceUnMapEglImage(surf, 0);
        gst_buffer_unmap(buffer, &map_info);
        return;
    }

    // 使用CUDA接口注册EGLImage
    CUgraphicsResource cuResource;
    CUresult cuRes = cuGraphicsEGLRegisterImage(&cuResource, egl_image_y, CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE);
    if (cuRes != CUDA_SUCCESS) {
        printf("cuGraphicsEGLRegisterImage failed: %d\n", cuRes);
        NvBufSurfaceUnMapEglImage(surf, 0);
        gst_buffer_unmap(buffer, &map_info);
        return;
    }
    printf("CUDA EGL registration successful\n");
    
    // 映射CUDA资源
    CUeglFrame egl_frame_y;
    cuRes = cuGraphicsResourceGetMappedEglFrame(&egl_frame_y, cuResource, 0, 0);
    if (cuRes != CUDA_SUCCESS) {
        printf("cuGraphicsResourceGetMappedEglFrame failed with error code %d\n", cuRes);
        cuGraphicsUnmapResources(1,&cuResource,0);
        cuGraphicsUnregisterResource(cuResource);
        NvBufSurfaceUnMapEglImage(surf, 0);
        gst_buffer_unmap(buffer, &map_info);
        return;
    }
    printf("cuGraphicsResourceGetMappedEglFrame successful\n");
    printf("  frame.width = %d\n", egl_frame_y.width);
    printf("  frame.height = %d\n", egl_frame_y.height);

    // 获取 cuArray
    CUarray cuArray_y = egl_frame_y.frame.pArray[0];
    int width = surf->surfaceList[0].width;
    int height = surf->surfaceList[0].height;
    // 打印调试信息
    printf("CUDA Array: %p\n", (void*)cuArray_y);
    printf("Image dimensions: %d x %d\n", width, height);
    // 控制标志
    bool applyClahe = true;
    bool applyBrightness = false;
    float clipLimit = 0.1f;
    int tileGridX = 8, tileGridY = 8;
    float brightnessFactor = 1.5f;

    // 调用统一处理函数
    cudaError_t process_err = apply_image_processing(&cuArray_y, width, height, applyClahe, applyBrightness, clipLimit, tileGridX, tileGridY, brightnessFactor);

    if (process_err != cudaSuccess) {
        printf("Image processing failed: %s\n", cudaGetErrorString(process_err));
    }
    // --------------------------------------------------------------- CUDA end --------------------------------------------------------------------------------
    // 同步设备
    cudaDeviceSynchronize();

    cuRes = cuGraphicsUnmapResources(1, &cuResource, 0);
    if (cuRes != CUDA_SUCCESS) {
        printf("Unmap failed: %d\n", cuRes);
    }
    
    cuRes = cuGraphicsUnregisterResource(cuResource);
    if (cuRes != CUDA_SUCCESS) {
        printf("cuGraphicsUnregisterResource failed: %d\n", cuRes);
    }
    
    if (NvBufSurfaceUnMapEglImage(surf, 0) != 0) {
        printf("NvBufSurfaceUnMapEglImage failed\n");
    }
    
    gst_buffer_unmap(buffer, &map_info);
    
}

bash输出:

GPU Memory: 25891 MB free, 30697 MB total
identity->surf : 
  batchSize = 1
  numFilled = 1
  memType = 4
  surfaceList[0]:
    width = 5120
    height = 1440
    pitch = 5120
    layout = 1
    colorFormat = 7
    bufferDesc = 61
    dataSize = 11796480
    dataPtr = 0xffff380632b0
it's NVBUF_COLOR_FORMAT_NV12_ER!
    NvBufSurfaceParams->planeParams:
      num_planes = 2
      width[0] = 5120
      height[0] = 1440
      pitch[0] = 5120
      offset[0] = 0
      psize[0] = 7864320
      bytesPerPix[0] = 1
      width[1] = 2560
      height[1] = 720
      pitch[1] = 5120
      offset[1] = 7864320
      psize[1] = 3932160
      bytesPerPix[1] = 2
    NvBufSurfaceParams->mappedAddr:
      mappedAddr->addr[0] = (nil)
      mappedAddr->addr[1] = (nil)
      mappedAddr->addr[2] = (nil)
      mappedAddr->addr[3] = (nil)
      mappedAddr->eglImage is NULL
CUDA EGL registration successful
cuGraphicsResourceGetMappedEglFrame successful
  frame.width = 5120
  frame.height = 1440
CUDA Array: 0xffff7494b800
Image dimensions: 5120 x 1440

===== Starting Image Processing =====
Image size: 5120 x 1440
Apply CLAHE: YES
Apply Brightness: NO
CLAHE parameters:
  Clip limit: 0.10
  Tile grid: 8 x 8

[CLAHE] Starting processing for 5120x1440 image
[CLAHE] Tile configuration: 8x8 tiles
[CLAHE] Clip limit: 0.10
[CLAHE] Tile size: 640x180 pixels
[CLAHE] Total tiles: 64
[CUDA] Using CUDA array: 0xffff7494b800
[CUDA] Texture object created: 0x1 (0.00 ms)
[CUDA] Surface object created: 0x2 (0.01 ms)
[MEM] Allocating histogram: 0.06 MB
[MEM] Histogram allocated: 0x205ce0000 (0.21 ms)
[MEM] Allocating CDF: 0.06 MB
[MEM] CDF allocated: 0x205cf0000 (0.01 ms)
[INIT] Zeroing histogram and CDF memory
[INIT] Histogram and CDF zeroed (0.06 ms)
[KERNEL] Launching computeHistogramKernel
         Grid: (8, 8, 1)
         Block: (256, 1, 1)
         Shared memory: 1024 bytes
[KERNEL] computeHistogramKernel completed (0.50 ms)
[KERNEL] Launching clipAndComputeCDFKernel
         Grid: (1, 1, 1)
         Block: (256, 1, 1)
[KERNEL] clipAndComputeCDFKernel completed (0.28 ms)
[KERNEL] Launching applyEqualizationKernel
         Grid: (320, 90, 1)
         Block: (16, 16, 1)
[KERNEL] applyEqualizationKernel completed (0.78 ms)
[CLEANUP] Releasing resources
[CLEANUP] Resources released (0.22 ms)
[CLAHE] Processing completed successfully
Total processing time: 0.003 seconds
===== Image Processing Complete =====

Unmap failed: 999