TensorRT 和 TensorFlow 同时使用时出现 CUDA 资源冲突问题

使用 TensorRT 对 resnet_v1_50 分类网络进行加速,使用 TensorFlow 对图片数据进行预处理,CUDA error 出现在 common.do_inference 函数中。
do_inference 函数代码如下,出错的代码行是 context.execute_async:
def do_inference(context, bindings, inputs, outputs, stream, batch_size=1):
    """Run one asynchronous inference pass and return the host output buffers.

    Args:
        context: Execution context; its ``execute_async`` method is called.
        bindings: Passed through unchanged as ``bindings`` to ``execute_async``.
        inputs: Iterable of objects exposing ``host`` and ``device`` attributes;
            each ``host`` buffer is copied to its ``device`` buffer.
        outputs: Iterable of objects exposing ``host`` and ``device`` attributes;
            each ``device`` buffer is copied back to its ``host`` buffer.
        stream: Stream used for every copy and for execution; its ``handle``
            is forwarded to ``execute_async``.
        batch_size: Batch size forwarded to ``execute_async``.

    Returns:
        A list holding ``out.host`` for every element of ``outputs``.
    """
    # Transfer input data to the GPU.
    for inp in inputs:
        cuda.memcpy_htod_async(inp.device, inp.host, stream)

    # Run inference.
    context.execute_async(batch_size=batch_size, bindings=bindings, stream_handle=stream.handle)

    # Transfer predictions back from the GPU.
    for out in outputs:
        cuda.memcpy_dtoh_async(out.host, out.device, stream)

    # Synchronize the stream so the host buffers are complete before returning.
    stream.synchronize()

    # Return only the host outputs.
    return [out.host for out in outputs]

主程序代码:
from random import randint
from PIL import Image
import numpy as np
import pycuda.driver as cuda
import pycuda.autoinit
import tensorrt as trt
import sys, os
import common
from preprocessing import preprocessing_factory
from nets import nets_factory
import tensorflow as tf
import time

# You can set the logger severity higher to suppress messages (or lower to display more messages).
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

class ModelData(object):
    """Constants describing the UFF model consumed by ``build_engine``."""

    # Path to "model.uff" located next to this script.
    MODEL_FILE = os.path.join(os.path.dirname(__file__), "model.uff")
    # Name of the input node registered with the UFF parser.
    INPUT_NAME = "input"
    # Input dimensions registered with the UFF parser.
    INPUT_SHAPE = (3, 224, 224)
    # Name of the output node registered with the UFF parser.
    OUTPUT_NAME = "resnet_v1_50/SpatialSqueeze"

def build_engine(model_file):
    """Parse a UFF model file and build a TensorRT CUDA engine from it.

    The input and output nodes are registered from the ``ModelData``
    constants. The builder is configured with a maximum batch size of 1 and
    a workspace of ``common.GiB(1)``.

    Args:
        model_file: Path of the UFF file handed to ``parser.parse``.

    Returns:
        Whatever ``builder.build_cuda_engine(network)`` returns.
    """
    # For more information on TRT basics, refer to the introductory samples.
    print("build engine begin\n")
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.UffParser() as parser:
        builder.max_batch_size = 1
        #builder.int8_mode = True
        #builder.fp16_mode = True
        builder.max_workspace_size = common.GiB(1)

        # Parse the Uff Network
        parser.register_input(ModelData.INPUT_NAME, ModelData.INPUT_SHAPE, trt.UffInputOrder.NHWC)
        parser.register_output(ModelData.OUTPUT_NAME)
        parser.parse(model_file, network)

        # Build and return an engine.
        return builder.build_cuda_engine(network)

def main():
    """Preprocess ``test.bmp`` with TensorFlow and run it through TensorRT.

    A TensorFlow graph applies the ``resnet_v1_50`` preprocessing function to
    the image. The flattened result is copied into the first input buffer of
    a TensorRT engine built from ``ModelData.MODEL_FILE``, and one inference
    pass is executed through ``common.do_inference``.
    """
    model_file = ModelData.MODEL_FILE

    g = tf.Graph()
    sess = tf.Session(graph=g)
    try:
        # Build the preprocessing graph.
        with g.as_default():
            image = tf.placeholder(tf.uint8, shape=[None, None, 3])
            model_name = "resnet_v1_50"
            network_fn = nets_factory.get_network_fn(model_name, 8, is_training=False)
            image_size = network_fn.default_image_size
            image_preprocessing_fn = preprocessing_factory.get_preprocessing(model_name, is_training=False)
            pre_image = image_preprocessing_fn(image, image_size, image_size)

        with build_engine(model_file) as engine:
            # Build an engine, allocate buffers and create a stream.
            # For more information on buffer allocation, refer to the introductory samples.
            inputs, outputs, bindings, stream = common.allocate_buffers(engine)
            pagelocked_buffer = inputs[0].host

            with engine.create_execution_context() as context:
                img = np.array(Image.open("test.bmp"))

                # Run the TensorFlow preprocessing, then flatten the result so
                # it can be copied into the engine's first host input buffer.
                input_ = sess.run(pre_image, feed_dict={image: img})
                input_ = np.array(input_.ravel())
                np.copyto(pagelocked_buffer, input_)

                # NOTE(review): pycuda.autoinit and the TensorFlow session each
                # set up CUDA state in this process; the reported failure in
                # execute_async may come from a different CUDA context being
                # current after sess.run -- TODO confirm.
                [output] = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
    finally:
        # Release the TensorFlow session's resources even if inference fails.
        sess.close()
# Run the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()