diff --git a/frigate/detectors/plugins/tensorrt.py b/frigate/detectors/plugins/tensorrt.py
index 98c4fe9b5..06e74e1c9 100644
--- a/frigate/detectors/plugins/tensorrt.py
+++ b/frigate/detectors/plugins/tensorrt.py
@@ -74,6 +74,8 @@ class TensorRtDetector(DetectionApi):
             ctypes.cdll.LoadLibrary(
                 "/usr/local/lib/python3.9/dist-packages/tensorrt/libnvinfer.so.8"
             )
+            trt.init_libnvinfer_plugins(self.trt_logger, "")
+
             ctypes.cdll.LoadLibrary(
                 "/media/frigate/models/tensorrt_demos/yolo/libyolo_layer.so"
             )
@@ -82,8 +84,10 @@ class TensorRtDetector(DetectionApi):
                 "ERROR: failed to load libraries. %s",
                 e,
             )
-        with open(model_path, "rb") as f, trt.Runtime(self.trt_logger) as runtime:
-            return runtime.deserialize_cuda_engine(f.read())
+
+        self.runtime = trt.Runtime(self.trt_logger)
+        with open(model_path, "rb") as f:
+            return self.runtime.deserialize_cuda_engine(f.read())
 
     def _get_input_shape(self):
         """Get input shape of the TensorRT YOLO engine."""
@@ -118,28 +122,28 @@ class TensorRtDetector(DetectionApi):
                 raise ValueError(
                     "bad dims of binding %s: %s" % (binding, str(binding_dims))
                 )
-            nbytes = (
-                size
-                * np.dtype(trt.nptype(self.engine.get_binding_dtype(binding))).itemsize
-            )
+            nbytes = size * self.engine.get_binding_dtype(binding).itemsize
             # Allocate host and device buffers
            err, host_mem = cuda.cuMemHostAlloc(
                 nbytes, Flags=cuda.CU_MEMHOSTALLOC_DEVICEMAP
             )
             assert err is cuda.CUresult.CUDA_SUCCESS, f"cuMemAllocHost returned {err}"
+            logger.debug(
+                f"Allocated Tensor Binding {binding} Memory {nbytes} Bytes ({size} * {self.engine.get_binding_dtype(binding)})"
+            )
             err, device_mem = cuda.cuMemAlloc(nbytes)
             assert err is cuda.CUresult.CUDA_SUCCESS, f"cuMemAlloc returned {err}"
             # Append the device buffer to device bindings.
             bindings.append(int(device_mem))
             # Append to the appropriate list.
             if self.engine.binding_is_input(binding):
-                logger.info(f"Input has Shape {binding_dims}")
+                logger.debug(f"Input has Shape {binding_dims}")
                 inputs.append(HostDeviceMem(host_mem, device_mem, nbytes, size))
             else:
                 # each grid has 3 anchors, each anchor generates a detection
                 # output of 7 float32 values
                 assert size % 7 == 0, f"output size was {size}"
-                logger.info(f"Output has Shape {binding_dims}")
+                logger.debug(f"Output has Shape {binding_dims}")
                 outputs.append(HostDeviceMem(host_mem, device_mem, nbytes, size))
                 output_idx += 1
         assert len(inputs) == 1, f"inputs len was {len(inputs)}"
@@ -153,21 +157,14 @@ class TensorRtDetector(DetectionApi):
         Inputs and outputs are expected to be lists of HostDeviceMem objects.
         """
         # Transfer input data to the GPU.
-        [
-            cuda.cuMemcpyHtoDAsync(inp.device, inp.host, inp.nbytes, self.stream)
-            for inp in self.inputs
-        ]
+        [cuda.cuMemcpyHtoD(inp.device, inp.host, inp.nbytes) for inp in self.inputs]
         # Run inference.
-        self.context.execute_async_v2(
-            bindings=self.bindings, stream_handle=self.stream.getPtr()
-        )
+        if not self.context.execute_v2(bindings=self.bindings):
+            logger.warn(f"Execute returned false")
         # Transfer predictions back from the GPU.
-        [
-            cuda.cuMemcpyDtoHAsync(out.host, out.device, out.nbytes, self.stream)
-            for out in self.outputs
-        ]
+        [cuda.cuMemcpyDtoH(out.host, out.device, out.nbytes) for out in self.outputs]
         # Synchronize the stream
-        cuda.cuStreamSynchronize(self.stream)
+        # cuda.cuStreamSynchronize(self.stream)
         # Return only the host outputs.
         return [
             np.array(
@@ -177,8 +174,11 @@
         ]
 
     def __init__(self, detector_config: TensorRTDetectorConfig):
-        # def __init__(self, detector_config: DetectorConfig, model_path: str):
-        # self.fps = EventsPerSecond()
+        (cuda_err,) = cuda.cuInit(0)
+        assert (
+            cuda_err == cuda.CUresult.CUDA_SUCCESS
+        ), f"Failed to initialize cuda {cuda_err}"
+        err, self.cu_ctx = cuda.cuCtxCreate(cuda.CUctx_flags.CU_CTX_MAP_HOST, 0)
         self.conf_th = 0.4  ##TODO: model config parameter
         self.nms_threshold = 0.4
         self.trt_logger = TrtLogger()
@@ -206,8 +206,13 @@ class TensorRtDetector(DetectionApi):
         del self.inputs
         cuda.cuStreamDestroy(self.stream)
         del self.stream
+        del self.engine
+        del self.runtime
+        del self.context
+        del self.trt_logger
+        cuda.cuCtxDestroy(self.cu_ctx)
 
-    def _postprocess_yolo(self, trt_outputs, img_w, img_h, conf_th, nms_threshold):
+    def _postprocess_yolo(self, trt_outputs, conf_th):
         """Postprocess TensorRT outputs.
         # Args
             trt_outputs: a list of 2 or 3 tensors, where each tensor
@@ -240,16 +245,10 @@ class TensorRtDetector(DetectionApi):
         # normalize
         # tensor_input /= 255.0
 
-        self.inputs[0].host = np.ascontiguousarray(tensor_input)
+        self.inputs[0].host = np.ascontiguousarray(tensor_input.astype(np.float32))
 
         trt_outputs = self._do_inference()
-        raw_detections = self._postprocess_yolo(
-            trt_outputs,
-            tensor_input.shape[1],
-            tensor_input.shape[0],
-            self.conf_th,
-            nms_threshold=self.nms_threshold,
-        )
+        raw_detections = self._postprocess_yolo(trt_outputs, self.conf_th)
 
         if len(raw_detections) == 0:
             return np.zeros((20, 6), np.float32)
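For context on the _load_engine() hunks (lines 74 and 82): the Runtime is now kept on the instance so it outlives the deserialized engine, and the bundled plugins are registered before deserialization. Below is a minimal sketch of that loading pattern, assuming the TensorRT 8.x Python bindings; the engine path and variable names are placeholders, not values from this PR, and the custom libyolo_layer.so load is omitted.

import tensorrt as trt

ENGINE_PATH = "/path/to/model.trt"  # placeholder path, not from this PR

trt_logger = trt.Logger(trt.Logger.INFO)
# Register TensorRT's bundled plugins before deserializing, mirroring the
# new init_libnvinfer_plugins() call added in _load_engine().
trt.init_libnvinfer_plugins(trt_logger, "")

# Keep the Runtime referenced for as long as the engine is in use, instead of
# letting a "with" block close it as soon as deserialization returns.
runtime = trt.Runtime(trt_logger)
with open(ENGINE_PATH, "rb") as f:
    engine = runtime.deserialize_cuda_engine(f.read())

context = engine.create_execution_context()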
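For the _do_inference() and __init__/__del__ hunks: transfers switch to blocking driver-API copies and execute_v2(), which is why cuStreamSynchronize() is commented out, and the detector now creates and tears down its own CUDA context. Below is a minimal, self-contained sketch of that synchronous round-trip, assuming the cuda-python driver bindings and a CUDA-capable device; the buffer size and the check() helper are illustrative, and TensorRT execution is left out.

import numpy as np
from cuda import cuda


def check(err):
    # Driver API calls return CUresult codes instead of raising.
    assert err == cuda.CUresult.CUDA_SUCCESS, err


(err,) = cuda.cuInit(0)
check(err)
# CU_CTX_MAP_HOST matches the flag used in the detector's __init__.
err, ctx = cuda.cuCtxCreate(cuda.CUctx_flags.CU_CTX_MAP_HOST, 0)
check(err)

host_in = np.arange(16, dtype=np.float32)
host_out = np.zeros_like(host_in)

err, device_ptr = cuda.cuMemAlloc(host_in.nbytes)
check(err)

# Host-to-device copy; the call blocks until the data is on the GPU.
(err,) = cuda.cuMemcpyHtoD(device_ptr, host_in.ctypes.data, host_in.nbytes)
check(err)
# ... context.execute_v2(bindings=...) would run here in the detector ...
# Device-to-host copy; also blocking, so host_out is valid immediately.
(err,) = cuda.cuMemcpyDtoH(host_out.ctypes.data, device_ptr, host_out.nbytes)
check(err)
assert np.array_equal(host_in, host_out)

# Cleanup mirrors __del__: free device memory, then destroy the context.
(err,) = cuda.cuMemFree(device_ptr)
check(err)
(err,) = cuda.cuCtxDestroy(ctx)
check(err)

Because cuMemcpyHtoD()/cuMemcpyDtoH() return only after the copy has completed, correctness no longer depends on a stream handle, at the cost of the copy/compute overlap the async path could have offered.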