diff --git a/docker-compose.yml b/docker-compose.yml
index a4d349194..0ed59c697 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -12,16 +12,17 @@ services:
     build:
       context: .
       dockerfile: docker/main/Dockerfile
-      # Use target devcontainer-trt for TensorRT dev
       target: devcontainer
-    ## Uncomment this block for nvidia gpu support
+    # Run this command to build the devcontainer with TensorRT support, then uncomment the image and deploy blocks below and comment out the build block above
+    # ARCH=amd64 docker buildx bake --load --file=docker/tensorrt/trt.hcl --set tensorrt.tags=frigate:devcontainer-trt devcontainer-trt
+    # image: frigate:devcontainer-trt
     # deploy:
-    #       resources:
-    #           reservations:
-    #               devices:
-    #                   - driver: nvidia
-    #                     count: 1
-    #                     capabilities: [gpu]
+    #   resources:
+    #     reservations:
+    #       devices:
+    #         - driver: nvidia
+    #           count: 1
+    #           capabilities: [gpu]
     environment:
       YOLO_MODELS: yolov7-320
     devices:
diff --git a/docker/tensorrt/trt.hcl b/docker/tensorrt/trt.hcl
index 3195fb5bf..e4221c155 100644
--- a/docker/tensorrt/trt.hcl
+++ b/docker/tensorrt/trt.hcl
@@ -95,4 +95,5 @@ target "devcontainer-trt" {
   }
   platforms = ["linux/amd64"]
   target = "devcontainer-trt"
+  tags = ["frigate:devcontainer-trt"]
 }
diff --git a/frigate/detectors/plugins/tensorrt.py b/frigate/detectors/plugins/tensorrt.py
index 2a57ec2d3..0c49056d4 100644
--- a/frigate/detectors/plugins/tensorrt.py
+++ b/frigate/detectors/plugins/tensorrt.py
@@ -1,4 +1,5 @@
 import ctypes
+import json
 import logging
 
 import numpy as np
@@ -15,7 +16,8 @@ from pydantic import Field
 from typing_extensions import Literal
 
 from frigate.detectors.detection_api import DetectionApi
-from frigate.detectors.detector_config import BaseDetectorConfig
+from frigate.detectors.detector_config import BaseDetectorConfig, ModelTypeEnum
+from frigate.detectors.util import yolov8_postprocess
 
 logger = logging.getLogger(__name__)
 
@@ -85,28 +87,34 @@ class TensorRtDetector(DetectionApi):
                 e,
             )
 
+        if self.model_type == ModelTypeEnum.yolov8:
+            with open(model_path, "rb") as f, trt.Runtime(self.trt_logger) as runtime:
+                meta_len = int.from_bytes(
+                    f.read(4), byteorder="little"
+                )  # read metadata length
+                metadata = json.loads(f.read(meta_len).decode("utf-8"))  # read metadata
+                model = runtime.deserialize_cuda_engine(f.read())  # read engine
+                return model
+
         with open(model_path, "rb") as f, trt.Runtime(self.trt_logger) as runtime:
             return runtime.deserialize_cuda_engine(f.read())
 
-    def _get_input_shape(self):
+    def _get_input_output_shape(self):
-        """Get input shape of the TensorRT YOLO engine."""
+        """Get input and output (shape, dtype) of the TensorRT YOLO engine."""
-        binding = self.engine[0]
-        assert self.engine.binding_is_input(binding)
-        binding_dims = self.engine.get_binding_shape(binding)
-        if len(binding_dims) == 4:
-            return (
-                tuple(binding_dims[2:]),
-                trt.nptype(self.engine.get_binding_dtype(binding)),
-            )
-        elif len(binding_dims) == 3:
-            return (
-                tuple(binding_dims[1:]),
-                trt.nptype(self.engine.get_binding_dtype(binding)),
-            )
-        else:
-            raise ValueError(
-                "bad dims of binding %s: %s" % (binding, str(binding_dims))
+        input_shape = None
+        output_shape = None
+        for i in range(self.engine.num_bindings):
+            name = self.engine.get_tensor_name(i)
+            shape = (
+                tuple(self.engine.get_binding_shape(name)),
+                trt.nptype(self.engine.get_binding_dtype(name)),
             )
+            if self.engine.get_tensor_mode(name) == trt.TensorIOMode.INPUT:
+                input_shape = shape
+            else:
+                output_shape = shape
+        assert output_shape is not None and input_shape is not None
+        return input_shape, output_shape
 
     def _allocate_buffers(self):
         """Allocates all host/device in/out buffers required for an engine."""
@@ -217,8 +225,9 @@ class TensorRtDetector(DetectionApi):
         self.nms_threshold = 0.4
         err, self.stream = cuda.cuStreamCreate(0)
         self.trt_logger = TrtLogger()
+        self.model_type = detector_config.model.model_type
         self.engine = self._load_engine(detector_config.model.path)
-        self.input_shape = self._get_input_shape()
+        self.input_shape, self.output_shape = self._get_input_output_shape()
 
         try:
             self.context = self.engine.create_execution_context()
@@ -261,7 +270,9 @@ class TensorRtDetector(DetectionApi):
         # filter low-conf detections and concatenate results of all yolo layers
         detections = []
         for o in trt_outputs:
+            # reshape the flat output into rows of 7 values, one row per detection
             dets = o.reshape((-1, 7))
+            # keep rows where box_confidence * class_prob >= conf_th
             dets = dets[dets[:, 4] * dets[:, 6] >= conf_th]
             detections.append(dets)
         detections = np.concatenate(detections, axis=0)
@@ -284,6 +295,10 @@ class TensorRtDetector(DetectionApi):
             tensor_input.astype(self.input_shape[-1])
         )
         trt_outputs = self._do_inference()
+        if self.model_type == ModelTypeEnum.yolov8:
+            return yolov8_postprocess(
+                self.input_shape[0], trt_outputs[0].reshape(self.output_shape[0])
+            )
 
         raw_detections = self._postprocess_yolo(trt_outputs, self.conf_th)
 
@@ -298,10 +313,13 @@ class TensorRtDetector(DetectionApi):
         # Reorder elements by the score, best on top, remove class_prob
         ordered = raw_detections[raw_detections[:, 4].argsort()[::-1]][:, 0:6]
         # transform width to right with clamp to 0..1
+        # column 2 becomes the right edge of the box (x_min + width)
         ordered[:, 2] = np.clip(ordered[:, 2] + ordered[:, 0], 0, 1)
         # transform height to bottom with clamp to 0..1
+        # column 3 becomes the bottom edge of the box (y_min + height)
         ordered[:, 3] = np.clip(ordered[:, 3] + ordered[:, 1], 0, 1)
         # put result into the correct order and limit to top 20
+        # [class_id, box_confidence, y_min/h, x_min/w, y_max/h, x_max/w]
         detections = ordered[:, [5, 4, 1, 0, 3, 2]][:20]
 
         # pad to 20x6 shape
diff --git a/frigate/detectors/util.py b/frigate/detectors/util.py
index db1b9f794..5240f4a0b 100644
--- a/frigate/detectors/util.py
+++ b/frigate/detectors/util.py
@@ -79,5 +79,12 @@ def yolov8_postprocess(
                 np.argpartition(detections[:, 1], -box_count)[-box_count:]
             ]
         detections = detections.copy()
-    detections.resize((box_count, 6))
-    return detections
+    # sort detections by confidence, highest first
+    detections = detections[detections[:, 1].argsort()[::-1]]
+    # Zero-pad (or truncate) to box_count rows, matching the in-place
+    # ndarray.resize() this replaces. np.resize() must not be used here: it
+    # fills the extra rows with repeated copies of the detections, not zeros.
+    padded = np.zeros(box_count * 6, dtype=detections.dtype)
+    flat = detections.ravel()[: padded.size]
+    padded[: flat.size] = flat
+    return padded.reshape((box_count, 6))