diff --git a/frigate/detectors/detection_runners.py b/frigate/detectors/detection_runners.py
index ee465b3d51..57b00adfcf 100644
--- a/frigate/detectors/detection_runners.py
+++ b/frigate/detectors/detection_runners.py
@@ -601,16 +601,31 @@ def get_optimized_runner(
         CudaGraphRunner.is_model_supported(model_type)
         and providers[0] == "CUDAExecutionProvider"
     ):
-        options[0] = {
+        # Try to enable CUDA graph capture for maximum performance.
+        # If the model has ops that can't be fully partitioned to CUDA
+        # (e.g. Memcpy nodes), fall back gracefully without graph capture.
+        graph_options = {
             **options[0],
             "enable_cuda_graph": True,
         }
-        return CudaGraphRunner(
-            ort.InferenceSession(
+        try:
+            session = ort.InferenceSession(
+                model_path,
+                providers=providers,
+                provider_options=[graph_options] + options[1:],
+            )
+        except Exception:
+            logger.warning(
+                "CUDA graph capture not supported for this model, "
+                "falling back to CUDA execution without graph capture"
+            )
+            session = ort.InferenceSession(
                 model_path,
                 providers=providers,
                 provider_options=options,
-            ),
+            )
+        return CudaGraphRunner(
+            session,
             options[0]["device_id"],
         )