From 3c6937c71fce7bcb4dd8ff35ef4258ce727cb885 Mon Sep 17 00:00:00 2001
From: felalex
Date: Sun, 3 May 2026 21:46:58 -0700
Subject: [PATCH] test: close coverage gaps in ORT leak fix regression suite

Audit of test_detection_runners.py against the actual fix changes
surfaced five cases where the test suite would silently pass after a
regression.

Adds:
- test_fallback_warning_includes_developer_context: guards the enriched
  CUDA-graph fallback warning fields (model_type, path, device_id,
  providers) against revert to the bare form.
- test_default_sets_enable_all_optimization: guards the explicit else
  branch that pins graph_optimization_level=ORT_ENABLE_ALL, added to be
  robust against ORT default changes.
- test_gpu_mem_limit_key_present_when_cuda_query_succeeds: positive
  counterpart to the existing omit-on-failure test.
- test_cuda_graph_doubles_peak_multiplier: locks in the 7 -> 14
  multiplier relationship in compute_cuda_mem_limit.
- test_arcface_is_fixed / test_facenet_is_fixed /
  test_yolov9_license_plate_is_fixed: explicit fixed-size guards for the
  remaining enrichment models the fix targets.

28 tests pass (was 21).

Co-Authored-By: Claude Haiku 4.5
---
 frigate/test/test_detection_runners.py | 152 +++++++++++++++++++++++++
 1 file changed, 152 insertions(+)

diff --git a/frigate/test/test_detection_runners.py b/frigate/test/test_detection_runners.py
index 4f11d7afb..2c593f7a0 100644
--- a/frigate/test/test_detection_runners.py
+++ b/frigate/test/test_detection_runners.py
@@ -40,6 +40,19 @@ class TestGetOrtSessionOptions(unittest.TestCase):
             ort.GraphOptimizationLevel.ORT_ENABLE_BASIC,
         )
 
+    def test_default_sets_enable_all_optimization(self):
+        # Guards the explicit `else` branch added so the optimization level is
+        # never implicit — protects against ORT default changes.
+        from frigate.detectors.detection_runners import get_ort_session_options
+
+        import onnxruntime as ort
+
+        opts = get_ort_session_options()
+        self.assertEqual(
+            opts.graph_optimization_level,
+            ort.GraphOptimizationLevel.ORT_ENABLE_ALL,
+        )
+
     def test_always_returns_session_options(self):
         from frigate.detectors.detection_runners import get_ort_session_options
 
@@ -98,6 +111,36 @@ class TestHasVariableLengthInputs(unittest.TestCase):
 
         self.assertFalse(ONNXModelRunner.has_variable_length_inputs(None))
 
+    def test_arcface_is_fixed(self):
+        from frigate.detectors.detection_runners import ONNXModelRunner
+        from frigate.embeddings.types import EnrichmentModelTypeEnum
+
+        self.assertFalse(
+            ONNXModelRunner.has_variable_length_inputs(
+                EnrichmentModelTypeEnum.arcface.value
+            )
+        )
+
+    def test_facenet_is_fixed(self):
+        from frigate.detectors.detection_runners import ONNXModelRunner
+        from frigate.embeddings.types import EnrichmentModelTypeEnum
+
+        self.assertFalse(
+            ONNXModelRunner.has_variable_length_inputs(
+                EnrichmentModelTypeEnum.facenet.value
+            )
+        )
+
+    def test_yolov9_license_plate_is_fixed(self):
+        from frigate.detectors.detection_runners import ONNXModelRunner
+        from frigate.embeddings.types import EnrichmentModelTypeEnum
+
+        self.assertFalse(
+            ONNXModelRunner.has_variable_length_inputs(
+                EnrichmentModelTypeEnum.yolov9_license_plate.value
+            )
+        )
+
 
 class TestComputeCudaMemLimit(unittest.TestCase):
     @staticmethod
@@ -161,6 +204,27 @@ class TestComputeCudaMemLimit(unittest.TestCase):
 
         self.assertIsNone(compute_cuda_mem_limit("/fake/model.onnx", cuda_graph=False))
 
+    @patch("frigate.util.model.ctypes.CDLL")
+    @patch("os.path.getsize", return_value=500 * 1024 * 1024)
+    def test_cuda_graph_doubles_peak_multiplier(self, _mock_getsize, mock_cdll):
+        # cuda_graph=True must use peak_multiplier=14 (vs 7 for cuda_graph=False)
+        # because graph capture pins all intermediate tensors live simultaneously.
+        from frigate.util.model import compute_cuda_mem_limit
+
+        total_vram = 24 * 1024**3
+        mock_lib = MagicMock()
+        mock_cdll.return_value = mock_lib
+        mock_lib.cudaMemGetInfo.side_effect = self._fake_mem_get_info(
+            total_vram, total_vram
+        )
+
+        model_size = 500 * 1024 * 1024
+        with_graph = compute_cuda_mem_limit("/fake/model.onnx", cuda_graph=True)
+        without_graph = compute_cuda_mem_limit("/fake/model.onnx", cuda_graph=False)
+        self.assertGreaterEqual(with_graph, model_size * 14)
+        self.assertGreaterEqual(without_graph, model_size * 7)
+        self.assertGreater(with_graph, without_graph)
+
     @patch("frigate.util.model.ctypes.CDLL")
     @patch("os.path.getsize", return_value=200 * 1024 * 1024)
     def test_capped_by_free_vram_when_constrained(self, _mock_getsize, mock_cdll):
@@ -380,6 +444,57 @@ class TestRunnerOmitsGpuMemLimitOnCudaQueryFailure(unittest.TestCase):
         )
 
 
+class TestRunnerInjectsGpuMemLimitOnCudaQuerySuccess(unittest.TestCase):
+    """Positive counterpart to TestRunnerOmitsGpuMemLimitOnCudaQueryFailure:
+    when cudaMemGetInfo succeeds, gpu_mem_limit must be injected into
+    provider_options so ORT's BFC arena is bounded."""
+
+    @staticmethod
+    def _fake_mem_get_info(free_value: int, total_value: int):
+        def _impl(free_ptr, total_ptr):
+            free_ptr._obj.value = free_value
+            total_ptr._obj.value = total_value
+            return 0  # cudaSuccess
+
+        return _impl
+
+    @patch("frigate.detectors.detection_runners.ort.InferenceSession")
+    @patch(
+        "frigate.detectors.detection_runners.get_ort_providers",
+        return_value=(["CUDAExecutionProvider"], [{"device_id": 0}]),
+    )
+    @patch(
+        "frigate.detectors.detection_runners.is_rknn_compatible",
+        return_value=False,
+    )
+    @patch("frigate.util.model.ctypes.CDLL")
+    @patch("os.path.getsize", return_value=200 * 1024 * 1024)
+    def test_gpu_mem_limit_key_present_when_cuda_query_succeeds(
+        self, _gs, mock_cdll, _rknn, _gp, mock_session
+    ):
+        from frigate.detectors.detection_runners import get_optimized_runner
+        from frigate.embeddings.types import EnrichmentModelTypeEnum
+
+        total_vram = 24 * 1024**3
+        mock_lib = MagicMock()
+        mock_cdll.return_value = mock_lib
+        mock_lib.cudaMemGetInfo.side_effect = self._fake_mem_get_info(
+            total_vram, total_vram
+        )
+        mock_session.return_value.get_inputs.return_value = []
+        mock_session.return_value.get_outputs.return_value = []
+
+        get_optimized_runner(
+            "/fake/jina.onnx",
+            device="GPU",
+            model_type=EnrichmentModelTypeEnum.jina_v2.value,
+        )
+
+        provider_opts = mock_session.call_args.kwargs["provider_options"]
+        self.assertIn("gpu_mem_limit", provider_opts[0])
+        self.assertGreater(provider_opts[0]["gpu_mem_limit"], 0)
+
+
 class TestCudaGraphFallbackLogsException(unittest.TestCase):
     @patch("frigate.detectors.detection_runners.ort.InferenceSession")
     @patch(
@@ -417,6 +532,43 @@ class TestCudaGraphFallbackLogsException(unittest.TestCase):
         self.assertIn("CUDA graph capture failed", joined)
         self.assertIn("cudaErrorStreamCaptureUnsupported", joined)
 
+    @patch("frigate.detectors.detection_runners.ort.InferenceSession")
+    @patch(
+        "frigate.detectors.detection_runners.get_ort_providers",
+        return_value=(["CUDAExecutionProvider"], [{"device_id": 0}]),
+    )
+    @patch(
+        "frigate.detectors.detection_runners.is_rknn_compatible",
+        return_value=False,
+    )
+    @patch("frigate.util.model.ctypes.CDLL", side_effect=OSError("no cuda"))
+    @patch("os.path.getsize", return_value=200 * 1024 * 1024)
+    def test_fallback_warning_includes_developer_context(
+        self, _gs, _cdll, _rknn, _gp, mock_session
+    ):
+        # Guards the enriched warning fields (model_type, device_id, providers)
+        # so a future revert to the bare "model_path + e" form is caught.
+        from frigate.detectors.detection_runners import get_optimized_runner
+        from frigate.detectors.detector_config import ModelTypeEnum
+
+        mock_session.side_effect = [
+            RuntimeError("boom"),
+            MagicMock(get_inputs=lambda: [], get_outputs=lambda: []),
+        ]
+
+        with self.assertLogs(
+            "frigate.detectors.detection_runners", level="WARNING"
+        ) as captured:
+            get_optimized_runner(
+                "/m/yolo.onnx", "GPU", ModelTypeEnum.yologeneric.value
+            )
+
+        joined = "\n".join(captured.output)
+        self.assertIn(f"model_type={ModelTypeEnum.yologeneric.value}", joined)
+        self.assertIn("path=/m/yolo.onnx", joined)
+        self.assertIn("device_id=0", joined)
+        self.assertIn("CUDAExecutionProvider", joined)
+
 
 if __name__ == "__main__":
     unittest.main()