test: close coverage gaps in ORT leak fix regression suite

An audit of test_detection_runners.py against the actual fix changes
surfaced five coverage gaps where the test suite would have continued
to pass silently after a regression. Adds:

- test_fallback_warning_includes_developer_context: guards the
  enriched CUDA-graph fallback warning fields (model_type, path,
  device_id, providers) against revert to the bare form.
- test_default_sets_enable_all_optimization: guards the explicit
  else branch that pins graph_optimization_level=ORT_ENABLE_ALL,
  added to be robust against ORT default changes.
- test_gpu_mem_limit_key_present_when_cuda_query_succeeds: positive
  counterpart to the existing omit-on-failure test.
- test_cuda_graph_doubles_peak_multiplier: locks in the 7 -> 14
  multiplier relationship in compute_cuda_mem_limit.
- test_arcface_is_fixed / test_facenet_is_fixed /
  test_yolov9_license_plate_is_fixed: explicit fixed-size guards
  for the remaining enrichment models the fix targets.

28 tests pass (was 21).

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
felalex 2026-05-03 21:46:58 -07:00
parent 13abc4c9eb
commit 3c6937c71f

View File

@ -40,6 +40,19 @@ class TestGetOrtSessionOptions(unittest.TestCase):
ort.GraphOptimizationLevel.ORT_ENABLE_BASIC,
)
def test_default_sets_enable_all_optimization(self):
    """Default session options must pin graph_optimization_level to ORT_ENABLE_ALL.

    The runner sets the level in an explicit ``else`` branch so it never
    relies on onnxruntime's own default; this test catches a revert of
    that branch.
    """
    import onnxruntime as ort

    from frigate.detectors.detection_runners import get_ort_session_options

    session_options = get_ort_session_options()
    expected_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
    self.assertEqual(session_options.graph_optimization_level, expected_level)
def test_always_returns_session_options(self):
from frigate.detectors.detection_runners import get_ort_session_options
@ -98,6 +111,36 @@ class TestHasVariableLengthInputs(unittest.TestCase):
self.assertFalse(ONNXModelRunner.has_variable_length_inputs(None))
def test_arcface_is_fixed(self):
    """ArcFace must be classified as a fixed-input-size model."""
    from frigate.detectors.detection_runners import ONNXModelRunner
    from frigate.embeddings.types import EnrichmentModelTypeEnum

    model_type = EnrichmentModelTypeEnum.arcface.value
    is_variable = ONNXModelRunner.has_variable_length_inputs(model_type)
    self.assertFalse(is_variable)
def test_facenet_is_fixed(self):
    """FaceNet must be classified as a fixed-input-size model."""
    from frigate.detectors.detection_runners import ONNXModelRunner
    from frigate.embeddings.types import EnrichmentModelTypeEnum

    model_type = EnrichmentModelTypeEnum.facenet.value
    is_variable = ONNXModelRunner.has_variable_length_inputs(model_type)
    self.assertFalse(is_variable)
def test_yolov9_license_plate_is_fixed(self):
    """The YOLOv9 license-plate model must be classified as fixed-size."""
    from frigate.detectors.detection_runners import ONNXModelRunner
    from frigate.embeddings.types import EnrichmentModelTypeEnum

    model_type = EnrichmentModelTypeEnum.yolov9_license_plate.value
    is_variable = ONNXModelRunner.has_variable_length_inputs(model_type)
    self.assertFalse(is_variable)
class TestComputeCudaMemLimit(unittest.TestCase):
@staticmethod
@ -161,6 +204,27 @@ class TestComputeCudaMemLimit(unittest.TestCase):
self.assertIsNone(compute_cuda_mem_limit("/fake/model.onnx", cuda_graph=False))
@patch("frigate.util.model.ctypes.CDLL")
@patch("os.path.getsize", return_value=500 * 1024 * 1024)
def test_cuda_graph_doubles_peak_multiplier(self, _mock_getsize, mock_cdll):
    """cuda_graph=True must budget peak_multiplier=14 versus 7 without it.

    Graph capture pins all intermediate tensors live simultaneously, so
    the computed memory limit must roughly double the non-capture budget.
    """
    from frigate.util.model import compute_cuda_mem_limit

    total_vram = 24 * 1024**3
    fake_lib = MagicMock()
    mock_cdll.return_value = fake_lib
    fake_lib.cudaMemGetInfo.side_effect = self._fake_mem_get_info(
        total_vram, total_vram
    )

    model_size = 500 * 1024 * 1024
    limit_with_graph = compute_cuda_mem_limit("/fake/model.onnx", cuda_graph=True)
    limit_no_graph = compute_cuda_mem_limit("/fake/model.onnx", cuda_graph=False)

    # 14x with capture, 7x without — and capture must always cost more.
    self.assertGreaterEqual(limit_with_graph, model_size * 14)
    self.assertGreaterEqual(limit_no_graph, model_size * 7)
    self.assertGreater(limit_with_graph, limit_no_graph)
@patch("frigate.util.model.ctypes.CDLL")
@patch("os.path.getsize", return_value=200 * 1024 * 1024)
def test_capped_by_free_vram_when_constrained(self, _mock_getsize, mock_cdll):
@ -380,6 +444,57 @@ class TestRunnerOmitsGpuMemLimitOnCudaQueryFailure(unittest.TestCase):
)
class TestRunnerInjectsGpuMemLimitOnCudaQuerySuccess(unittest.TestCase):
    """Positive counterpart to TestRunnerOmitsGpuMemLimitOnCudaQueryFailure.

    When cudaMemGetInfo succeeds, gpu_mem_limit must be injected into
    provider_options so ORT's BFC arena is bounded.
    """

    @staticmethod
    def _fake_mem_get_info(free_value: int, total_value: int):
        """Return a cudaMemGetInfo stub writing the given byte counts."""

        def fake_query(free_ptr, total_ptr):
            free_ptr._obj.value = free_value
            total_ptr._obj.value = total_value
            return 0  # cudaSuccess
        return fake_query

    @patch("frigate.detectors.detection_runners.ort.InferenceSession")
    @patch(
        "frigate.detectors.detection_runners.get_ort_providers",
        return_value=(["CUDAExecutionProvider"], [{"device_id": 0}]),
    )
    @patch(
        "frigate.detectors.detection_runners.is_rknn_compatible",
        return_value=False,
    )
    @patch("frigate.util.model.ctypes.CDLL")
    @patch("os.path.getsize", return_value=200 * 1024 * 1024)
    def test_gpu_mem_limit_key_present_when_cuda_query_succeeds(
        self, _gs, mock_cdll, _rknn, _gp, mock_session
    ):
        from frigate.detectors.detection_runners import get_optimized_runner
        from frigate.embeddings.types import EnrichmentModelTypeEnum

        # Pretend an idle 24 GiB GPU so the VRAM query succeeds cleanly.
        total_vram = 24 * 1024**3
        fake_lib = MagicMock()
        mock_cdll.return_value = fake_lib
        fake_lib.cudaMemGetInfo.side_effect = self._fake_mem_get_info(
            total_vram, total_vram
        )

        session_instance = mock_session.return_value
        session_instance.get_inputs.return_value = []
        session_instance.get_outputs.return_value = []

        get_optimized_runner(
            "/fake/jina.onnx",
            device="GPU",
            model_type=EnrichmentModelTypeEnum.jina_v2.value,
        )

        provider_opts = mock_session.call_args.kwargs["provider_options"]
        self.assertIn("gpu_mem_limit", provider_opts[0])
        self.assertGreater(provider_opts[0]["gpu_mem_limit"], 0)
class TestCudaGraphFallbackLogsException(unittest.TestCase):
@patch("frigate.detectors.detection_runners.ort.InferenceSession")
@patch(
@ -417,6 +532,43 @@ class TestCudaGraphFallbackLogsException(unittest.TestCase):
self.assertIn("CUDA graph capture failed", joined)
self.assertIn("cudaErrorStreamCaptureUnsupported", joined)
@patch("frigate.detectors.detection_runners.ort.InferenceSession")
@patch(
    "frigate.detectors.detection_runners.get_ort_providers",
    return_value=(["CUDAExecutionProvider"], [{"device_id": 0}]),
)
@patch(
    "frigate.detectors.detection_runners.is_rknn_compatible",
    return_value=False,
)
@patch("frigate.util.model.ctypes.CDLL", side_effect=OSError("no cuda"))
@patch("os.path.getsize", return_value=200 * 1024 * 1024)
def test_fallback_warning_includes_developer_context(
    self, _gs, _cdll, _rknn, _gp, mock_session
):
    """Guard the enriched fallback warning fields.

    A revert to the bare "model_path + e" warning would drop the
    model_type, path, device_id and provider context; this test catches
    that regression.
    """
    from frigate.detectors.detection_runners import get_optimized_runner
    from frigate.detectors.detector_config import ModelTypeEnum

    # First session construction fails (forcing the fallback); retry works.
    mock_session.side_effect = [
        RuntimeError("boom"),
        MagicMock(get_inputs=lambda: [], get_outputs=lambda: []),
    ]

    logger_name = "frigate.detectors.detection_runners"
    with self.assertLogs(logger_name, level="WARNING") as captured:
        get_optimized_runner(
            "/m/yolo.onnx", "GPU", ModelTypeEnum.yologeneric.value
        )

    warning_text = "\n".join(captured.output)
    self.assertIn(f"model_type={ModelTypeEnum.yologeneric.value}", warning_text)
    self.assertIn("path=/m/yolo.onnx", warning_text)
    self.assertIn("device_id=0", warning_text)
    self.assertIn("CUDAExecutionProvider", warning_text)
# Allow running this test module directly: `python test_detection_runners.py`.
if __name__ == "__main__":
    unittest.main()