From 3c6937c71fce7bcb4dd8ff35ef4258ce727cb885 Mon Sep 17 00:00:00 2001
From: felalex
Date: Sun, 3 May 2026 21:46:58 -0700
Subject: [PATCH] test: close coverage gaps in ORT leak fix regression suite

Audit of test_detection_runners.py against the actual fix changes
surfaced five cases where the test suite would silently pass after a
regression.

Adds:
- test_fallback_warning_includes_developer_context: guards the enriched
  CUDA-graph fallback warning fields (model_type, path, device_id,
  providers) against revert to the bare form.
- test_default_sets_enable_all_optimization: guards the explicit else
  branch that pins graph_optimization_level=ORT_ENABLE_ALL, added to be
  robust against ORT default changes.
- test_gpu_mem_limit_key_present_when_cuda_query_succeeds: positive
  counterpart to the existing omit-on-failure test.
- test_cuda_graph_doubles_peak_multiplier: locks in the 7 -> 14
  multiplier relationship in compute_cuda_mem_limit.
- test_arcface_is_fixed / test_facenet_is_fixed /
  test_yolov9_license_plate_is_fixed: explicit fixed-size guards for the
  remaining enrichment models the fix targets.

28 tests pass (was 21).

Co-Authored-By: Claude Haiku 4.5
---
 frigate/test/test_detection_runners.py | 152 +++++++++++++++++++++++++
 1 file changed, 152 insertions(+)

diff --git a/frigate/test/test_detection_runners.py b/frigate/test/test_detection_runners.py
index 4f11d7afb..2c593f7a0 100644
--- a/frigate/test/test_detection_runners.py
+++ b/frigate/test/test_detection_runners.py
@@ -40,6 +40,19 @@ class TestGetOrtSessionOptions(unittest.TestCase):
             ort.GraphOptimizationLevel.ORT_ENABLE_BASIC,
         )
 
+    def test_default_sets_enable_all_optimization(self):
+        # Guards the explicit `else` branch added so the optimization level is
+        # never implicit — protects against ORT default changes.
+        from frigate.detectors.detection_runners import get_ort_session_options
+
+        import onnxruntime as ort
+
+        opts = get_ort_session_options()
+        self.assertEqual(
+            opts.graph_optimization_level,
+            ort.GraphOptimizationLevel.ORT_ENABLE_ALL,
+        )
+
     def test_always_returns_session_options(self):
         from frigate.detectors.detection_runners import get_ort_session_options
 
@@ -98,6 +111,36 @@ class TestHasVariableLengthInputs(unittest.TestCase):
 
         self.assertFalse(ONNXModelRunner.has_variable_length_inputs(None))
 
+    def test_arcface_is_fixed(self):
+        from frigate.detectors.detection_runners import ONNXModelRunner
+        from frigate.embeddings.types import EnrichmentModelTypeEnum
+
+        self.assertFalse(
+            ONNXModelRunner.has_variable_length_inputs(
+                EnrichmentModelTypeEnum.arcface.value
+            )
+        )
+
+    def test_facenet_is_fixed(self):
+        from frigate.detectors.detection_runners import ONNXModelRunner
+        from frigate.embeddings.types import EnrichmentModelTypeEnum
+
+        self.assertFalse(
+            ONNXModelRunner.has_variable_length_inputs(
+                EnrichmentModelTypeEnum.facenet.value
+            )
+        )
+
+    def test_yolov9_license_plate_is_fixed(self):
+        from frigate.detectors.detection_runners import ONNXModelRunner
+        from frigate.embeddings.types import EnrichmentModelTypeEnum
+
+        self.assertFalse(
+            ONNXModelRunner.has_variable_length_inputs(
+                EnrichmentModelTypeEnum.yolov9_license_plate.value
+            )
+        )
+
 
 class TestComputeCudaMemLimit(unittest.TestCase):
     @staticmethod
@@ -161,6 +204,27 @@ class TestComputeCudaMemLimit(unittest.TestCase):
 
         self.assertIsNone(compute_cuda_mem_limit("/fake/model.onnx", cuda_graph=False))
 
+    @patch("frigate.util.model.ctypes.CDLL")
+    @patch("os.path.getsize", return_value=500 * 1024 * 1024)
+    def test_cuda_graph_doubles_peak_multiplier(self, _mock_getsize, mock_cdll):
+        # cuda_graph=True must use peak_multiplier=14 (vs 7 for cuda_graph=False)
+        # because graph capture pins all intermediate tensors live simultaneously.
+        from frigate.util.model import compute_cuda_mem_limit
+
+        total_vram = 24 * 1024**3
+        mock_lib = MagicMock()
+        mock_cdll.return_value = mock_lib
+        mock_lib.cudaMemGetInfo.side_effect = self._fake_mem_get_info(
+            total_vram, total_vram
+        )
+
+        model_size = 500 * 1024 * 1024
+        with_graph = compute_cuda_mem_limit("/fake/model.onnx", cuda_graph=True)
+        without_graph = compute_cuda_mem_limit("/fake/model.onnx", cuda_graph=False)
+        self.assertGreaterEqual(with_graph, model_size * 14)
+        self.assertGreaterEqual(without_graph, model_size * 7)
+        self.assertGreater(with_graph, without_graph)
+
     @patch("frigate.util.model.ctypes.CDLL")
     @patch("os.path.getsize", return_value=200 * 1024 * 1024)
     def test_capped_by_free_vram_when_constrained(self, _mock_getsize, mock_cdll):
@@ -380,6 +444,57 @@ class TestRunnerOmitsGpuMemLimitOnCudaQueryFailure(unittest.TestCase):
         )
 
 
+class TestRunnerInjectsGpuMemLimitOnCudaQuerySuccess(unittest.TestCase):
+    """Positive counterpart to TestRunnerOmitsGpuMemLimitOnCudaQueryFailure:
+    when cudaMemGetInfo succeeds, gpu_mem_limit must be injected into
+    provider_options so ORT's BFC arena is bounded."""
+
+    @staticmethod
+    def _fake_mem_get_info(free_value: int, total_value: int):
+        def _impl(free_ptr, total_ptr):
+            free_ptr._obj.value = free_value
+            total_ptr._obj.value = total_value
+            return 0  # cudaSuccess
+
+        return _impl
+
+    @patch("frigate.detectors.detection_runners.ort.InferenceSession")
+    @patch(
+        "frigate.detectors.detection_runners.get_ort_providers",
+        return_value=(["CUDAExecutionProvider"], [{"device_id": 0}]),
+    )
+    @patch(
+        "frigate.detectors.detection_runners.is_rknn_compatible",
+        return_value=False,
+    )
+    @patch("frigate.util.model.ctypes.CDLL")
+    @patch("os.path.getsize", return_value=200 * 1024 * 1024)
+    def test_gpu_mem_limit_key_present_when_cuda_query_succeeds(
+        self, _gs, mock_cdll, _rknn, _gp, mock_session
+    ):
+        from frigate.detectors.detection_runners import get_optimized_runner
+        from frigate.embeddings.types import EnrichmentModelTypeEnum
+
+        total_vram = 24 * 1024**3
+        mock_lib = MagicMock()
+        mock_cdll.return_value = mock_lib
+        mock_lib.cudaMemGetInfo.side_effect = self._fake_mem_get_info(
+            total_vram, total_vram
+        )
+        mock_session.return_value.get_inputs.return_value = []
+        mock_session.return_value.get_outputs.return_value = []
+
+        get_optimized_runner(
+            "/fake/jina.onnx",
+            device="GPU",
+            model_type=EnrichmentModelTypeEnum.jina_v2.value,
+        )
+
+        provider_opts = mock_session.call_args.kwargs["provider_options"]
+        self.assertIn("gpu_mem_limit", provider_opts[0])
+        self.assertGreater(provider_opts[0]["gpu_mem_limit"], 0)
+
+
 class TestCudaGraphFallbackLogsException(unittest.TestCase):
     @patch("frigate.detectors.detection_runners.ort.InferenceSession")
     @patch(
@@ -417,6 +532,43 @@ class TestCudaGraphFallbackLogsException(unittest.TestCase):
         self.assertIn("CUDA graph capture failed", joined)
         self.assertIn("cudaErrorStreamCaptureUnsupported", joined)
 
+    @patch("frigate.detectors.detection_runners.ort.InferenceSession")
+    @patch(
+        "frigate.detectors.detection_runners.get_ort_providers",
+        return_value=(["CUDAExecutionProvider"], [{"device_id": 0}]),
+    )
+    @patch(
+        "frigate.detectors.detection_runners.is_rknn_compatible",
+        return_value=False,
+    )
+    @patch("frigate.util.model.ctypes.CDLL", side_effect=OSError("no cuda"))
+    @patch("os.path.getsize", return_value=200 * 1024 * 1024)
+    def test_fallback_warning_includes_developer_context(
+        self, _gs, _cdll, _rknn, _gp, mock_session
+    ):
+        # Guards the enriched warning fields (model_type, device_id, providers)
+        # so a future revert to the bare "model_path + e" form is caught.
+        from frigate.detectors.detection_runners import get_optimized_runner
+        from frigate.detectors.detector_config import ModelTypeEnum
+
+        mock_session.side_effect = [
+            RuntimeError("boom"),
+            MagicMock(get_inputs=lambda: [], get_outputs=lambda: []),
+        ]
+
+        with self.assertLogs(
+            "frigate.detectors.detection_runners", level="WARNING"
+        ) as captured:
+            get_optimized_runner(
+                "/m/yolo.onnx", "GPU", ModelTypeEnum.yologeneric.value
+            )
+
+        joined = "\n".join(captured.output)
+        self.assertIn(f"model_type={ModelTypeEnum.yologeneric.value}", joined)
+        self.assertIn("path=/m/yolo.onnx", joined)
+        self.assertIn("device_id=0", joined)
+        self.assertIn("CUDAExecutionProvider", joined)
+
 
 if __name__ == "__main__":
     unittest.main()