test: close coverage gaps in ORT leak fix regression suite

An audit of test_detection_runners.py against the actual fix changes
surfaced five coverage gaps where the test suite would have continued
to pass silently after a regression. Adds:

- test_fallback_warning_includes_developer_context: guards the
  enriched CUDA-graph fallback warning fields (model_type, path,
  device_id, providers) against revert to the bare form.
- test_default_sets_enable_all_optimization: guards the explicit
  else branch that pins graph_optimization_level=ORT_ENABLE_ALL,
  added to be robust against ORT default changes.
- test_gpu_mem_limit_key_present_when_cuda_query_succeeds: positive
  counterpart to the existing omit-on-failure test.
- test_cuda_graph_doubles_peak_multiplier: locks in the 7 -> 14
  multiplier relationship in compute_cuda_mem_limit.
- test_arcface_is_fixed / test_facenet_is_fixed /
  test_yolov9_license_plate_is_fixed: explicit fixed-size guards
  for the remaining enrichment models the fix targets.

28 tests pass (was 21).

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
felalex 2026-05-03 21:46:58 -07:00
parent 13abc4c9eb
commit 3c6937c71f

View File

@ -40,6 +40,19 @@ class TestGetOrtSessionOptions(unittest.TestCase):
ort.GraphOptimizationLevel.ORT_ENABLE_BASIC,
)
def test_default_sets_enable_all_optimization(self):
    """Default session options must pin graph_optimization_level to ORT_ENABLE_ALL.

    The runner sets the level in an explicit ``else`` branch so it never
    relies on onnxruntime's own default; this test catches a revert of
    that branch.
    """
    import onnxruntime as ort

    from frigate.detectors.detection_runners import get_ort_session_options

    session_options = get_ort_session_options()
    expected_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
    self.assertEqual(session_options.graph_optimization_level, expected_level)
def test_always_returns_session_options(self):
from frigate.detectors.detection_runners import get_ort_session_options
@ -98,6 +111,36 @@ class TestHasVariableLengthInputs(unittest.TestCase):
self.assertFalse(ONNXModelRunner.has_variable_length_inputs(None))
def test_arcface_is_fixed(self):
    """ArcFace must be classified as a fixed-input-size model."""
    from frigate.detectors.detection_runners import ONNXModelRunner
    from frigate.embeddings.types import EnrichmentModelTypeEnum

    model_type = EnrichmentModelTypeEnum.arcface.value
    is_variable = ONNXModelRunner.has_variable_length_inputs(model_type)
    self.assertFalse(is_variable)
def test_facenet_is_fixed(self):
    """FaceNet must be classified as a fixed-input-size model."""
    from frigate.detectors.detection_runners import ONNXModelRunner
    from frigate.embeddings.types import EnrichmentModelTypeEnum

    model_type = EnrichmentModelTypeEnum.facenet.value
    is_variable = ONNXModelRunner.has_variable_length_inputs(model_type)
    self.assertFalse(is_variable)
def test_yolov9_license_plate_is_fixed(self):
    """The YOLOv9 license-plate model must be classified as fixed-size."""
    from frigate.detectors.detection_runners import ONNXModelRunner
    from frigate.embeddings.types import EnrichmentModelTypeEnum

    model_type = EnrichmentModelTypeEnum.yolov9_license_plate.value
    is_variable = ONNXModelRunner.has_variable_length_inputs(model_type)
    self.assertFalse(is_variable)
class TestComputeCudaMemLimit(unittest.TestCase):
@staticmethod
@ -161,6 +204,27 @@ class TestComputeCudaMemLimit(unittest.TestCase):
self.assertIsNone(compute_cuda_mem_limit("/fake/model.onnx", cuda_graph=False))
@patch("frigate.util.model.ctypes.CDLL")
@patch("os.path.getsize", return_value=500 * 1024 * 1024)
def test_cuda_graph_doubles_peak_multiplier(self, _mock_getsize, mock_cdll):
    """cuda_graph=True must budget peak_multiplier=14 versus 7 without it.

    Graph capture pins all intermediate tensors live simultaneously, so
    the computed memory limit must roughly double the non-capture budget.
    """
    from frigate.util.model import compute_cuda_mem_limit

    total_vram = 24 * 1024**3
    fake_lib = MagicMock()
    mock_cdll.return_value = fake_lib
    fake_lib.cudaMemGetInfo.side_effect = self._fake_mem_get_info(
        total_vram, total_vram
    )

    model_size = 500 * 1024 * 1024
    limit_with_graph = compute_cuda_mem_limit("/fake/model.onnx", cuda_graph=True)
    limit_no_graph = compute_cuda_mem_limit("/fake/model.onnx", cuda_graph=False)

    # 14x with capture, 7x without — and capture must always cost more.
    self.assertGreaterEqual(limit_with_graph, model_size * 14)
    self.assertGreaterEqual(limit_no_graph, model_size * 7)
    self.assertGreater(limit_with_graph, limit_no_graph)
@patch("frigate.util.model.ctypes.CDLL")
@patch("os.path.getsize", return_value=200 * 1024 * 1024)
def test_capped_by_free_vram_when_constrained(self, _mock_getsize, mock_cdll):
@ -380,6 +444,57 @@ class TestRunnerOmitsGpuMemLimitOnCudaQueryFailure(unittest.TestCase):
)
class TestRunnerInjectsGpuMemLimitOnCudaQuerySuccess(unittest.TestCase):
    """Positive counterpart to TestRunnerOmitsGpuMemLimitOnCudaQueryFailure.

    When cudaMemGetInfo succeeds, gpu_mem_limit must be injected into
    provider_options so ORT's BFC arena is bounded.
    """

    @staticmethod
    def _fake_mem_get_info(free_value: int, total_value: int):
        """Return a cudaMemGetInfo stub writing the given byte counts."""

        def fake_query(free_ptr, total_ptr):
            free_ptr._obj.value = free_value
            total_ptr._obj.value = total_value
            return 0  # cudaSuccess
        return fake_query

    @patch("frigate.detectors.detection_runners.ort.InferenceSession")
    @patch(
        "frigate.detectors.detection_runners.get_ort_providers",
        return_value=(["CUDAExecutionProvider"], [{"device_id": 0}]),
    )
    @patch(
        "frigate.detectors.detection_runners.is_rknn_compatible",
        return_value=False,
    )
    @patch("frigate.util.model.ctypes.CDLL")
    @patch("os.path.getsize", return_value=200 * 1024 * 1024)
    def test_gpu_mem_limit_key_present_when_cuda_query_succeeds(
        self, _gs, mock_cdll, _rknn, _gp, mock_session
    ):
        from frigate.detectors.detection_runners import get_optimized_runner
        from frigate.embeddings.types import EnrichmentModelTypeEnum

        # Pretend an idle 24 GiB GPU so the VRAM query succeeds cleanly.
        total_vram = 24 * 1024**3
        fake_lib = MagicMock()
        mock_cdll.return_value = fake_lib
        fake_lib.cudaMemGetInfo.side_effect = self._fake_mem_get_info(
            total_vram, total_vram
        )

        session_instance = mock_session.return_value
        session_instance.get_inputs.return_value = []
        session_instance.get_outputs.return_value = []

        get_optimized_runner(
            "/fake/jina.onnx",
            device="GPU",
            model_type=EnrichmentModelTypeEnum.jina_v2.value,
        )

        provider_opts = mock_session.call_args.kwargs["provider_options"]
        self.assertIn("gpu_mem_limit", provider_opts[0])
        self.assertGreater(provider_opts[0]["gpu_mem_limit"], 0)
class TestCudaGraphFallbackLogsException(unittest.TestCase):
@patch("frigate.detectors.detection_runners.ort.InferenceSession")
@patch(
@ -417,6 +532,43 @@ class TestCudaGraphFallbackLogsException(unittest.TestCase):
self.assertIn("CUDA graph capture failed", joined)
self.assertIn("cudaErrorStreamCaptureUnsupported", joined)
@patch("frigate.detectors.detection_runners.ort.InferenceSession")
@patch(
    "frigate.detectors.detection_runners.get_ort_providers",
    return_value=(["CUDAExecutionProvider"], [{"device_id": 0}]),
)
@patch(
    "frigate.detectors.detection_runners.is_rknn_compatible",
    return_value=False,
)
@patch("frigate.util.model.ctypes.CDLL", side_effect=OSError("no cuda"))
@patch("os.path.getsize", return_value=200 * 1024 * 1024)
def test_fallback_warning_includes_developer_context(
    self, _gs, _cdll, _rknn, _gp, mock_session
):
    """Guard the enriched fallback warning fields.

    A revert to the bare "model_path + e" warning would drop the
    model_type, path, device_id and provider context; this test catches
    that regression.
    """
    from frigate.detectors.detection_runners import get_optimized_runner
    from frigate.detectors.detector_config import ModelTypeEnum

    # First session construction fails (forcing the fallback); retry works.
    mock_session.side_effect = [
        RuntimeError("boom"),
        MagicMock(get_inputs=lambda: [], get_outputs=lambda: []),
    ]

    logger_name = "frigate.detectors.detection_runners"
    with self.assertLogs(logger_name, level="WARNING") as captured:
        get_optimized_runner(
            "/m/yolo.onnx", "GPU", ModelTypeEnum.yologeneric.value
        )

    warning_text = "\n".join(captured.output)
    self.assertIn(f"model_type={ModelTypeEnum.yologeneric.value}", warning_text)
    self.assertIn("path=/m/yolo.onnx", warning_text)
    self.assertIn("device_id=0", warning_text)
    self.assertIn("CUDAExecutionProvider", warning_text)
# Allow running this test module directly: `python test_detection_runners.py`.
if __name__ == "__main__":
    unittest.main()