mirror of
https://github.com/blakeblackshear/frigate.git
synced 2026-05-07 05:55:27 +03:00
test: close coverage gaps in ORT leak fix regression suite
Audit of test_detection_runners.py against the actual fix changes surfaced
five cases where the test suite would silently pass after a regression.

Adds:
- test_fallback_warning_includes_developer_context: guards the enriched
  CUDA-graph fallback warning fields (model_type, path, device_id,
  providers) against revert to the bare form.
- test_default_sets_enable_all_optimization: guards the explicit else
  branch that pins graph_optimization_level=ORT_ENABLE_ALL, added to be
  robust against ORT default changes.
- test_gpu_mem_limit_key_present_when_cuda_query_succeeds: positive
  counterpart to the existing omit-on-failure test.
- test_cuda_graph_doubles_peak_multiplier: locks in the 7 -> 14 multiplier
  relationship in compute_cuda_mem_limit.
- test_arcface_is_fixed / test_facenet_is_fixed /
  test_yolov9_license_plate_is_fixed: explicit fixed-size guards for the
  remaining enrichment models the fix targets.

28 tests pass (was 21).

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
13abc4c9eb
commit
3c6937c71f
@ -40,6 +40,19 @@ class TestGetOrtSessionOptions(unittest.TestCase):
|
||||
ort.GraphOptimizationLevel.ORT_ENABLE_BASIC,
|
||||
)
|
||||
|
||||
def test_default_sets_enable_all_optimization(self):
    """Pin the default graph optimization level to ORT_ENABLE_ALL.

    Guards the explicit `else` branch so the optimization level is never
    left implicit, protecting against upstream onnxruntime default changes.
    """
    import onnxruntime as ort

    from frigate.detectors.detection_runners import get_ort_session_options

    session_options = get_ort_session_options()
    expected_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
    self.assertEqual(session_options.graph_optimization_level, expected_level)
|
||||
|
||||
def test_always_returns_session_options(self):
|
||||
from frigate.detectors.detection_runners import get_ort_session_options
|
||||
|
||||
@ -98,6 +111,36 @@ class TestHasVariableLengthInputs(unittest.TestCase):
|
||||
|
||||
self.assertFalse(ONNXModelRunner.has_variable_length_inputs(None))
|
||||
|
||||
def test_arcface_is_fixed(self):
    """ArcFace is a fixed-input-size enrichment model, not variable-length."""
    from frigate.detectors.detection_runners import ONNXModelRunner
    from frigate.embeddings.types import EnrichmentModelTypeEnum

    model_type = EnrichmentModelTypeEnum.arcface.value
    self.assertFalse(ONNXModelRunner.has_variable_length_inputs(model_type))
|
||||
|
||||
def test_facenet_is_fixed(self):
    """FaceNet is a fixed-input-size enrichment model, not variable-length."""
    from frigate.detectors.detection_runners import ONNXModelRunner
    from frigate.embeddings.types import EnrichmentModelTypeEnum

    model_type = EnrichmentModelTypeEnum.facenet.value
    self.assertFalse(ONNXModelRunner.has_variable_length_inputs(model_type))
|
||||
|
||||
def test_yolov9_license_plate_is_fixed(self):
    """The YOLOv9 license-plate model has fixed-size, not variable, inputs."""
    from frigate.detectors.detection_runners import ONNXModelRunner
    from frigate.embeddings.types import EnrichmentModelTypeEnum

    model_type = EnrichmentModelTypeEnum.yolov9_license_plate.value
    self.assertFalse(ONNXModelRunner.has_variable_length_inputs(model_type))
|
||||
|
||||
|
||||
class TestComputeCudaMemLimit(unittest.TestCase):
|
||||
@staticmethod
|
||||
@ -161,6 +204,27 @@ class TestComputeCudaMemLimit(unittest.TestCase):
|
||||
|
||||
self.assertIsNone(compute_cuda_mem_limit("/fake/model.onnx", cuda_graph=False))
|
||||
|
||||
@patch("frigate.util.model.ctypes.CDLL")
@patch("os.path.getsize", return_value=500 * 1024 * 1024)
def test_cuda_graph_doubles_peak_multiplier(self, _mock_getsize, mock_cdll):
    """cuda_graph=True must budget peak_multiplier=14 (vs 7 when disabled).

    Graph capture pins every intermediate tensor live simultaneously, so the
    limit computed with cuda_graph=True must strictly exceed the limit
    computed without it.
    """
    from frigate.util.model import compute_cuda_mem_limit

    model_bytes = 500 * 1024 * 1024
    vram_bytes = 24 * 1024**3

    # Fake a healthy CUDA runtime reporting 24 GB free / 24 GB total so the
    # result is multiplier-bound rather than free-VRAM-capped.
    cuda_lib = MagicMock()
    cuda_lib.cudaMemGetInfo.side_effect = self._fake_mem_get_info(
        vram_bytes, vram_bytes
    )
    mock_cdll.return_value = cuda_lib

    graph_limit = compute_cuda_mem_limit("/fake/model.onnx", cuda_graph=True)
    plain_limit = compute_cuda_mem_limit("/fake/model.onnx", cuda_graph=False)

    self.assertGreaterEqual(graph_limit, model_bytes * 14)
    self.assertGreaterEqual(plain_limit, model_bytes * 7)
    self.assertGreater(graph_limit, plain_limit)
|
||||
|
||||
@patch("frigate.util.model.ctypes.CDLL")
|
||||
@patch("os.path.getsize", return_value=200 * 1024 * 1024)
|
||||
def test_capped_by_free_vram_when_constrained(self, _mock_getsize, mock_cdll):
|
||||
@ -380,6 +444,57 @@ class TestRunnerOmitsGpuMemLimitOnCudaQueryFailure(unittest.TestCase):
|
||||
)
|
||||
|
||||
|
||||
class TestRunnerInjectsGpuMemLimitOnCudaQuerySuccess(unittest.TestCase):
    """Positive counterpart to TestRunnerOmitsGpuMemLimitOnCudaQueryFailure.

    When cudaMemGetInfo succeeds, gpu_mem_limit must be injected into
    provider_options so ORT's BFC arena is bounded.
    """

    @staticmethod
    def _fake_mem_get_info(free_value: int, total_value: int):
        # Build a cudaMemGetInfo stand-in that writes the given byte counts
        # through the ctypes out-pointers and reports cudaSuccess.
        def _impl(free_ptr, total_ptr):
            free_ptr._obj.value = free_value
            total_ptr._obj.value = total_value
            return 0  # cudaSuccess

        return _impl

    @patch("frigate.detectors.detection_runners.ort.InferenceSession")
    @patch(
        "frigate.detectors.detection_runners.get_ort_providers",
        return_value=(["CUDAExecutionProvider"], [{"device_id": 0}]),
    )
    @patch(
        "frigate.detectors.detection_runners.is_rknn_compatible",
        return_value=False,
    )
    @patch("frigate.util.model.ctypes.CDLL")
    @patch("os.path.getsize", return_value=200 * 1024 * 1024)
    def test_gpu_mem_limit_key_present_when_cuda_query_succeeds(
        self, _gs, mock_cdll, _rknn, _gp, mock_session
    ):
        from frigate.detectors.detection_runners import get_optimized_runner
        from frigate.embeddings.types import EnrichmentModelTypeEnum

        vram_bytes = 24 * 1024**3

        # Simulate a successful CUDA free/total VRAM query.
        cuda_lib = MagicMock()
        cuda_lib.cudaMemGetInfo.side_effect = self._fake_mem_get_info(
            vram_bytes, vram_bytes
        )
        mock_cdll.return_value = cuda_lib

        # The runner inspects model I/O after session creation; keep it inert.
        mock_session.return_value.get_inputs.return_value = []
        mock_session.return_value.get_outputs.return_value = []

        get_optimized_runner(
            "/fake/jina.onnx",
            device="GPU",
            model_type=EnrichmentModelTypeEnum.jina_v2.value,
        )

        cuda_provider_opts = mock_session.call_args.kwargs["provider_options"][0]
        self.assertIn("gpu_mem_limit", cuda_provider_opts)
        self.assertGreater(cuda_provider_opts["gpu_mem_limit"], 0)
|
||||
|
||||
|
||||
class TestCudaGraphFallbackLogsException(unittest.TestCase):
|
||||
@patch("frigate.detectors.detection_runners.ort.InferenceSession")
|
||||
@patch(
|
||||
@ -417,6 +532,43 @@ class TestCudaGraphFallbackLogsException(unittest.TestCase):
|
||||
self.assertIn("CUDA graph capture failed", joined)
|
||||
self.assertIn("cudaErrorStreamCaptureUnsupported", joined)
|
||||
|
||||
@patch("frigate.detectors.detection_runners.ort.InferenceSession")
@patch(
    "frigate.detectors.detection_runners.get_ort_providers",
    return_value=(["CUDAExecutionProvider"], [{"device_id": 0}]),
)
@patch(
    "frigate.detectors.detection_runners.is_rknn_compatible",
    return_value=False,
)
@patch("frigate.util.model.ctypes.CDLL", side_effect=OSError("no cuda"))
@patch("os.path.getsize", return_value=200 * 1024 * 1024)
def test_fallback_warning_includes_developer_context(
    self, _gs, _cdll, _rknn, _gp, mock_session
):
    """Guard the enriched fallback warning fields.

    A future revert to the bare "model_path + e" form must be caught, so
    assert that model_type, path, device_id, and providers all appear in
    the logged warning.
    """
    from frigate.detectors.detection_runners import get_optimized_runner
    from frigate.detectors.detector_config import ModelTypeEnum

    # First session creation (CUDA-graph attempt) fails; the retry succeeds
    # with an inert session.
    fallback_session = MagicMock(get_inputs=lambda: [], get_outputs=lambda: [])
    mock_session.side_effect = [RuntimeError("boom"), fallback_session]

    with self.assertLogs(
        "frigate.detectors.detection_runners", level="WARNING"
    ) as logs:
        get_optimized_runner(
            "/m/yolo.onnx", "GPU", ModelTypeEnum.yologeneric.value
        )

    log_text = "\n".join(logs.output)
    self.assertIn(f"model_type={ModelTypeEnum.yologeneric.value}", log_text)
    self.assertIn("path=/m/yolo.onnx", log_text)
    self.assertIn("device_id=0", log_text)
    self.assertIn("CUDAExecutionProvider", log_text)
|
||||
|
||||
|
||||
# Allow running this test module directly: `python test_detection_runners.py`.
if __name__ == "__main__":
    unittest.main()
|
||||
|
||||
Loading…
Reference in New Issue
Block a user