Add warm-up to onnx as some GPUs require kernel compilation before accepting inferences (#22685)

Nicolas Mowen 2026-03-29 10:19:46 -06:00 committed by GitHub
parent 148e11afc5
commit 29ca18c24c

@@ -8,6 +8,8 @@ from frigate.detectors.detection_api import DetectionApi
from frigate.detectors.detection_runners import get_optimized_runner
from frigate.detectors.detector_config import (
    BaseDetectorConfig,
    InputDTypeEnum,
    InputTensorEnum,
    ModelTypeEnum,
)
from frigate.util.model import (

@@ -59,8 +61,34 @@ class ONNXDetector(DetectionApi):
        if self.onnx_model_type == ModelTypeEnum.yolox:
            self.calculate_grids_strides()

        self._warmup(detector_config)
        logger.info(f"ONNX: {path} loaded")

    def _warmup(self, detector_config: ONNXDetectorConfig) -> None:
        """Run a warmup inference to front-load one-time compilation costs.

        Some GPU backends have a slow first inference: CUDA may need PTX JIT
        compilation on newer architectures (e.g. NVIDIA 50-series / Blackwell),
        and MIGraphX compiles the model graph on first run. Running it here
        (during detector creation) keeps the watchdog start_time at 0.0 so the
        process won't be killed.
        """
        if detector_config.model.input_tensor == InputTensorEnum.nchw:
            shape = (1, 3, detector_config.model.height, detector_config.model.width)
        else:
            shape = (1, detector_config.model.height, detector_config.model.width, 3)

        if detector_config.model.input_dtype in (
            InputDTypeEnum.float,
            InputDTypeEnum.float_denorm,
        ):
            dtype = np.float32
        else:
            dtype = np.uint8

        logger.info("ONNX: warming up detector (may take a while on first run)...")
        self.detect_raw(np.zeros(shape, dtype=dtype))

    def detect_raw(self, tensor_input: np.ndarray):
        if self.onnx_model_type == ModelTypeEnum.dfine:
            tensor_output = self.runner.run(
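
As context for why the warm-up matters, below is a minimal standalone sketch (not Frigate code) that times a first inference against a second one on a plain onnxruntime session. The model path, provider choice, and dynamic-dimension handling are illustrative assumptions, not part of the commit.

# Sketch: measure first-vs-second inference latency on an ONNX model.
# "model.onnx" and the provider list are placeholders.
import time

import numpy as np
import onnxruntime as ort

session = ort.InferenceSession("model.onnx", providers=["CPUExecutionProvider"])
input_meta = session.get_inputs()[0]

# Build a zero tensor matching the declared input shape; dynamic
# dimensions (strings or None) are replaced with 1 for this sketch.
shape = [dim if isinstance(dim, int) else 1 for dim in input_meta.shape]
dtype = np.float32 if "float" in input_meta.type else np.uint8
dummy = np.zeros(shape, dtype=dtype)

# First run: may include kernel/graph compilation on some providers.
start = time.monotonic()
session.run(None, {input_meta.name: dummy})
print(f"first inference:  {time.monotonic() - start:.3f}s")

# Second run: steady-state latency.
start = time.monotonic()
session.run(None, {input_meta.name: dummy})
print(f"second inference: {time.monotonic() - start:.3f}s")

On GPU providers such as CUDAExecutionProvider or MIGraphXExecutionProvider, the first run typically absorbs the one-time compilation cost that the commit's _warmup() call moves to detector startup, before the watchdog begins timing inferences.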