serialize OpenVINO inference per process to prevent concurrent-inference segfault

2026-06-21 03:41:55 +03:00 · 2026-06-03 14:38:38 -05:00 · 2026-06-03 14:38:38 -05:00 · f9027e0e2f
commit f9027e0e2f
parent b751025339
1 changed files with 21 additions and 11 deletions
--- a/frigate/detectors/detection_runners.py
+++ b/frigate/detectors/detection_runners.py
@ -15,6 +15,9 @@ from frigate.util.rknn_converter import auto_convert_model, is_rknn_compatible

 logger = logging.getLogger(__name__)

+# Process-wide lock serializing all OpenVINO compile/inference calls
+_OPENVINO_LOCK = threading.Lock()
+

 def is_arm64_platform() -> bool:
    """Check if we're running on an ARM platform."""
@ -326,18 +329,23 @@ class OpenVINOModelRunner(BaseModelRunner):
            except Exception as e:
                logger.debug(f"NPU_TURBO not supported by driver: {e}")

-        # Compile model
-        self.compiled_model = self.ov_core.compile_model(
-            model=model_path, device_name=device
-        )
+        # Compile model under the shared lock
+        with _OPENVINO_LOCK:
+            self.compiled_model = self.ov_core.compile_model(
+                model=model_path, device_name=device
+            )
+
+            # Create reusable inference request
+            self.infer_request = self.compiled_model.create_infer_request()

-        # Create reusable inference request
-        self.infer_request = self.compiled_model.create_infer_request()
        self.input_tensor: ov.Tensor | None = None

-        # Thread lock to prevent concurrent inference (needed for JinaV2 which shares
-        # one runner between text and vision embeddings called from different threads)
-        self._inference_lock = threading.Lock()
+        # Shared, process-wide lock serializing inference across all OpenVINO
+        # runners in this process. Needed both for the JinaV2 case (one runner
+        # shared between text and vision threads) and to prevent two *different*
+        # runners (e.g. an ArcFace face-model build thread and the LPR detector)
+        # from inferring concurrently and corrupting shared OpenVINO state.
+        self._inference_lock = _OPENVINO_LOCK

        if not self.complex_model:
            try:
@ -382,8 +390,10 @@ class OpenVINOModelRunner(BaseModelRunner):
        Returns:
            List of output tensors
        """
-        # Lock prevents concurrent access to infer_request
-        # Needed for JinaV2: genai thread (text) + embeddings thread (vision)
+        # Shared lock serializes inference across every OpenVINO runner in this
+        # process — both the shared-runner JinaV2 case (genai text thread +
+        # embeddings vision thread) and distinct runners running on separate
+        # threads (e.g. the ArcFace face-model build vs the LPR detector).
        with self._inference_lock:
            from frigate.embeddings.types import EnrichmentModelTypeEnum