Use thread lock for openvino to avoid concurrent requests with JinaV2

This commit is contained in:
Nicolas Mowen 2025-11-07 08:46:43 -07:00
parent 2376bcaf97
commit ab3ded38e6

View File

@ -3,6 +3,7 @@
import logging
import os
import platform
import threading
from abc import ABC, abstractmethod
from typing import Any
@ -290,6 +291,10 @@ class OpenVINOModelRunner(BaseModelRunner):
        self.infer_request = self.compiled_model.create_infer_request()
        self.input_tensor: ov.Tensor | None = None
# Thread lock to prevent concurrent inference (needed for JinaV2 which shares
# one runner between text and vision embeddings called from different threads)
self._inference_lock = threading.Lock()
        if not self.complex_model:
            try:
                input_shape = self.compiled_model.inputs[0].get_shape()
@ -333,6 +338,9 @@ class OpenVINOModelRunner(BaseModelRunner):
        Returns:
            List of output tensors
        """
# Lock prevents concurrent access to infer_request
# Needed for JinaV2: genai thread (text) + embeddings thread (vision)
with self._inference_lock:
            # Handle single input case for backward compatibility
            if (
                len(inputs) == 1