mirror of
https://github.com/blakeblackshear/frigate.git
synced 2026-06-28 15:21:54 +03:00
serialize OpenVINO inference per process to prevent concurrent-inference segfault
This commit is contained in:
parent
a08e2d7529
commit
2e04f2f152
@ -15,6 +15,9 @@ from frigate.util.rknn_converter import auto_convert_model, is_rknn_compatible
|
|||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Process-wide lock serializing all OpenVINO compile/inference calls
|
||||||
|
_OPENVINO_LOCK = threading.Lock()
|
||||||
|
|
||||||
|
|
||||||
def is_arm64_platform() -> bool:
|
def is_arm64_platform() -> bool:
|
||||||
"""Check if we're running on an ARM platform."""
|
"""Check if we're running on an ARM platform."""
|
||||||
@ -326,18 +329,23 @@ class OpenVINOModelRunner(BaseModelRunner):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.debug(f"NPU_TURBO not supported by driver: {e}")
|
logger.debug(f"NPU_TURBO not supported by driver: {e}")
|
||||||
|
|
||||||
# Compile model
|
# Compile model under the shared lock
|
||||||
self.compiled_model = self.ov_core.compile_model(
|
with _OPENVINO_LOCK:
|
||||||
model=model_path, device_name=device
|
self.compiled_model = self.ov_core.compile_model(
|
||||||
)
|
model=model_path, device_name=device
|
||||||
|
)
|
||||||
|
|
||||||
|
# Create reusable inference request
|
||||||
|
self.infer_request = self.compiled_model.create_infer_request()
|
||||||
|
|
||||||
# Create reusable inference request
|
|
||||||
self.infer_request = self.compiled_model.create_infer_request()
|
|
||||||
self.input_tensor: ov.Tensor | None = None
|
self.input_tensor: ov.Tensor | None = None
|
||||||
|
|
||||||
# Thread lock to prevent concurrent inference (needed for JinaV2 which shares
|
# Shared, process-wide lock serializing inference across all OpenVINO
|
||||||
# one runner between text and vision embeddings called from different threads)
|
# runners in this process. Needed both for the JinaV2 case (one runner
|
||||||
self._inference_lock = threading.Lock()
|
# shared between text and vision threads) and to prevent two *different*
|
||||||
|
# runners (e.g. an ArcFace face-model build thread and the LPR detector)
|
||||||
|
# from inferring concurrently and corrupting shared OpenVINO state.
|
||||||
|
self._inference_lock = _OPENVINO_LOCK
|
||||||
|
|
||||||
if not self.complex_model:
|
if not self.complex_model:
|
||||||
try:
|
try:
|
||||||
@ -382,8 +390,10 @@ class OpenVINOModelRunner(BaseModelRunner):
|
|||||||
Returns:
|
Returns:
|
||||||
List of output tensors
|
List of output tensors
|
||||||
"""
|
"""
|
||||||
# Lock prevents concurrent access to infer_request
|
# Shared lock serializes inference across every OpenVINO runner in this
|
||||||
# Needed for JinaV2: genai thread (text) + embeddings thread (vision)
|
# process — both the shared-runner JinaV2 case (genai text thread +
|
||||||
|
# embeddings vision thread) and distinct runners running on separate
|
||||||
|
# threads (e.g. the ArcFace face-model build vs the LPR detector).
|
||||||
with self._inference_lock:
|
with self._inference_lock:
|
||||||
from frigate.embeddings.types import EnrichmentModelTypeEnum
|
from frigate.embeddings.types import EnrichmentModelTypeEnum
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user