Use thread lock for OpenVINO to avoid concurrent requests with JinaV2

Nicolas Mowen 2025-11-07 08:46:43 -07:00
parent 2376bcaf97
commit ab3ded38e6


@ -3,6 +3,7 @@
import logging
import os
import platform
import threading
from abc import ABC, abstractmethod
from typing import Any
@ -290,6 +291,10 @@ class OpenVINOModelRunner(BaseModelRunner):
        self.infer_request = self.compiled_model.create_infer_request()
        self.input_tensor: ov.Tensor | None = None

        # Thread lock to prevent concurrent inference (needed for JinaV2 which shares
        # one runner between text and vision embeddings called from different threads)
        self._inference_lock = threading.Lock()

        if not self.complex_model:
            try:
                input_shape = self.compiled_model.inputs[0].get_shape()
@ -333,6 +338,9 @@ class OpenVINOModelRunner(BaseModelRunner):
        Returns:
            List of output tensors
        """
        # Lock prevents concurrent access to infer_request
        # Needed for JinaV2: genai thread (text) + embeddings thread (vision)
        with self._inference_lock:
            # Handle single input case for backward compatibility
            if (
                len(inputs) == 1
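
For context, here is a minimal standalone sketch of the pattern this commit applies. The class and function names (`SharedRunner`, `worker`) are illustrative stand-ins, not Frigate's actual API: a single runner owns one non-thread-safe inference request, and a per-runner `threading.Lock` serializes calls to it so that text-embedding and vision-embedding threads never touch the request at the same time.

```python
import threading
import time


class SharedRunner:
    """Illustrative stand-in for a runner that owns one non-thread-safe
    inference request (as OpenVINOModelRunner does with infer_request)."""

    def __init__(self) -> None:
        # One lock per runner; every inference call must hold it.
        self._inference_lock = threading.Lock()
        self._busy = False  # stands in for the shared request's internal state

    def run(self, label: str) -> None:
        with self._inference_lock:
            # Without the lock, two threads could be in this section at once
            # and clobber the shared request's input/output tensors.
            assert not self._busy, f"concurrent access detected ({label})"
            self._busy = True
            time.sleep(0.005)  # pretend to run inference
            self._busy = False


def worker(runner: SharedRunner, label: str) -> None:
    for _ in range(20):
        runner.run(label)


runner = SharedRunner()
# Two callers sharing one runner, analogous to the text and vision
# embedding threads that share a JinaV2 runner.
threads = [
    threading.Thread(target=worker, args=(runner, name))
    for name in ("text", "vision")
]
for t in threads:
    t.start()
for t in threads:
    t.join()
print("all runs completed without concurrent access")
```

Removing the `with self._inference_lock:` line makes the assertion fire almost immediately, which is the failure mode the commit guards against. A per-runner lock keeps the change local to `OpenVINOModelRunner`; an alternative such as creating a separate infer request per calling thread would be a larger change.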