Use thread lock for JinaV2 call as it sets multiple internal fields while being called

This commit is contained in:
Nicolas Mowen 2025-12-27 06:28:39 -07:00
parent 3c5eb1aee5
commit 25b36a1a7a

View File

@@ -3,6 +3,7 @@
import io
import logging
import os
import threading
import numpy as np
from PIL import Image
@@ -53,6 +54,11 @@ class JinaV2Embedding(BaseEmbedding):
self.tokenizer = None
self.image_processor = None
self.runner = None
# Lock to prevent concurrent calls (text and vision share this instance)
self._call_lock = threading.Lock()
# download the model and tokenizer
files_names = list(self.download_urls.keys()) + [self.tokenizer_file]
if not all(
os.path.exists(os.path.join(self.download_path, n)) for n in files_names
@@ -200,6 +206,9 @@ class JinaV2Embedding(BaseEmbedding):
def __call__(
self, inputs: list[str] | list[Image.Image] | list[str], embedding_type=None
) -> list[np.ndarray]:
# Lock the entire call to prevent race conditions when text and vision
# embeddings are called concurrently from different threads
with self._call_lock:
self.embedding_type = embedding_type
if not self.embedding_type:
raise ValueError(