diff --git a/frigate/detectors/detection_runners.py b/frigate/detectors/detection_runners.py
index 6b9c1531b..5cadb3d52 100644
--- a/frigate/detectors/detection_runners.py
+++ b/frigate/detectors/detection_runners.py
@@ -3,6 +3,7 @@
 import logging
 import os
 import platform
+import threading
 from abc import ABC, abstractmethod
 from typing import Any
 
@@ -290,6 +291,10 @@ class OpenVINOModelRunner(BaseModelRunner):
         self.infer_request = self.compiled_model.create_infer_request()
         self.input_tensor: ov.Tensor | None = None
 
+        # Thread lock to prevent concurrent inference (needed for JinaV2 which shares
+        # one runner between text and vision embeddings called from different threads)
+        self._inference_lock = threading.Lock()
+
         if not self.complex_model:
             try:
                 input_shape = self.compiled_model.inputs[0].get_shape()
@@ -333,67 +338,70 @@ class OpenVINOModelRunner(BaseModelRunner):
         Returns:
             List of output tensors
         """
-        # Handle single input case for backward compatibility
-        if (
-            len(inputs) == 1
-            and len(self.compiled_model.inputs) == 1
-            and self.input_tensor is not None
-        ):
-            # Single input case - use the pre-allocated tensor for efficiency
-            input_data = list(inputs.values())[0]
-            np.copyto(self.input_tensor.data, input_data)
-            self.infer_request.infer(self.input_tensor)
-        else:
-            if self.complex_model:
-                try:
-                    # This ensures the model starts with a clean state for each sequence
-                    # Important for RNN models like PaddleOCR recognition
-                    self.infer_request.reset_state()
-                except Exception:
-                    # this will raise an exception for models with AUTO set as the device
-                    pass
+        # Lock prevents concurrent access to infer_request
+        # Needed for JinaV2: genai thread (text) + embeddings thread (vision)
+        with self._inference_lock:
+            # Handle single input case for backward compatibility
+            if (
+                len(inputs) == 1
+                and len(self.compiled_model.inputs) == 1
+                and self.input_tensor is not None
+            ):
+                # Single input case - use the pre-allocated tensor for efficiency
+                input_data = list(inputs.values())[0]
+                np.copyto(self.input_tensor.data, input_data)
+                self.infer_request.infer(self.input_tensor)
+            else:
+                if self.complex_model:
+                    try:
+                        # This ensures the model starts with a clean state for each sequence
+                        # Important for RNN models like PaddleOCR recognition
+                        self.infer_request.reset_state()
+                    except Exception:
+                        # this will raise an exception for models with AUTO set as the device
+                        pass
 
-            # Multiple inputs case - set each input by name
-            for input_name, input_data in inputs.items():
-                # Find the input by name and its index
-                input_port = None
-                input_index = None
-                for idx, port in enumerate(self.compiled_model.inputs):
-                    if port.get_any_name() == input_name:
-                        input_port = port
-                        input_index = idx
-                        break
+                # Multiple inputs case - set each input by name
+                for input_name, input_data in inputs.items():
+                    # Find the input by name and its index
+                    input_port = None
+                    input_index = None
+                    for idx, port in enumerate(self.compiled_model.inputs):
+                        if port.get_any_name() == input_name:
+                            input_port = port
+                            input_index = idx
+                            break
 
-                if input_port is None:
-                    raise ValueError(f"Input '{input_name}' not found in model")
+                    if input_port is None:
+                        raise ValueError(f"Input '{input_name}' not found in model")
 
-                # Create tensor with the correct element type
-                input_element_type = input_port.get_element_type()
+                    # Create tensor with the correct element type
+                    input_element_type = input_port.get_element_type()
 
-                # Ensure input data matches the expected dtype to prevent type mismatches
-                # that can occur with models like Jina-CLIP v2 running on OpenVINO
-                expected_dtype = input_element_type.to_dtype()
-                if input_data.dtype != expected_dtype:
-                    logger.debug(
-                        f"Converting input '{input_name}' from {input_data.dtype} to {expected_dtype}"
-                    )
-                    input_data = input_data.astype(expected_dtype)
+                    # Ensure input data matches the expected dtype to prevent type mismatches
+                    # that can occur with models like Jina-CLIP v2 running on OpenVINO
+                    expected_dtype = input_element_type.to_dtype()
+                    if input_data.dtype != expected_dtype:
+                        logger.debug(
+                            f"Converting input '{input_name}' from {input_data.dtype} to {expected_dtype}"
+                        )
+                        input_data = input_data.astype(expected_dtype)
 
-                input_tensor = ov.Tensor(input_element_type, input_data.shape)
-                np.copyto(input_tensor.data, input_data)
+                    input_tensor = ov.Tensor(input_element_type, input_data.shape)
+                    np.copyto(input_tensor.data, input_data)
 
-                # Set the input tensor for the specific port index
-                self.infer_request.set_input_tensor(input_index, input_tensor)
+                    # Set the input tensor for the specific port index
+                    self.infer_request.set_input_tensor(input_index, input_tensor)
 
-            # Run inference
-            self.infer_request.infer()
+                # Run inference
+                self.infer_request.infer()
 
-        # Get all output tensors
-        outputs = []
-        for i in range(len(self.compiled_model.outputs)):
-            outputs.append(self.infer_request.get_output_tensor(i).data)
+            # Get all output tensors
+            outputs = []
+            for i in range(len(self.compiled_model.outputs)):
+                outputs.append(self.infer_request.get_output_tensor(i).data)
 
-        return outputs
+            return outputs
 
 
 class RKNNModelRunner(BaseModelRunner):
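The patch above serializes all access to the runner's single `infer_request`, since an OpenVINO infer request is stateful and must not be driven by two threads at once. A minimal, self-contained sketch of the pattern the lock enforces, assuming hypothetical stand-ins (`FakeInferRequest` for `ov.InferRequest`, `SharedRunner` for `OpenVINOModelRunner`); it is illustrative only, not part of the patch:

```python
import threading


class FakeInferRequest:
    """Stand-in for a stateful infer request that must not run concurrently."""

    def __init__(self) -> None:
        self._busy = False

    def infer(self, data):
        # A real infer request writes into shared input/output tensors, so
        # overlapping calls from two threads would corrupt each other's state.
        assert not self._busy, "concurrent infer() on a single request"
        self._busy = True
        result = data  # ... device work would happen here ...
        self._busy = False
        return result


class SharedRunner:
    """One runner shared by two threads (text and vision embeddings)."""

    def __init__(self) -> None:
        self.infer_request = FakeInferRequest()
        self._inference_lock = threading.Lock()

    def run(self, data):
        # Same guard as the patch: only one thread drives the request at a time.
        with self._inference_lock:
            return self.infer_request.infer(data)


# Two threads hammering one runner stay safe because the lock serializes them.
runner = SharedRunner()
threads = [
    threading.Thread(target=lambda: [runner.run(i) for i in range(1000)])
    for _ in range(2)
]
for t in threads:
    t.start()
for t in threads:
    t.join()
```

A per-runner lock is sufficient here because each `OpenVINOModelRunner` owns exactly one `infer_request`; contention only arises in the JinaV2 case the comments describe, where the text (genai) and vision (embeddings) threads share the same runner.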