Merge 03802a9ac3 into acdfed40a9

2026-03-10 10:33:11 +03:00 · 2026-03-08 15:42:53 +00:00 · 2026-03-08 15:42:53 +00:00 · 85e4f7171b
commit 85e4f7171b
parent acdfed40a9 03802a9ac3
11 changed files with 639 additions and 18 deletions
--- a/docker/axcl/user_installation.sh
+++ b/docker/axcl/user_installation.sh
@ -0,0 +1,110 @@
+#!/bin/bash
+
+set -e
+
+# Function to clean up on error
+cleanup() {
+    echo "Cleaning up temporary files..."
+    rm -f "$deb_file"
+}
+
+trap cleanup ERR
+trap 'echo "Script interrupted by user (Ctrl+C)"; cleanup; exit 130' INT
+
+# Update package list and install dependencies
+echo "Updating package list and installing dependencies..."
+sudo apt-get update
+sudo apt-get install -y build-essential cmake git wget pciutils kmod udev
+
+# Check if gcc-12 is needed
+echo "Checking GCC version..."
+current_gcc_version=$(gcc --version | head -n1 | awk '{print $NF}')
+if ! dpkg --compare-versions "$current_gcc_version" ge "12" 2>/dev/null; then
+    echo "Current GCC version ($current_gcc_version) is lower than 12, installing gcc-12..."
+    sudo apt-get install -y gcc-12
+    sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 12
+    echo "GCC-12 installed and set as default"
+else
+    echo "Current GCC version ($current_gcc_version) is sufficient, skipping GCC installation"
+fi
+
+# Determine architecture
+echo "Determining system architecture..."
+arch=$(uname -m)
+download_url=""
+
+if [[ $arch == "x86_64" ]]; then
+    download_url="https://github.com/ivanshi1108/assets/releases/download/v0.17/axcl_host_x86_64_V3.10.2_20251111020143_NO5046.deb"
+    deb_file="axcl.deb"
+elif [[ $arch == "aarch64" ]]; then
+    download_url="https://github.com/ivanshi1108/assets/releases/download/v0.17/axcl_host_aarch64_V3.10.2_20251111020143_NO5046.deb"
+    deb_file="axcl.deb"
+else
+    echo "Unsupported architecture: $arch"
+    exit 1
+fi
+
+# Check for required Linux headers before downloading
+echo "Checking for required Linux headers..."
+kernel_version=$(uname -r)
+if dpkg -l | grep -q "linux-headers-${kernel_version}" || [ -d "/lib/modules/${kernel_version}/build" ]; then
+    echo "Linux headers or kernel modules directory found for kernel ${kernel_version}/build."
+else
+    echo "Linux headers for kernel ${kernel_version} not found. Please install them first: sudo apt-get install linux-headers-${kernel_version}"
+    exit 1
+fi
+
+# Download AXCL driver
+echo "Downloading AXCL driver for $arch..."
+wget --timeout=30 --tries=3 "$download_url" -O "$deb_file"
+
+if [ $? -ne 0 ]; then
+    echo "Failed to download AXCL driver after retries"
+    exit 1
+fi
+
+# Install AXCL driver
+echo "Installing AXCL driver..."
+sudo dpkg -i "$deb_file"
+
+if [ $? -ne 0 ]; then
+    echo "Failed to install AXCL driver, attempting to fix dependencies..."
+    sudo apt-get install -f -y
+    sudo dpkg -i "$deb_file"
+
+    if [ $? -ne 0 ]; then
+        echo "AXCL driver installation failed after dependency fix"
+        exit 1
+    fi
+fi
+
+# Update environment
+echo "Updating environment..."
+source /etc/profile
+
+# Verify installation
+echo "Verifying AXCL installation..."
+if command -v axcl-smi &> /dev/null; then
+    echo "AXCL driver detected, checking AI accelerator status..."
+
+    axcl_output=$(axcl-smi 2>&1)
+    axcl_exit_code=$?
+
+    echo "$axcl_output"
+
+    if [ $axcl_exit_code -eq 0 ]; then
+        echo "AXCL driver installation completed successfully!"
+    else
+        echo "AXCL driver installed but no AI accelerator detected or communication failed."
+        echo "Please check if the AI accelerator is properly connected and powered on."
+        exit 1
+    fi
+else
+    echo "axcl-smi command not found. AXCL driver installation may have failed."
+    exit 1
+fi
+
+# Clean up
+echo "Cleaning up temporary files..."
+rm -f "$deb_file"
+echo "Installation script completed."
--- a/docker/main/Dockerfile
+++ b/docker/main/Dockerfile
@ -266,6 +266,16 @@ RUN wget -q https://bootstrap.pypa.io/get-pip.py -O get-pip.py \
 RUN --mount=type=bind,from=wheels,source=/wheels,target=/deps/wheels \
    pip3 install -U /deps/wheels/*.whl

+####
+#
+# AXEngine Support
+#
+####
+RUN pip3 install https://github.com/ivanshi1108/pyaxengine/releases/download/0.1.3-frigate/axengine-0.1.3-py3-none-any.whl
+
+ENV PATH="${PATH}:/usr/bin/axcl"
+ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/lib/axcl"
+
 # Install MemryX runtime (requires libgomp (OpenMP) in the final docker image)
 RUN --mount=type=bind,source=docker/main/install_memryx.sh,target=/deps/install_memryx.sh \
    bash -c "bash /deps/install_memryx.sh"
--- a/docs/docs/configuration/object_detectors.md
+++ b/docs/docs/configuration/object_detectors.md
@ -49,6 +49,11 @@ Frigate supports multiple different detectors that work on different types of ha

 - [Synaptics](#synaptics): synap models can run on Synaptics devices(e.g astra machina) with included NPUs.

+**AXERA** <CommunityBadge />
+
+- [AXEngine](#axera): axmodels can run on AXERA AI accelerators.
+
+
 **For Testing**

 - [CPU Detector (not recommended for actual use](#cpu-detector-not-recommended): Use a CPU to run tflite model, this is not recommended and in most cases OpenVINO can be used in CPU mode with better results.
@ -1478,6 +1483,41 @@ model:
  input_pixel_format: rgb/bgr # look at the model.json to figure out which to put here
 ```

+## AXERA
+
+Hardware accelerated object detection is supported on the following SoCs:
+
+- AX650N
+- AX8850N
+
+This implementation uses the [AXera Pulsar2 Toolchain](https://huggingface.co/AXERA-TECH/Pulsar2).
+
+See the [installation docs](../frigate/installation.md#axera) for information on configuring the AXEngine hardware.
+
+### Configuration
+
+When configuring the AXEngine detector, you have to specify the model name.
+
+#### yolov9
+
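+The `frigate-yolov9-tiny` yolov9 model is used by this detector type by default. It is downloaded automatically into the `axengine_cache` folder of Frigate's model cache the first time the detector starts.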
+
+Use the model configuration shown below when using the axengine detector with the default axmodel:
+
+```yaml
+detectors:
+  axengine:
+    type: axengine
+
+model:
+  path: frigate-yolov9-tiny
+  model_type: yolo-generic
+  width: 320
+  height: 320
+  input_pixel_format: bgr
+  labelmap_path: /labelmap/coco-80.txt
+```
+
 # Models

 Some model types are not included in Frigate by default.
@ -1571,12 +1611,12 @@ YOLOv9 model can be exported as ONNX using the command below. You can copy and p
 ```sh
 docker build . --build-arg MODEL_SIZE=t --build-arg IMG_SIZE=320 --output . -f- <<'EOF'
 FROM python:3.11 AS build
-RUN apt-get update && apt-get install --no-install-recommends -y libgl1 && rm -rf /var/lib/apt/lists/*
-COPY --from=ghcr.io/astral-sh/uv:0.8.0 /uv /bin/
+RUN apt-get update && apt-get install --no-install-recommends -y cmake libgl1 && rm -rf /var/lib/apt/lists/*
+COPY --from=ghcr.io/astral-sh/uv:0.10.4 /uv /bin/
 WORKDIR /yolov9
 ADD https://github.com/WongKinYiu/yolov9.git .
 RUN uv pip install --system -r requirements.txt
-RUN uv pip install --system onnx==1.18.0 onnxruntime onnx-simplifier>=0.4.1 onnxscript
+RUN uv pip install --system onnx==1.18.0 onnxruntime onnx-simplifier==0.4.* onnxscript
 ARG MODEL_SIZE
 ARG IMG_SIZE
 ADD https://github.com/WongKinYiu/yolov9/releases/download/v0.1/yolov9-${MODEL_SIZE}-converted.pt yolov9-${MODEL_SIZE}.pt
--- a/docs/docs/frigate/hardware.md
+++ b/docs/docs/frigate/hardware.md
@ -103,6 +103,10 @@ Frigate supports multiple different detectors that work on different types of ha

 - [Synaptics](#synaptics): synap models can run on Synaptics devices(e.g astra machina) with included NPUs to provide efficient object detection.

+**AXERA** <CommunityBadge />
+
+- [AXEngine](#axera): axera models can run on AXERA NPUs via AXEngine, delivering highly efficient object detection.
+
 :::

 ### Hailo-8
@ -288,6 +292,14 @@ The inference time of a rk3588 with all 3 cores enabled is typically 25-30 ms fo
 | ssd mobilenet | ~ 25 ms                         |
 | yolov5m       | ~ 118 ms                        |

+### AXERA
+
+- **AXEngine** Default model is **yolov9**
+
+| Name             | AXERA AX650N/AX8850N Inference Time |
+| ---------------- | ----------------------------------- |
+| yolov9-tiny      | ~ 4 ms                              |
+
 ## What does Frigate use the CPU for and what does it use a detector for? (ELI5 Version)

 This is taken from a [user question on reddit](https://www.reddit.com/r/homeassistant/comments/q8mgau/comment/hgqbxh5/?utm_source=share&utm_medium=web2x&context=3). Modified slightly for clarity.
--- a/docs/docs/frigate/installation.md
+++ b/docs/docs/frigate/installation.md
@ -439,6 +439,42 @@ or add these options to your `docker run` command:

 Next, you should configure [hardware object detection](/configuration/object_detectors#synaptics) and [hardware video processing](/configuration/hardware_acceleration_video#synaptics).

+### AXERA
+
+<details>
+<summary>AXERA accelerators</summary>
+AXERA accelerators are available in an M.2 form factor, compatible with both Raspberry Pi and Orange Pi. This form factor has also been successfully tested on x86 platforms, making it a versatile choice for various computing environments.
+
+#### Installation
+
+Using AXERA accelerators requires the installation of the AXCL driver. We provide a convenient Linux script to complete this installation.
+
+Follow these steps for installation:
+
+1. Copy or download [this script](https://github.com/ivanshi1108/assets/releases/download/v0.16.2/user_installation.sh).
+2. Ensure it has execution permissions with `sudo chmod +x user_installation.sh`
+3. Run the script with `./user_installation.sh`
+
+#### Setup
+
+To set up Frigate, follow the default installation instructions and use the standard image, for example `ghcr.io/blakeblackshear/frigate:stable`.
+
+Next, grant Docker permissions to access your hardware by adding the following lines to your `docker-compose.yml` file:
+
+```yaml
+devices:
+  - /dev/axcl_host
+  - /dev/ax_mmb_dev
+  - /dev/msg_userdev
+```
+
+If you are using `docker run`, add these options to your command: `--device /dev/axcl_host --device /dev/ax_mmb_dev --device /dev/msg_userdev`
+
+#### Configuration
+
+Finally, configure [hardware object detection](/configuration/object_detectors#axera) to complete the setup.
+</details>
+
 ## Docker

 Running through Docker with Docker Compose is the recommended install method.
--- a/docs/docs/frigate/video_pipeline.md
+++ b/docs/docs/frigate/video_pipeline.md
@ -37,18 +37,18 @@ The following diagram adds a lot more detail than the simple view explained befo
 %%{init: {"themeVariables": {"edgeLabelBackground": "transparent"}}}%%

 flowchart TD
-    RecStore[(Recording\nstore)]
-    SnapStore[(Snapshot\nstore)]
+    RecStore[(Recording<br>store)]
+    SnapStore[(Snapshot<br>store)]

    subgraph Acquisition
        Cam["Camera"] -->|FFmpeg supported| Stream
-        Cam -->|"Other streaming\nprotocols"| go2rtc
+        Cam -->|"Other streaming<br>protocols"| go2rtc
        go2rtc("go2rtc") --> Stream
-        Stream[Capture main and\nsub streams] --> |detect stream|Decode(Decode and\ndownscale)
+        Stream[Capture main and<br>sub streams] --> |detect stream|Decode(Decode and<br>downscale)
    end
    subgraph Motion
-        Decode --> MotionM(Apply\nmotion masks)
-        MotionM --> MotionD(Motion\ndetection)
+        Decode --> MotionM(Apply<br>motion masks)
+        MotionM --> MotionD(Motion<br>detection)
    end
    subgraph Detection
        MotionD --> |motion regions| ObjectD(Object detection)
@ -60,8 +60,8 @@ flowchart TD
    MotionD --> |motion event|Birdseye
    ObjectZ --> |object event|Birdseye

-    MotionD --> |"video segments\n(retain motion)"|RecStore
+    MotionD --> |"video segments<br>(retain motion)"|RecStore
    ObjectZ --> |detection clip|RecStore
-    Stream -->|"video segments\n(retain all)"| RecStore
+    Stream -->|"video segments<br>(retain all)"| RecStore
    ObjectZ --> |detection snapshot|SnapStore
 ```
--- a/frigate/detectors/detection_runners.py
+++ b/frigate/detectors/detection_runners.py
@ -546,12 +546,140 @@ class RKNNModelRunner(BaseModelRunner):
                pass


+class AXEngineModelRunner(BaseModelRunner):
+    """Run AXEngine models for embeddings."""
+
+    _mean = np.array([0.48145466, 0.4578275, 0.40821073], dtype=np.float32).reshape(
+        1, 3, 1, 1
+    )
+    _std = np.array([0.26862954, 0.26130258, 0.27577711], dtype=np.float32).reshape(
+        1, 3, 1, 1
+    )
+
+    def __init__(self, model_path: str, model_type: str | None = None):
+        self.model_path = model_path
+        self.model_type = model_type
+        self._inference_lock = threading.Lock()
+        self.image_session = None
+        self.text_session = None
+        self.text_pad_token_id = 0
+        self._load_model()
+
+    def _load_model(self):
+        try:
+            import axengine as axe
+            from transformers import AutoTokenizer
+        except ImportError:
+            logger.error("AXEngine is not available")
+            raise ImportError("AXEngine is not available")
+
+        model_dir = os.path.dirname(self.model_path)
+        image_model_path = os.path.join(model_dir, "image_encoder.axmodel")
+        text_model_path = os.path.join(model_dir, "text_encoder.axmodel")
+        tokenizer_path = os.path.join(model_dir, "tokenizer")
+
+        self.image_session = axe.InferenceSession(image_model_path)
+        self.text_session = axe.InferenceSession(text_model_path)
+
+        try:
+            tokenizer = AutoTokenizer.from_pretrained(
+                tokenizer_path,
+                trust_remote_code=True,
+                clean_up_tokenization_spaces=True,
+            )
+            if tokenizer.pad_token_id is not None:
+                self.text_pad_token_id = int(tokenizer.pad_token_id)
+        except Exception:
+            logger.warning(
+                "Failed to load tokenizer from %s for AXEngine padding, using 0",
+                tokenizer_path,
+            )
+
+    def get_input_names(self) -> list[str]:
+        return ["input_ids", "pixel_values"]
+
+    def get_input_width(self) -> int:
+        return 512
+
+    @staticmethod
+    def _has_real_text_inputs(inputs: dict[str, Any]) -> bool:
+        input_ids = inputs.get("input_ids")
+
+        if input_ids is None:
+            return False
+
+        if input_ids.ndim < 2:
+            return False
+
+        return input_ids.shape[-1] != 16 or np.any(input_ids)
+
+    @staticmethod
+    def _has_real_image_inputs(inputs: dict[str, Any]) -> bool:
+        pixel_values = inputs.get("pixel_values")
+
+        return pixel_values is not None and np.any(pixel_values)
+
+    def _prepare_text_inputs(self, input_ids: np.ndarray) -> np.ndarray:
+        padded_input_ids = np.full((1, 50), self.text_pad_token_id, dtype=np.int32)
+        truncated_input_ids = input_ids.reshape(1, -1)[:, :50].astype(np.int32)
+        padded_input_ids[:, : truncated_input_ids.shape[1]] = truncated_input_ids
+        return padded_input_ids
+
+    @classmethod
+    def _prepare_pixel_values(cls, pixel_values: np.ndarray) -> np.ndarray:
+        if len(pixel_values.shape) == 3:
+            pixel_values = pixel_values[None, ...]
+
+        pixel_values = pixel_values.astype(np.float32)
+        return (pixel_values - cls._mean) / cls._std
+
+    def run(self, inputs: dict[str, Any]) -> list[np.ndarray | None]:
+        outputs: list[np.ndarray | None] = [None, None, None, None]
+
+        with self._inference_lock:
+            if self._has_real_text_inputs(inputs):
+                text_embeddings = []
+                for input_ids in inputs["input_ids"]:
+                    text_embeddings.append(
+                        self.text_session.run(
+                            None,
+                            {"inputs_id": self._prepare_text_inputs(input_ids)},
+                        )[0][0]
+                    )
+                outputs[2] = np.array(text_embeddings)
+
+            if self._has_real_image_inputs(inputs):
+                image_embeddings = []
+                for pixel_values in inputs["pixel_values"]:
+                    image_embeddings.append(
+                        self.image_session.run(
+                            None,
+                            {"pixel_values": self._prepare_pixel_values(pixel_values)},
+                        )[0][0]
+                    )
+
+                outputs[3] = np.array(image_embeddings)
+
+        return outputs
+
+
 def get_optimized_runner(
    model_path: str, device: str | None, model_type: str, **kwargs
 ) -> BaseModelRunner:
    """Get an optimized runner for the hardware."""
    device = device or "AUTO"

+    from frigate.util.axengine_converter import (
+        auto_convert_model as auto_load_axengine_model,
+    )
+    from frigate.util.axengine_converter import is_axengine_compatible
+
+    if is_axengine_compatible(model_path, device, model_type):
+        axmodel_path = auto_load_axengine_model(model_path, model_type)
+
+        if axmodel_path:
+            return AXEngineModelRunner(axmodel_path, model_type)
+
    if device != "CPU" and is_rknn_compatible(model_path):
        rknn_path = auto_convert_model(model_path)

--- a/frigate/detectors/plugins/axengine.py
+++ b/frigate/detectors/plugins/axengine.py
@ -0,0 +1,86 @@
+import logging
+import os.path
+import re
+import urllib.request
+from typing import Literal
+
+import axengine as axe
+
+from frigate.const import MODEL_CACHE_DIR
+from frigate.detectors.detection_api import DetectionApi
+from frigate.detectors.detector_config import BaseDetectorConfig, ModelTypeEnum
+from frigate.util.model import post_process_yolo
+
+logger = logging.getLogger(__name__)
+
+DETECTOR_KEY = "axengine"
+
+supported_models = {
+    ModelTypeEnum.yologeneric: "frigate-yolov9-.*$",
+}
+
+model_cache_dir = os.path.join(MODEL_CACHE_DIR, "axengine_cache/")
+
+
+class AxengineDetectorConfig(BaseDetectorConfig):
+    # Detector configuration selected when ``type`` equals DETECTOR_KEY
+    # ("axengine"); no AXEngine-specific options are declared here.
+    type: Literal[DETECTOR_KEY]
+
+
+class Axengine(DetectionApi):
+    type_key = DETECTOR_KEY
+
+    def __init__(self, config: AxengineDetectorConfig):
+        logger.info("__init__ axengine")
+        super().__init__(config)
+        self.height = config.model.height
+        self.width = config.model.width
+        model_path = config.model.path or "frigate-yolov9-tiny"
+        model_props = self.parse_model_input(model_path)
+        self.session = axe.InferenceSession(model_props["path"])
+
+    def __del__(self):
+        pass
+
+    def parse_model_input(self, model_path):
+        model_props = {}
+        model_props["preset"] = True
+
+        model_matched = False
+
+        for model_type, pattern in supported_models.items():
+            if re.match(pattern, model_path):
+                model_matched = True
+                model_props["model_type"] = model_type
+
+        if model_matched:
+            model_props["filename"] = model_path + ".axmodel"
+            model_props["path"] = model_cache_dir + model_props["filename"]
+
+            if not os.path.isfile(model_props["path"]):
+                self.download_model(model_props["filename"])
+        else:
+            supported_models_str = ", ".join(model[1:-1] for model in supported_models)
+            raise Exception(
+                f"Model {model_path} is unsupported. Provide your own model or choose one of the following: {supported_models_str}"
+            )
+        return model_props
+
+    def download_model(self, filename):
+        if not os.path.isdir(model_cache_dir):
+            os.mkdir(model_cache_dir)
+
+        GITHUB_ENDPOINT = os.environ.get("GITHUB_ENDPOINT", "https://github.com")
+        urllib.request.urlretrieve(
+            f"{GITHUB_ENDPOINT}/ivanshi1108/assets/releases/download/v0.16.2/{filename}",
+            model_cache_dir + filename,
+        )
+
+    def detect_raw(self, tensor_input):
+        results = None
+        results = self.session.run(None, {"images": tensor_input})
+        if self.detector_config.model.model_type == ModelTypeEnum.yologeneric:
+            return post_process_yolo(results, self.width, self.height)
+        else:
+            raise ValueError(
+                f'Model type "{self.detector_config.model.model_type}" is currently not supported.'
+            )
--- a/frigate/embeddings/onnx/jina_v2_embedding.py
+++ b/frigate/embeddings/onnx/jina_v2_embedding.py
@ -37,13 +37,18 @@ class JinaV2Embedding(BaseEmbedding):
            "model_fp16.onnx" if model_size == "large" else "model_quantized.onnx"
        )
        HF_ENDPOINT = os.environ.get("HF_ENDPOINT", "https://huggingface.co")
+        use_axengine = (device or "").upper() == "AXENGINE"
        super().__init__(
            model_name="jinaai/jina-clip-v2",
            model_file=model_file,
-            download_urls={
-                model_file: f"{HF_ENDPOINT}/jinaai/jina-clip-v2/resolve/main/onnx/{model_file}",
-                "preprocessor_config.json": f"{HF_ENDPOINT}/jinaai/jina-clip-v2/resolve/main/preprocessor_config.json",
-            },
+            download_urls=(
+                {}
+                if use_axengine
+                else {
+                    model_file: f"{HF_ENDPOINT}/jinaai/jina-clip-v2/resolve/main/onnx/{model_file}",
+                    "preprocessor_config.json": f"{HF_ENDPOINT}/jinaai/jina-clip-v2/resolve/main/preprocessor_config.json",
+                }
+            ),
        )
        self.tokenizer_file = "tokenizer"
        self.embedding_type = embedding_type
@ -59,7 +64,11 @@ class JinaV2Embedding(BaseEmbedding):
        self._call_lock = threading.Lock()

        # download the model and tokenizer
-        files_names = list(self.download_urls.keys()) + [self.tokenizer_file]
+        files_names = (
+            [self.tokenizer_file]
+            if use_axengine
+            else list(self.download_urls.keys()) + [self.tokenizer_file]
+        )
        if not all(
            os.path.exists(os.path.join(self.download_path, n)) for n in files_names
        ):
--- a/frigate/util/axengine_converter.py
+++ b/frigate/util/axengine_converter.py
@ -0,0 +1,190 @@
+"""AXEngine model loading utility for Frigate."""
+
+import logging
+import os
+import time
+from pathlib import Path
+
+from frigate.comms.inter_process import InterProcessRequestor
+from frigate.const import UPDATE_MODEL_STATE
+from frigate.types import ModelStatusTypesEnum
+from frigate.util.downloader import ModelDownloader
+from frigate.util.file import FileLock
+
+logger = logging.getLogger(__name__)
+
+AXENGINE_JINA_V2_MODEL = "jina_v2"
+AXENGINE_JINA_V2_REPO = "AXERA-TECH/jina-clip-v2"
+
+
+def get_axengine_model_type(model_path: str) -> str | None:
+    if "jina-clip-v2" in str(model_path):
+        return AXENGINE_JINA_V2_MODEL
+
+    return None
+
+
+def is_axengine_compatible(
+    model_path: str, device: str | None, model_type: str | None = None
+) -> bool:
+    if (device or "").upper() != "AXENGINE":
+        return False
+
+    if not model_type:
+        model_type = get_axengine_model_type(model_path)
+
+    return model_type == AXENGINE_JINA_V2_MODEL
+
+
+def wait_for_download_completion(
+    image_model_path: Path,
+    text_model_path: Path,
+    lock_path: Path,
+    timeout: int = 300,
+) -> bool:
+    start_time = time.time()
+
+    while time.time() - start_time < timeout:
+        if image_model_path.exists() and text_model_path.exists():
+            return True
+
+        if not lock_path.exists():
+            return image_model_path.exists() and text_model_path.exists()
+
+        time.sleep(1)
+
+    logger.warning("Timeout waiting for AXEngine model files: %s", image_model_path)
+    return False
+
+
+def auto_convert_model(model_path: str, model_type: str | None = None) -> str | None:
+    """Prepare AXEngine model files and return the image encoder path."""
+    if not is_axengine_compatible(model_path, "AXENGINE", model_type):
+        return None
+
+    model_dir = Path(model_path).parent
+    ui_model_key = f"jinaai/jina-clip-v2-{Path(model_path).name}"
+    ui_preprocessor_key = "jinaai/jina-clip-v2-preprocessor_config.json"
+    image_model_path = model_dir / "image_encoder.axmodel"
+    text_model_path = model_dir / "text_encoder.axmodel"
+    model_repo = os.environ.get("AXENGINE_JINA_V2_REPO", AXENGINE_JINA_V2_REPO)
+    hf_endpoint = os.environ.get("HF_ENDPOINT", "https://huggingface.co")
+    requestor = InterProcessRequestor()
+
+    download_targets = {
+        "image_encoder.axmodel": f"{hf_endpoint}/{model_repo}/resolve/main/image_encoder.axmodel",
+        "text_encoder.axmodel": f"{hf_endpoint}/{model_repo}/resolve/main/text_encoder.axmodel",
+    }
+
+    if image_model_path.exists() and text_model_path.exists():
+        requestor.send_data(
+            UPDATE_MODEL_STATE,
+            {
+                "model": ui_preprocessor_key,
+                "state": ModelStatusTypesEnum.downloaded,
+            },
+        )
+        requestor.send_data(
+            UPDATE_MODEL_STATE,
+            {
+                "model": ui_model_key,
+                "state": ModelStatusTypesEnum.downloaded,
+            },
+        )
+        requestor.stop()
+        return str(image_model_path)
+
+    lock_path = model_dir / ".axengine.download.lock"
+    lock = FileLock(lock_path, timeout=300, cleanup_stale_on_init=True)
+
+    if lock.acquire():
+        try:
+            requestor.send_data(
+                UPDATE_MODEL_STATE,
+                {
+                    "model": ui_preprocessor_key,
+                    "state": ModelStatusTypesEnum.downloaded,
+                },
+            )
+            requestor.send_data(
+                UPDATE_MODEL_STATE,
+                {
+                    "model": ui_model_key,
+                    "state": ModelStatusTypesEnum.downloading,
+                },
+            )
+
+            for file_name, url in download_targets.items():
+                target_path = model_dir / file_name
+                if target_path.exists():
+                    continue
+
+                target_path.parent.mkdir(parents=True, exist_ok=True)
+                ModelDownloader.download_from_url(url, str(target_path))
+
+            requestor.send_data(
+                UPDATE_MODEL_STATE,
+                {
+                    "model": ui_model_key,
+                    "state": ModelStatusTypesEnum.downloaded,
+                },
+            )
+
+            return str(image_model_path)
+        except Exception:
+            requestor.send_data(
+                UPDATE_MODEL_STATE,
+                {
+                    "model": ui_model_key,
+                    "state": ModelStatusTypesEnum.error,
+                },
+            )
+            logger.exception(
+                "Failed to prepare AXEngine model files for %s", model_repo
+            )
+            return None
+        finally:
+            requestor.stop()
+            lock.release()
+
+    logger.info("Another process is preparing AXEngine models, waiting for completion")
+    requestor.send_data(
+        UPDATE_MODEL_STATE,
+        {
+            "model": ui_preprocessor_key,
+            "state": ModelStatusTypesEnum.downloaded,
+        },
+    )
+    requestor.send_data(
+        UPDATE_MODEL_STATE,
+        {
+            "model": ui_model_key,
+            "state": ModelStatusTypesEnum.downloading,
+        },
+    )
+    requestor.stop()
+
+    if wait_for_download_completion(image_model_path, text_model_path, lock_path):
+        if image_model_path.exists() and text_model_path.exists():
+            requestor = InterProcessRequestor()
+            requestor.send_data(
+                UPDATE_MODEL_STATE,
+                {
+                    "model": ui_model_key,
+                    "state": ModelStatusTypesEnum.downloaded,
+                },
+            )
+            requestor.stop()
+            return str(image_model_path)
+
+    logger.error("Timeout waiting for AXEngine model download lock for %s", model_dir)
+    requestor = InterProcessRequestor()
+    requestor.send_data(
+        UPDATE_MODEL_STATE,
+        {
+            "model": ui_model_key,
+            "state": ModelStatusTypesEnum.error,
+        },
+    )
+    requestor.stop()
+    return None