Merge 03802a9ac3 into acdfed40a9

2026-03-10 02:29:19 +03:00 · 2026-03-08 15:42:53 +00:00 · 2026-03-08 15:42:53 +00:00 · 85e4f7171b
commit 85e4f7171b
parent acdfed40a9 03802a9ac3
11 changed files with 639 additions and 18 deletions
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@ -223,4 +223,4 @@ jobs:
          tags: ghcr.io/${{ steps.lowercaseRepo.outputs.lowercase }}:${{ env.SHORT_SHA }}
          sources: |
            ghcr.io/${{ steps.lowercaseRepo.outputs.lowercase }}:${{ env.SHORT_SHA }}-amd64
-            ghcr.io/${{ steps.lowercaseRepo.outputs.lowercase }}:${{ env.SHORT_SHA }}-rpi
+            ghcr.io/${{ steps.lowercaseRepo.outputs.lowercase }}:${{ env.SHORT_SHA }}-rpi
--- a/docker/axcl/user_installation.sh
+++ b/docker/axcl/user_installation.sh
@ -0,0 +1,110 @@
+#!/bin/bash
+
+set -e
+
+# Function to clean up on error
+cleanup() {
+    echo "Cleaning up temporary files..."
+    rm -f "$deb_file"
+}
+
+trap cleanup ERR
+trap 'echo "Script interrupted by user (Ctrl+C)"; cleanup; exit 130' INT
+
+# Update package list and install dependencies
+echo "Updating package list and installing dependencies..."
+sudo apt-get update
+sudo apt-get install -y build-essential cmake git wget pciutils kmod udev
+
+# Check if gcc-12 is needed
+echo "Checking GCC version..."
+current_gcc_version=$(gcc --version | head -n1 | awk '{print $NF}')
+if ! dpkg --compare-versions "$current_gcc_version" ge "12" 2>/dev/null; then
+    echo "Current GCC version ($current_gcc_version) is lower than 12, installing gcc-12..."
+    sudo apt-get install -y gcc-12
+    sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 12
+    echo "GCC-12 installed and set as default"
+else
+    echo "Current GCC version ($current_gcc_version) is sufficient, skipping GCC installation"
+fi
+
+# Determine architecture
+echo "Determining system architecture..."
+arch=$(uname -m)
+download_url=""
+
+if [[ $arch == "x86_64" ]]; then
+    download_url="https://github.com/ivanshi1108/assets/releases/download/v0.17/axcl_host_x86_64_V3.10.2_20251111020143_NO5046.deb"
+    deb_file="axcl.deb"
+elif [[ $arch == "aarch64" ]]; then
+    download_url="https://github.com/ivanshi1108/assets/releases/download/v0.17/axcl_host_aarch64_V3.10.2_20251111020143_NO5046.deb"
+    deb_file="axcl.deb"
+else
+    echo "Unsupported architecture: $arch"
+    exit 1
+fi
+
+# Check for required Linux headers before downloading
+echo "Checking for required Linux headers..."
+kernel_version=$(uname -r)
+if dpkg -l | grep -q "linux-headers-${kernel_version}" || [ -d "/lib/modules/${kernel_version}/build" ]; then
+    echo "Linux headers or kernel modules directory found for kernel ${kernel_version}/build."
+else
+    echo "Linux headers for kernel ${kernel_version} not found. Please install them first: sudo apt-get install linux-headers-${kernel_version}"
+    exit 1
+fi
+
+# Download AXCL driver
+echo "Downloading AXCL driver for $arch..."
+wget --timeout=30 --tries=3 "$download_url" -O "$deb_file"
+
+if [ $? -ne 0 ]; then
+    echo "Failed to download AXCL driver after retries"
+    exit 1
+fi
+
+# Install AXCL driver
+echo "Installing AXCL driver..."
+sudo dpkg -i "$deb_file"
+
+if [ $? -ne 0 ]; then
+    echo "Failed to install AXCL driver, attempting to fix dependencies..."
+    sudo apt-get install -f -y
+    sudo dpkg -i "$deb_file"
+
+    if [ $? -ne 0 ]; then
+        echo "AXCL driver installation failed after dependency fix"
+        exit 1
+    fi
+fi
+
+# Update environment
+echo "Updating environment..."
+source /etc/profile
+
+# Verify installation
+echo "Verifying AXCL installation..."
+if command -v axcl-smi &> /dev/null; then
+    echo "AXCL driver detected, checking AI accelerator status..."
+
+    axcl_output=$(axcl-smi 2>&1)
+    axcl_exit_code=$?
+
+    echo "$axcl_output"
+
+    if [ $axcl_exit_code -eq 0 ]; then
+        echo "AXCL driver installation completed successfully!"
+    else
+        echo "AXCL driver installed but no AI accelerator detected or communication failed."
+        echo "Please check if the AI accelerator is properly connected and powered on."
+        exit 1
+    fi
+else
+    echo "axcl-smi command not found. AXCL driver installation may have failed."
+    exit 1
+fi
+
+# Clean up
+echo "Cleaning up temporary files..."
+rm -f "$deb_file"
+echo "Installation script completed."
--- a/docker/main/Dockerfile
+++ b/docker/main/Dockerfile
@ -266,6 +266,16 @@ RUN wget -q https://bootstrap.pypa.io/get-pip.py -O get-pip.py \
 RUN --mount=type=bind,from=wheels,source=/wheels,target=/deps/wheels \
    pip3 install -U /deps/wheels/*.whl

+####
+#
+# AXEngine Support
+#
+# Installs the pyaxengine wheel (imported in Python as `axengine`) and
+# appends the AXCL directories to PATH and LD_LIBRARY_PATH.
+# NOTE(review): /usr/bin/axcl and /usr/lib/axcl are not created here;
+# presumably they are provided at runtime by the AXCL driver - confirm.
+# NOTE(review): if LD_LIBRARY_PATH is unset at this point the resulting
+# value starts with ':' (an empty entry) - confirm that is intended.
+#
+####
+RUN pip3 install https://github.com/ivanshi1108/pyaxengine/releases/download/0.1.3-frigate/axengine-0.1.3-py3-none-any.whl
+
+ENV PATH="${PATH}:/usr/bin/axcl"
+ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/lib/axcl"
+
 # Install MemryX runtime (requires libgomp (OpenMP) in the final docker image)
 RUN --mount=type=bind,source=docker/main/install_memryx.sh,target=/deps/install_memryx.sh \
    bash -c "bash /deps/install_memryx.sh"
--- a/docs/docs/configuration/object_detectors.md
+++ b/docs/docs/configuration/object_detectors.md
@ -49,6 +49,11 @@ Frigate supports multiple different detectors that work on different types of ha

 - [Synaptics](#synaptics): synap models can run on Synaptics devices(e.g astra machina) with included NPUs.

+**AXERA** <CommunityBadge />
+
+- [AXEngine](#axera): axmodels can run on AXERA AI accelerators.
+
+
 **For Testing**

 - [CPU Detector (not recommended for actual use](#cpu-detector-not-recommended): Use a CPU to run tflite model, this is not recommended and in most cases OpenVINO can be used in CPU mode with better results.
@ -1478,6 +1483,41 @@ model:
  input_pixel_format: rgb/bgr # look at the model.json to figure out which to put here
 ```

+## AXERA
+
+Hardware accelerated object detection is supported on the following SoCs:
+
+- AX650N
+- AX8850N
+
+This implementation uses the [AXERA Pulsar2 toolchain](https://huggingface.co/AXERA-TECH/Pulsar2).
+
+See the [installation docs](../frigate/installation.md#axera) for information on configuring the AXEngine hardware.
+
+### Configuration
+
+When configuring the AXEngine detector, you have to specify the model name.
+
+#### yolov9
+
+A yolov9 model (`frigate-yolov9-tiny`) is used by this detector type by default. It is downloaded automatically the first time it is needed and stored in the model cache directory.
+
+Use the model configuration shown below when using the axengine detector with the default axmodel:
+
+```yaml
+detectors:
+  axengine:
+    type: axengine
+
+model:
+  path: frigate-yolov9-tiny
+  model_type: yolo-generic
+  width: 320
+  height: 320
+  tensor_format: bgr
+  labelmap_path: /labelmap/coco-80.txt
+```
+
 # Models

 Some model types are not included in Frigate by default.
@ -1571,12 +1611,12 @@ YOLOv9 model can be exported as ONNX using the command below. You can copy and p
 ```sh
 docker build . --build-arg MODEL_SIZE=t --build-arg IMG_SIZE=320 --output . -f- <<'EOF'
 FROM python:3.11 AS build
-RUN apt-get update && apt-get install --no-install-recommends -y libgl1 && rm -rf /var/lib/apt/lists/*
-COPY --from=ghcr.io/astral-sh/uv:0.8.0 /uv /bin/
+RUN apt-get update && apt-get install --no-install-recommends -y cmake libgl1 && rm -rf /var/lib/apt/lists/*
+COPY --from=ghcr.io/astral-sh/uv:0.10.4 /uv /bin/
 WORKDIR /yolov9
 ADD https://github.com/WongKinYiu/yolov9.git .
 RUN uv pip install --system -r requirements.txt
-RUN uv pip install --system onnx==1.18.0 onnxruntime onnx-simplifier>=0.4.1 onnxscript
+RUN uv pip install --system onnx==1.18.0 onnxruntime onnx-simplifier==0.4.* onnxscript
 ARG MODEL_SIZE
 ARG IMG_SIZE
 ADD https://github.com/WongKinYiu/yolov9/releases/download/v0.1/yolov9-${MODEL_SIZE}-converted.pt yolov9-${MODEL_SIZE}.pt
--- a/docs/docs/frigate/hardware.md
+++ b/docs/docs/frigate/hardware.md
@ -103,6 +103,10 @@ Frigate supports multiple different detectors that work on different types of ha

 - [Synaptics](#synaptics): synap models can run on Synaptics devices(e.g astra machina) with included NPUs to provide efficient object detection.

+**AXERA** <CommunityBadge />
+
+- [AXEngine](#axera): axera models can run on AXERA NPUs via AXEngine, delivering highly efficient object detection.
+
 :::

 ### Hailo-8
@ -288,6 +292,14 @@ The inference time of a rk3588 with all 3 cores enabled is typically 25-30 ms fo
 | ssd mobilenet | ~ 25 ms                         |
 | yolov5m       | ~ 118 ms                        |

+### AXERA
+
+- **AXEngine** Default model is **yolov9**
+
+| Name             | AXERA AX650N/AX8850N Inference Time |
+| ---------------- | ----------------------------------- |
+| yolov9-tiny      | ~ 4 ms                              |
+
 ## What does Frigate use the CPU for and what does it use a detector for? (ELI5 Version)

 This is taken from a [user question on reddit](https://www.reddit.com/r/homeassistant/comments/q8mgau/comment/hgqbxh5/?utm_source=share&utm_medium=web2x&context=3). Modified slightly for clarity.
@ -308,4 +320,4 @@ Basically - When you increase the resolution and/or the frame rate of the stream

 YES! The Coral does not help with decoding video streams.

-Decompressing video streams takes a significant amount of CPU power. Video compression uses key frames (also known as I-frames) to send a full frame in the video stream. The following frames only include the difference from the key frame, and the CPU has to compile each frame by merging the differences with the key frame. [More detailed explanation](https://support.video.ibm.com/hc/en-us/articles/18106203580316-Keyframes-InterFrame-Video-Compression). Higher resolutions and frame rates mean more processing power is needed to decode the video stream, so try and set them on the camera to avoid unnecessary decoding work.
+Decompressing video streams takes a significant amount of CPU power. Video compression uses key frames (also known as I-frames) to send a full frame in the video stream. The following frames only include the difference from the key frame, and the CPU has to compile each frame by merging the differences with the key frame. [More detailed explanation](https://support.video.ibm.com/hc/en-us/articles/18106203580316-Keyframes-InterFrame-Video-Compression). Higher resolutions and frame rates mean more processing power is needed to decode the video stream, so try and set them on the camera to avoid unnecessary decoding work.
--- a/docs/docs/frigate/installation.md
+++ b/docs/docs/frigate/installation.md
@ -439,6 +439,42 @@ or add these options to your `docker run` command:

 Next, you should configure [hardware object detection](/configuration/object_detectors#synaptics) and [hardware video processing](/configuration/hardware_acceleration_video#synaptics).

+### AXERA
+
+<details>
+<summary>AXERA accelerators</summary>
+AXERA accelerators are available in an M.2 form factor, compatible with both Raspberry Pi and Orange Pi. This form factor has also been successfully tested on x86 platforms, making it a versatile choice for various computing environments.
+
+#### Installation
+
+Using AXERA accelerators requires the installation of the AXCL driver. We provide a convenient Linux script to complete this installation.
+
+Follow these steps for installation:
+
+1. Copy or download [this script](https://github.com/ivanshi1108/assets/releases/download/v0.16.2/user_installation.sh).
+2. Ensure it has execution permissions with `sudo chmod +x user_installation.sh`
+3. Run the script with `./user_installation.sh`
+
+#### Setup
+
+To set up Frigate, follow the default installation instructions and use the standard image, for example `ghcr.io/blakeblackshear/frigate:stable`.
+
+Next, grant Docker permissions to access your hardware by adding the following lines to your `docker-compose.yml` file:
+
+```yaml
+devices:
+  - /dev/axcl_host
+  - /dev/ax_mmb_dev
+  - /dev/msg_userdev
+```
+
+If you are using `docker run`, add these options to your command: `--device /dev/axcl_host --device /dev/ax_mmb_dev --device /dev/msg_userdev`
+
+#### Configuration
+
+Finally, configure [hardware object detection](/configuration/object_detectors#axera) to complete the setup.
+</details>
+
 ## Docker

 Running through Docker with Docker Compose is the recommended install method.
--- a/docs/docs/frigate/video_pipeline.md
+++ b/docs/docs/frigate/video_pipeline.md
@ -37,18 +37,18 @@ The following diagram adds a lot more detail than the simple view explained befo
 %%{init: {"themeVariables": {"edgeLabelBackground": "transparent"}}}%%

 flowchart TD
-    RecStore[(Recording\nstore)]
-    SnapStore[(Snapshot\nstore)]
+    RecStore[(Recording<br>store)]
+    SnapStore[(Snapshot<br>store)]

    subgraph Acquisition
        Cam["Camera"] -->|FFmpeg supported| Stream
-        Cam -->|"Other streaming\nprotocols"| go2rtc
+        Cam -->|"Other streaming<br>protocols"| go2rtc
        go2rtc("go2rtc") --> Stream
-        Stream[Capture main and\nsub streams] --> |detect stream|Decode(Decode and\ndownscale)
+        Stream[Capture main and<br>sub streams] --> |detect stream|Decode(Decode and<br>downscale)
    end
    subgraph Motion
-        Decode --> MotionM(Apply\nmotion masks)
-        MotionM --> MotionD(Motion\ndetection)
+        Decode --> MotionM(Apply<br>motion masks)
+        MotionM --> MotionD(Motion<br>detection)
    end
    subgraph Detection
        MotionD --> |motion regions| ObjectD(Object detection)
@ -60,8 +60,8 @@ flowchart TD
    MotionD --> |motion event|Birdseye
    ObjectZ --> |object event|Birdseye

-    MotionD --> |"video segments\n(retain motion)"|RecStore
+    MotionD --> |"video segments<br>(retain motion)"|RecStore
    ObjectZ --> |detection clip|RecStore
-    Stream -->|"video segments\n(retain all)"| RecStore
+    Stream -->|"video segments<br>(retain all)"| RecStore
    ObjectZ --> |detection snapshot|SnapStore
 ```
--- a/frigate/detectors/detection_runners.py
+++ b/frigate/detectors/detection_runners.py
@ -546,12 +546,140 @@ class RKNNModelRunner(BaseModelRunner):
                pass


+class AXEngineModelRunner(BaseModelRunner):
+    """Run AXEngine models for embeddings."""
+
+    _mean = np.array([0.48145466, 0.4578275, 0.40821073], dtype=np.float32).reshape(
+        1, 3, 1, 1
+    )
+    _std = np.array([0.26862954, 0.26130258, 0.27577711], dtype=np.float32).reshape(
+        1, 3, 1, 1
+    )
+
+    def __init__(self, model_path: str, model_type: str | None = None):
+        self.model_path = model_path
+        self.model_type = model_type
+        self._inference_lock = threading.Lock()
+        self.image_session = None
+        self.text_session = None
+        self.text_pad_token_id = 0
+        self._load_model()
+
+    def _load_model(self):
+        try:
+            import axengine as axe
+            from transformers import AutoTokenizer
+        except ImportError:
+            logger.error("AXEngine is not available")
+            raise ImportError("AXEngine is not available")
+
+        model_dir = os.path.dirname(self.model_path)
+        image_model_path = os.path.join(model_dir, "image_encoder.axmodel")
+        text_model_path = os.path.join(model_dir, "text_encoder.axmodel")
+        tokenizer_path = os.path.join(model_dir, "tokenizer")
+
+        self.image_session = axe.InferenceSession(image_model_path)
+        self.text_session = axe.InferenceSession(text_model_path)
+
+        try:
+            tokenizer = AutoTokenizer.from_pretrained(
+                tokenizer_path,
+                trust_remote_code=True,
+                clean_up_tokenization_spaces=True,
+            )
+            if tokenizer.pad_token_id is not None:
+                self.text_pad_token_id = int(tokenizer.pad_token_id)
+        except Exception:
+            logger.warning(
+                "Failed to load tokenizer from %s for AXEngine padding, using 0",
+                tokenizer_path,
+            )
+
+    def get_input_names(self) -> list[str]:
+        return ["input_ids", "pixel_values"]
+
+    def get_input_width(self) -> int:
+        return 512
+
+    @staticmethod
+    def _has_real_text_inputs(inputs: dict[str, Any]) -> bool:
+        input_ids = inputs.get("input_ids")
+
+        if input_ids is None:
+            return False
+
+        if input_ids.ndim < 2:
+            return False
+
+        return input_ids.shape[-1] != 16 or np.any(input_ids)
+
+    @staticmethod
+    def _has_real_image_inputs(inputs: dict[str, Any]) -> bool:
+        pixel_values = inputs.get("pixel_values")
+
+        return pixel_values is not None and np.any(pixel_values)
+
+    def _prepare_text_inputs(self, input_ids: np.ndarray) -> np.ndarray:
+        padded_input_ids = np.full((1, 50), self.text_pad_token_id, dtype=np.int32)
+        truncated_input_ids = input_ids.reshape(1, -1)[:, :50].astype(np.int32)
+        padded_input_ids[:, : truncated_input_ids.shape[1]] = truncated_input_ids
+        return padded_input_ids
+
+    @classmethod
+    def _prepare_pixel_values(cls, pixel_values: np.ndarray) -> np.ndarray:
+        if len(pixel_values.shape) == 3:
+            pixel_values = pixel_values[None, ...]
+
+        pixel_values = pixel_values.astype(np.float32)
+        return (pixel_values - cls._mean) / cls._std
+
+    def run(self, inputs: dict[str, Any]) -> list[np.ndarray | None]:
+        outputs: list[np.ndarray | None] = [None, None, None, None]
+
+        with self._inference_lock:
+            if self._has_real_text_inputs(inputs):
+                text_embeddings = []
+                for input_ids in inputs["input_ids"]:
+                    text_embeddings.append(
+                        self.text_session.run(
+                            None,
+                            {"inputs_id": self._prepare_text_inputs(input_ids)},
+                        )[0][0]
+                    )
+                outputs[2] = np.array(text_embeddings)
+
+            if self._has_real_image_inputs(inputs):
+                image_embeddings = []
+                for pixel_values in inputs["pixel_values"]:
+                    image_embeddings.append(
+                        self.image_session.run(
+                            None,
+                            {"pixel_values": self._prepare_pixel_values(pixel_values)},
+                        )[0][0]
+                    )
+
+                outputs[3] = np.array(image_embeddings)
+
+        return outputs
+
+
 def get_optimized_runner(
    model_path: str, device: str | None, model_type: str, **kwargs
 ) -> BaseModelRunner:
    """Get an optimized runner for the hardware."""
    device = device or "AUTO"

+    from frigate.util.axengine_converter import (
+        auto_convert_model as auto_load_axengine_model,
+    )
+    from frigate.util.axengine_converter import is_axengine_compatible
+
+    if is_axengine_compatible(model_path, device, model_type):
+        axmodel_path = auto_load_axengine_model(model_path, model_type)
+
+        if axmodel_path:
+            return AXEngineModelRunner(axmodel_path, model_type)
+
    if device != "CPU" and is_rknn_compatible(model_path):
        rknn_path = auto_convert_model(model_path)

--- a/frigate/detectors/plugins/axengine.py
+++ b/frigate/detectors/plugins/axengine.py
@ -0,0 +1,86 @@
+import logging
+import os.path
+import re
+import urllib.request
+from typing import Literal
+
+import axengine as axe
+
+from frigate.const import MODEL_CACHE_DIR
+from frigate.detectors.detection_api import DetectionApi
+from frigate.detectors.detector_config import BaseDetectorConfig, ModelTypeEnum
+from frigate.util.model import post_process_yolo
+
+logger = logging.getLogger(__name__)
+
+# Detector type string; used as the config `type` literal and as the
+# detector class's `type_key`.
+DETECTOR_KEY = "axengine"
+
+# Maps a model type to the regular expression a preset model name must match.
+# It is applied with `re.match`, so it is anchored at the start of the name.
+supported_models = {
+    ModelTypeEnum.yologeneric: "frigate-yolov9-.*$",
+}
+
+# Directory where downloaded preset models are stored. The trailing slash
+# lets a file name be appended to it directly.
+model_cache_dir = os.path.join(MODEL_CACHE_DIR, "axengine_cache/")
+
+
+# Detector config for the AXEngine plugin; `type` must equal DETECTOR_KEY.
+class AxengineDetectorConfig(BaseDetectorConfig):
+    type: Literal[DETECTOR_KEY]
+
+
+class Axengine(DetectionApi):
+    type_key = DETECTOR_KEY
+
+    def __init__(self, config: AxengineDetectorConfig):
+        logger.info("__init__ axengine")
+        super().__init__(config)
+        self.height = config.model.height
+        self.width = config.model.width
+        model_path = config.model.path or "frigate-yolov9-tiny"
+        model_props = self.parse_model_input(model_path)
+        self.session = axe.InferenceSession(model_props["path"])
+
+    def __del__(self):
+        pass
+
+    def parse_model_input(self, model_path):
+        model_props = {}
+        model_props["preset"] = True
+
+        model_matched = False
+
+        for model_type, pattern in supported_models.items():
+            if re.match(pattern, model_path):
+                model_matched = True
+                model_props["model_type"] = model_type
+
+        if model_matched:
+            model_props["filename"] = model_path + ".axmodel"
+            model_props["path"] = model_cache_dir + model_props["filename"]
+
+            if not os.path.isfile(model_props["path"]):
+                self.download_model(model_props["filename"])
+        else:
+            supported_models_str = ", ".join(model[1:-1] for model in supported_models)
+            raise Exception(
+                f"Model {model_path} is unsupported. Provide your own model or choose one of the following: {supported_models_str}"
+            )
+        return model_props
+
+    def download_model(self, filename):
+        if not os.path.isdir(model_cache_dir):
+            os.mkdir(model_cache_dir)
+
+        GITHUB_ENDPOINT = os.environ.get("GITHUB_ENDPOINT", "https://github.com")
+        urllib.request.urlretrieve(
+            f"{GITHUB_ENDPOINT}/ivanshi1108/assets/releases/download/v0.16.2/{filename}",
+            model_cache_dir + filename,
+        )
+
+    def detect_raw(self, tensor_input):
+        results = None
+        results = self.session.run(None, {"images": tensor_input})
+        if self.detector_config.model.model_type == ModelTypeEnum.yologeneric:
+            return post_process_yolo(results, self.width, self.height)
+        else:
+            raise ValueError(
+                f'Model type "{self.detector_config.model.model_type}" is currently not supported.'
+            )
--- a/frigate/embeddings/onnx/jina_v2_embedding.py
+++ b/frigate/embeddings/onnx/jina_v2_embedding.py
@ -37,13 +37,18 @@ class JinaV2Embedding(BaseEmbedding):
            "model_fp16.onnx" if model_size == "large" else "model_quantized.onnx"
        )
        HF_ENDPOINT = os.environ.get("HF_ENDPOINT", "https://huggingface.co")
+        use_axengine = (device or "").upper() == "AXENGINE"
        super().__init__(
            model_name="jinaai/jina-clip-v2",
            model_file=model_file,
-            download_urls={
-                model_file: f"{HF_ENDPOINT}/jinaai/jina-clip-v2/resolve/main/onnx/{model_file}",
-                "preprocessor_config.json": f"{HF_ENDPOINT}/jinaai/jina-clip-v2/resolve/main/preprocessor_config.json",
-            },
+            download_urls=(
+                {}
+                if use_axengine
+                else {
+                    model_file: f"{HF_ENDPOINT}/jinaai/jina-clip-v2/resolve/main/onnx/{model_file}",
+                    "preprocessor_config.json": f"{HF_ENDPOINT}/jinaai/jina-clip-v2/resolve/main/preprocessor_config.json",
+                }
+            ),
        )
        self.tokenizer_file = "tokenizer"
        self.embedding_type = embedding_type
@ -59,7 +64,11 @@ class JinaV2Embedding(BaseEmbedding):
        self._call_lock = threading.Lock()

        # download the model and tokenizer
-        files_names = list(self.download_urls.keys()) + [self.tokenizer_file]
+        files_names = (
+            [self.tokenizer_file]
+            if use_axengine
+            else list(self.download_urls.keys()) + [self.tokenizer_file]
+        )
        if not all(
            os.path.exists(os.path.join(self.download_path, n)) for n in files_names
        ):
--- a/frigate/util/axengine_converter.py
+++ b/frigate/util/axengine_converter.py
@ -0,0 +1,190 @@
+"""AXEngine model loading utility for Frigate."""
+
+import logging
+import os
+import time
+from pathlib import Path
+
+from frigate.comms.inter_process import InterProcessRequestor
+from frigate.const import UPDATE_MODEL_STATE
+from frigate.types import ModelStatusTypesEnum
+from frigate.util.downloader import ModelDownloader
+from frigate.util.file import FileLock
+
+logger = logging.getLogger(__name__)
+
+AXENGINE_JINA_V2_MODEL = "jina_v2"
+AXENGINE_JINA_V2_REPO = "AXERA-TECH/jina-clip-v2"
+
+
+def get_axengine_model_type(model_path: str) -> str | None:
+    if "jina-clip-v2" in str(model_path):
+        return AXENGINE_JINA_V2_MODEL
+
+    return None
+
+
+def is_axengine_compatible(
+    model_path: str, device: str | None, model_type: str | None = None
+) -> bool:
+    if (device or "").upper() != "AXENGINE":
+        return False
+
+    if not model_type:
+        model_type = get_axengine_model_type(model_path)
+
+    return model_type == AXENGINE_JINA_V2_MODEL
+
+
+def wait_for_download_completion(
+    image_model_path: Path,
+    text_model_path: Path,
+    lock_path: Path,
+    timeout: int = 300,
+) -> bool:
+    start_time = time.time()
+
+    while time.time() - start_time < timeout:
+        if image_model_path.exists() and text_model_path.exists():
+            return True
+
+        if not lock_path.exists():
+            return image_model_path.exists() and text_model_path.exists()
+
+        time.sleep(1)
+
+    logger.warning("Timeout waiting for AXEngine model files: %s", image_model_path)
+    return False
+
+
+def auto_convert_model(model_path: str, model_type: str | None = None) -> str | None:
+    """Prepare AXEngine model files and return the image encoder path."""
+    if not is_axengine_compatible(model_path, "AXENGINE", model_type):
+        return None
+
+    model_dir = Path(model_path).parent
+    ui_model_key = f"jinaai/jina-clip-v2-{Path(model_path).name}"
+    ui_preprocessor_key = "jinaai/jina-clip-v2-preprocessor_config.json"
+    image_model_path = model_dir / "image_encoder.axmodel"
+    text_model_path = model_dir / "text_encoder.axmodel"
+    model_repo = os.environ.get("AXENGINE_JINA_V2_REPO", AXENGINE_JINA_V2_REPO)
+    hf_endpoint = os.environ.get("HF_ENDPOINT", "https://huggingface.co")
+    requestor = InterProcessRequestor()
+
+    download_targets = {
+        "image_encoder.axmodel": f"{hf_endpoint}/{model_repo}/resolve/main/image_encoder.axmodel",
+        "text_encoder.axmodel": f"{hf_endpoint}/{model_repo}/resolve/main/text_encoder.axmodel",
+    }
+
+    if image_model_path.exists() and text_model_path.exists():
+        requestor.send_data(
+            UPDATE_MODEL_STATE,
+            {
+                "model": ui_preprocessor_key,
+                "state": ModelStatusTypesEnum.downloaded,
+            },
+        )
+        requestor.send_data(
+            UPDATE_MODEL_STATE,
+            {
+                "model": ui_model_key,
+                "state": ModelStatusTypesEnum.downloaded,
+            },
+        )
+        requestor.stop()
+        return str(image_model_path)
+
+    lock_path = model_dir / ".axengine.download.lock"
+    lock = FileLock(lock_path, timeout=300, cleanup_stale_on_init=True)
+
+    if lock.acquire():
+        try:
+            requestor.send_data(
+                UPDATE_MODEL_STATE,
+                {
+                    "model": ui_preprocessor_key,
+                    "state": ModelStatusTypesEnum.downloaded,
+                },
+            )
+            requestor.send_data(
+                UPDATE_MODEL_STATE,
+                {
+                    "model": ui_model_key,
+                    "state": ModelStatusTypesEnum.downloading,
+                },
+            )
+
+            for file_name, url in download_targets.items():
+                target_path = model_dir / file_name
+                if target_path.exists():
+                    continue
+
+                target_path.parent.mkdir(parents=True, exist_ok=True)
+                ModelDownloader.download_from_url(url, str(target_path))
+
+            requestor.send_data(
+                UPDATE_MODEL_STATE,
+                {
+                    "model": ui_model_key,
+                    "state": ModelStatusTypesEnum.downloaded,
+                },
+            )
+
+            return str(image_model_path)
+        except Exception:
+            requestor.send_data(
+                UPDATE_MODEL_STATE,
+                {
+                    "model": ui_model_key,
+                    "state": ModelStatusTypesEnum.error,
+                },
+            )
+            logger.exception(
+                "Failed to prepare AXEngine model files for %s", model_repo
+            )
+            return None
+        finally:
+            requestor.stop()
+            lock.release()
+
+    logger.info("Another process is preparing AXEngine models, waiting for completion")
+    requestor.send_data(
+        UPDATE_MODEL_STATE,
+        {
+            "model": ui_preprocessor_key,
+            "state": ModelStatusTypesEnum.downloaded,
+        },
+    )
+    requestor.send_data(
+        UPDATE_MODEL_STATE,
+        {
+            "model": ui_model_key,
+            "state": ModelStatusTypesEnum.downloading,
+        },
+    )
+    requestor.stop()
+
+    if wait_for_download_completion(image_model_path, text_model_path, lock_path):
+        if image_model_path.exists() and text_model_path.exists():
+            requestor = InterProcessRequestor()
+            requestor.send_data(
+                UPDATE_MODEL_STATE,
+                {
+                    "model": ui_model_key,
+                    "state": ModelStatusTypesEnum.downloaded,
+                },
+            )
+            requestor.stop()
+            return str(image_model_path)
+
+    logger.error("Timeout waiting for AXEngine model download lock for %s", model_dir)
+    requestor = InterProcessRequestor()
+    requestor.send_data(
+        UPDATE_MODEL_STATE,
+        {
+            "model": ui_model_key,
+            "state": ModelStatusTypesEnum.error,
+        },
+    )
+    requestor.stop()
+    return None