From 7b4eaf2d10d89d5efee9be877f0fe90ddf1a6a28 Mon Sep 17 00:00:00 2001 From: shizhicheng Date: Fri, 24 Oct 2025 08:22:56 +0000 Subject: [PATCH 01/19] Initial commit for AXERA AI accelerators --- .github/workflows/ci.yml | 26 +++ docker/axcl/Dockerfile | 59 ++++++ docker/axcl/axcl.hcl | 13 ++ docker/axcl/axcl.mk | 15 ++ docker/axcl/user_installation.sh | 83 ++++++++ docs/docs/configuration/object_detectors.md | 39 ++++ docs/docs/frigate/hardware.md | 14 ++ docs/docs/frigate/installation.md | 34 ++++ frigate/detectors/plugins/axengine.py | 201 ++++++++++++++++++++ 9 files changed, 484 insertions(+) create mode 100644 docker/axcl/Dockerfile create mode 100644 docker/axcl/axcl.hcl create mode 100644 docker/axcl/axcl.mk create mode 100755 docker/axcl/user_installation.sh create mode 100644 frigate/detectors/plugins/axengine.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index dcf3070b5..60bcdf6b1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -225,3 +225,29 @@ jobs: sources: | ghcr.io/${{ steps.lowercaseRepo.outputs.lowercase }}:${{ env.SHORT_SHA }}-amd64 ghcr.io/${{ steps.lowercaseRepo.outputs.lowercase }}:${{ env.SHORT_SHA }}-rpi + axera_build: + runs-on: ubuntu-22.04 + name: AXERA Build + needs: + - amd64_build + - arm64_build + steps: + - name: Check out code + uses: actions/checkout@v5 + with: + persist-credentials: false + - name: Set up QEMU and Buildx + id: setup + uses: ./.github/actions/setup + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Build and push Axera build + uses: docker/bake-action@v6 + with: + source: . + push: true + targets: axcl + files: docker/axcl/axcl.hcl + set: | + axcl.tags=${{ steps.setup.outputs.image-name }}-axcl + *.cache-from=type=gha \ No newline at end of file diff --git a/docker/axcl/Dockerfile b/docker/axcl/Dockerfile new file mode 100644 index 000000000..86e868b61 --- /dev/null +++ b/docker/axcl/Dockerfile @@ -0,0 +1,59 @@ +# syntax=docker/dockerfile:1.6 + +# https://askubuntu.com/questions/972516/debian-frontend-environment-variable +ARG DEBIAN_FRONTEND=noninteractive + +# Globally set pip break-system-packages option to avoid having to specify it every time +ARG PIP_BREAK_SYSTEM_PACKAGES=1 + + +FROM frigate AS frigate-axcl +ARG TARGETARCH +ARG PIP_BREAK_SYSTEM_PACKAGES + +# Install axmodels +RUN mkdir -p /axmodels \ + && wget https://github.com/ivanshi1108/assets/releases/download/v0.16.2/yolov5s_320.axmodel -O /axmodels/yolov5s_320.axmodel + +# Install axpyengine +RUN wget https://github.com/AXERA-TECH/pyaxengine/releases/download/0.1.3.rc1/axengine-0.1.3-py3-none-any.whl -O /axengine-0.1.3-py3-none-any.whl +RUN pip3 install -i https://mirrors.aliyun.com/pypi/simple/ /axengine-0.1.3-py3-none-any.whl \ + && rm /axengine-0.1.3-py3-none-any.whl + +# Install axcl +RUN if [ "$TARGETARCH" = "amd64" ]; then \ + echo "Installing x86_64 version of axcl"; \ + wget https://github.com/ivanshi1108/assets/releases/download/v0.16.2/axcl_host_x86_64_V3.6.5_20250908154509_NO4973.deb -O /axcl.deb; \ + else \ + echo "Installing aarch64 version of axcl"; \ + wget https://github.com/ivanshi1108/assets/releases/download/v0.16.2/axcl_host_aarch64_V3.6.5_20250908154509_NO4973.deb -O /axcl.deb; \ + fi + +RUN mkdir /unpack_axcl && \ + dpkg-deb -x /axcl.deb /unpack_axcl && \ + cp -R /unpack_axcl/usr/bin/axcl /usr/bin/ && \ + cp -R /unpack_axcl/usr/lib/axcl /usr/lib/ && \ + rm -rf /unpack_axcl /axcl.deb + + +# Install axcl ffmpeg +RUN mkdir -p /usr/lib/ffmpeg/axcl + +RUN if [ "$TARGETARCH" = "amd64" ]; then \ + wget 
https://github.com/ivanshi1108/assets/releases/download/v0.16.2/ffmpeg-x64 -O /usr/lib/ffmpeg/axcl/ffmpeg && \ + wget https://github.com/ivanshi1108/assets/releases/download/v0.16.2/ffprobe-x64 -O /usr/lib/ffmpeg/axcl/ffprobe; \ + else \ + wget https://github.com/ivanshi1108/assets/releases/download/v0.16.2/ffmpeg-aarch64 -O /usr/lib/ffmpeg/axcl/ffmpeg && \ + wget https://github.com/ivanshi1108/assets/releases/download/v0.16.2/ffprobe-aarch64 -O /usr/lib/ffmpeg/axcl/ffprobe; \ + fi + +RUN chmod +x /usr/lib/ffmpeg/axcl/ffmpeg /usr/lib/ffmpeg/axcl/ffprobe + +# Set ldconfig path +RUN echo "/usr/lib/axcl" > /etc/ld.so.conf.d/ax.conf + +# Set env +ENV PATH="$PATH:/usr/bin/axcl" +ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/lib/axcl" + +ENTRYPOINT ["sh", "-c", "ldconfig && exec /init"] \ No newline at end of file diff --git a/docker/axcl/axcl.hcl b/docker/axcl/axcl.hcl new file mode 100644 index 000000000..d7cf0d4eb --- /dev/null +++ b/docker/axcl/axcl.hcl @@ -0,0 +1,13 @@ +target frigate { + dockerfile = "docker/main/Dockerfile" + platforms = ["linux/amd64", "linux/arm64"] + target = "frigate" +} + +target axcl { + dockerfile = "docker/axcl/Dockerfile" + contexts = { + frigate = "target:frigate", + } + platforms = ["linux/amd64", "linux/arm64"] +} \ No newline at end of file diff --git a/docker/axcl/axcl.mk b/docker/axcl/axcl.mk new file mode 100644 index 000000000..e4b6d4cef --- /dev/null +++ b/docker/axcl/axcl.mk @@ -0,0 +1,15 @@ +BOARDS += axcl + +local-axcl: version + docker buildx bake --file=docker/axcl/axcl.hcl axcl \ + --set axcl.tags=frigate:latest-axcl \ + --load + +build-axcl: version + docker buildx bake --file=docker/axcl/axcl.hcl axcl \ + --set axcl.tags=$(IMAGE_REPO):${GITHUB_REF_NAME}-$(COMMIT_HASH)-axcl + +push-axcl: build-axcl + docker buildx bake --file=docker/axcl/axcl.hcl axcl \ + --set axcl.tags=$(IMAGE_REPO):${GITHUB_REF_NAME}-$(COMMIT_HASH)-axcl \ + --push \ No newline at end of file diff --git a/docker/axcl/user_installation.sh b/docker/axcl/user_installation.sh new file mode 100755 index 000000000..e053a5faf --- /dev/null +++ b/docker/axcl/user_installation.sh @@ -0,0 +1,83 @@ +#!/bin/bash + +# Update package list and install dependencies +sudo apt-get update +sudo apt-get install -y build-essential cmake git wget pciutils kmod udev + +# Check if gcc-12 is needed +current_gcc_version=$(gcc --version | head -n1 | awk '{print $NF}') +gcc_major_version=$(echo $current_gcc_version | cut -d'.' -f1) + +if [[ $gcc_major_version -lt 12 ]]; then + echo "Current GCC version ($current_gcc_version) is lower than 12, installing gcc-12..." + sudo apt-get install -y gcc-12 + sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 12 + echo "GCC-12 installed and set as default" +else + echo "Current GCC version ($current_gcc_version) is sufficient, skipping GCC installation" +fi + +# Determine architecture +arch=$(uname -m) +download_url="" + +if [[ $arch == "x86_64" ]]; then + download_url="https://github.com/ivanshi1108/assets/releases/download/v0.16.2/axcl_host_x86_64_V3.6.5_20250908154509_NO4973.deb" + deb_file="axcl_host_x86_64_V3.6.5_20250908154509_NO4973.deb" +elif [[ $arch == "aarch64" ]]; then + download_url="https://github.com/ivanshi1108/assets/releases/download/v0.16.2/axcl_host_aarch64_V3.6.5_20250908154509_NO4973.deb" + deb_file="axcl_host_aarch64_V3.6.5_20250908154509_NO4973.deb" +else + echo "Unsupported architecture: $arch" + exit 1 +fi + +# Download AXCL driver +echo "Downloading AXCL driver for $arch..." +wget "$download_url" -O "$deb_file" + +if [ $? 
-ne 0 ]; then + echo "Failed to download AXCL driver" + exit 1 +fi + +# Install AXCL driver +echo "Installing AXCL driver..." +sudo dpkg -i "$deb_file" + +if [ $? -ne 0 ]; then + echo "Failed to install AXCL driver, attempting to fix dependencies..." + sudo apt-get install -f -y + sudo dpkg -i "$deb_file" + + if [ $? -ne 0 ]; then + echo "AXCL driver installation failed" + exit 1 + fi +fi + +# Update environment +echo "Updating environment..." +source /etc/profile + +# Verify installation +echo "Verifying AXCL installation..." +if command -v axcl-smi &> /dev/null; then + echo "AXCL driver detected, checking AI accelerator status..." + + axcl_output=$(axcl-smi 2>&1) + axcl_exit_code=$? + + echo "$axcl_output" + + if [ $axcl_exit_code -eq 0 ]; then + echo "AXCL driver installation completed successfully!" + else + echo "AXCL driver installed but no AI accelerator detected or communication failed." + echo "Please check if the AI accelerator is properly connected and powered on." + exit 1 + fi +else + echo "axcl-smi command not found. AXCL driver installation may have failed." + exit 1 +fi \ No newline at end of file diff --git a/docs/docs/configuration/object_detectors.md b/docs/docs/configuration/object_detectors.md index e352a6a9a..139f318d3 100644 --- a/docs/docs/configuration/object_detectors.md +++ b/docs/docs/configuration/object_detectors.md @@ -47,6 +47,11 @@ Frigate supports multiple different detectors that work on different types of ha - [Synaptics](#synaptics): synap models can run on Synaptics devices(e.g astra machina) with included NPUs. +**AXERA** + +- [AXEngine](#axera): axmodels can run on AXERA AI acceleration. + + **For Testing** - [CPU Detector (not recommended for actual use](#cpu-detector-not-recommended): Use a CPU to run tflite model, this is not recommended and in most cases OpenVINO can be used in CPU mode with better results. @@ -1099,6 +1104,40 @@ model: # required labelmap_path: /labelmap/coco-80.txt # required ``` +## AXERA + +Hardware accelerated object detection is supported on the following SoCs: + +- AX650N +- AX8850N + +This implementation uses the [AXera Pulsar2 Toolchain](https://huggingface.co/AXERA-TECH/Pulsar2). + +See the [installation docs](../frigate/installation.md#axera) for information on configuring the AXEngine hardware. + +### Configuration + +When configuring the AXEngine detector, you have to specify the model name. + +#### yolov5s + +A yolov5s model is provided in the container at /axmodels and is used by this detector type by default. + +Use the model configuration shown below when using the axengine detector with the default axmodel: + +```yaml +detectors: # required + axengine: # required + type: axengine # required + +model: # required + path: yolov5s_320 # required + width: 320 # required + height: 320 # required + tensor_format: bgr # required + labelmap_path: /labelmap/coco-80.txt # required +``` + ## Rockchip platform Hardware accelerated object detection is supported on the following SoCs: diff --git a/docs/docs/frigate/hardware.md b/docs/docs/frigate/hardware.md index f06f8ac7d..731de0535 100644 --- a/docs/docs/frigate/hardware.md +++ b/docs/docs/frigate/hardware.md @@ -110,6 +110,20 @@ Frigate supports multiple different detectors that work on different types of ha | ssd mobilenet | ~ 25 ms | | yolov5m | ~ 118 ms | +**Synaptics** + +- [Synaptics](#synaptics): synap models can run on Synaptics devices(e.g astra machina) with included NPUs to provide efficient object detection. 
+ +::: + +### AXERA + +- **AXEngine** Default model is **yolov5s_320** + +| Name | AXERA AX650N/AX8850N Inference Time | +| ---------------- | ----------------------------------- | +| yolov5s_320 | ~ 1.676 ms | + ### Hailo-8 Frigate supports both the Hailo-8 and Hailo-8L AI Acceleration Modules on compatible hardware platforms—including the Raspberry Pi 5 with the PCIe hat from the AI kit. The Hailo detector integration in Frigate automatically identifies your hardware type and selects the appropriate default model when a custom model isn’t provided. diff --git a/docs/docs/frigate/installation.md b/docs/docs/frigate/installation.md index a4fd14d3c..281f87956 100644 --- a/docs/docs/frigate/installation.md +++ b/docs/docs/frigate/installation.md @@ -287,6 +287,40 @@ or add these options to your `docker run` command: Next, you should configure [hardware object detection](/configuration/object_detectors#synaptics) and [hardware video processing](/configuration/hardware_acceleration_video#synaptics). +### AXERA + +AXERA accelerators are available in an M.2 form factor, compatible with both Raspberry Pi and Orange Pi. This form factor has also been successfully tested on x86 platforms, making it a versatile choice for various computing environments. + +#### Installation + +Using AXERA accelerators requires the installation of the AXCL driver. We provide a convenient Linux script to complete this installation. + +Follow these steps for installation: + +1. Copy or download [this script](https://github.com/ivanshi1108/assets/releases/download/v0.16.2/user_installation.sh). +2. Ensure it has execution permissions with `sudo chmod +x user_installation.sh` +3. Run the script with `./user_installation.sh` + +#### Setup + +To set up Frigate, follow the default installation instructions, for example: `ghcr.io/blakeblackshear/frigate:stable` + +Next, grant Docker permissions to access your hardware by adding the following lines to your `docker-compose.yml` file: + +```yaml +devices: + - /dev/axcl_host + - /dev/ax_mmb_dev + - /dev/msg_userdev +``` + +If you are using `docker run`, add this option to your command `--device /dev/axcl_host --device /dev/ax_mmb_dev --device /dev/msg_userdev` + +#### Configuration + +Finally, configure [hardware object detection](/configuration/object_detectors#axera) to complete the setup. + + ## Docker Running through Docker with Docker Compose is the recommended install method. 
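For reference, the device passthrough described above is the only AXCL-specific change to an otherwise standard Frigate Compose file. A minimal sketch of a complete service follows; the service name, volume paths, and `shm_size` are illustrative assumptions, not part of this patch:

```yaml
# Minimal sketch of a Frigate service with AXCL device passthrough.
# Only the three device entries come from the docs above; the service
# name, volumes, and shm_size are illustrative assumptions.
services:
  frigate:
    image: ghcr.io/blakeblackshear/frigate:stable
    restart: unless-stopped
    devices:
      - /dev/axcl_host    # nodes created by the AXCL host driver
      - /dev/ax_mmb_dev
      - /dev/msg_userdev
    volumes:
      - ./config:/config
      - ./storage:/media/frigate
    shm_size: "512mb"
```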
diff --git a/frigate/detectors/plugins/axengine.py b/frigate/detectors/plugins/axengine.py new file mode 100644 index 000000000..206923093 --- /dev/null +++ b/frigate/detectors/plugins/axengine.py @@ -0,0 +1,201 @@ +import logging +import os.path +import re +import urllib.request +from typing import Literal + +import cv2 +import numpy as np +from pydantic import Field + +from frigate.const import MODEL_CACHE_DIR +from frigate.detectors.detection_api import DetectionApi +from frigate.detectors.detector_config import BaseDetectorConfig, ModelTypeEnum +from frigate.util.model import post_process_yolo + +import axengine as axe +from axengine import axclrt_provider_name, axengine_provider_name + +logger = logging.getLogger(__name__) + +DETECTOR_KEY = "axengine" + +CONF_THRESH = 0.65 +IOU_THRESH = 0.45 +STRIDES = [8, 16, 32] +ANCHORS = [ + [10, 13, 16, 30, 33, 23], + [30, 61, 62, 45, 59, 119], + [116, 90, 156, 198, 373, 326], +] + +class AxengineDetectorConfig(BaseDetectorConfig): + type: Literal[DETECTOR_KEY] + +class Axengine(DetectionApi): + type_key = DETECTOR_KEY + def __init__(self, config: AxengineDetectorConfig): + logger.info("__init__ axengine") + super().__init__(config) + self.height = config.model.height + self.width = config.model.width + model_path = config.model.path or "yolov5s_320" + self.session = axe.InferenceSession(f"/axmodels/{model_path}.axmodel") + + def __del__(self): + pass + + def xywh2xyxy(self, x): + # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right + y = np.copy(x) + y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x + y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y + y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x + y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y + return y + + def bboxes_iou(self, boxes1, boxes2): + """calculate the Intersection Over Union value""" + boxes1 = np.array(boxes1) + boxes2 = np.array(boxes2) + + boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * ( + boxes1[..., 3] - boxes1[..., 1] + ) + boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * ( + boxes2[..., 3] - boxes2[..., 1] + ) + + left_up = np.maximum(boxes1[..., :2], boxes2[..., :2]) + right_down = np.minimum(boxes1[..., 2:], boxes2[..., 2:]) + + inter_section = np.maximum(right_down - left_up, 0.0) + inter_area = inter_section[..., 0] * inter_section[..., 1] + union_area = boxes1_area + boxes2_area - inter_area + ious = np.maximum(1.0 * inter_area / union_area, np.finfo(np.float32).eps) + + return ious + + def nms(self, proposals, iou_threshold, conf_threshold, multi_label=False): + """ + :param bboxes: (xmin, ymin, xmax, ymax, score, class) + + Note: soft-nms, https://arxiv.org/pdf/1704.04503.pdf + https://github.com/bharatsingh430/soft-nms + """ + xc = proposals[..., 4] > conf_threshold + proposals = proposals[xc] + proposals[:, 5:] *= proposals[:, 4:5] + bboxes = self.xywh2xyxy(proposals[:, :4]) + if multi_label: + mask = proposals[:, 5:] > conf_threshold + nonzero_indices = np.argwhere(mask) + if nonzero_indices.size < 0: + return + i, j = nonzero_indices.T + bboxes = np.hstack( + (bboxes[i], proposals[i, j + 5][:, None], j[:, None].astype(float)) + ) + else: + confidences = proposals[:, 5:] + conf = confidences.max(axis=1, keepdims=True) + j = confidences.argmax(axis=1)[:, None] + + new_x_parts = [bboxes, conf, j.astype(float)] + bboxes = np.hstack(new_x_parts) + + mask = conf.reshape(-1) > conf_threshold + bboxes = bboxes[mask] + + classes_in_img = list(set(bboxes[:, 5])) + bboxes = bboxes[bboxes[:, 4].argsort()[::-1][:300]] + best_bboxes = 
[] + + for cls in classes_in_img: + cls_mask = bboxes[:, 5] == cls + cls_bboxes = bboxes[cls_mask] + + while len(cls_bboxes) > 0: + max_ind = np.argmax(cls_bboxes[:, 4]) + best_bbox = cls_bboxes[max_ind] + best_bboxes.append(best_bbox) + cls_bboxes = np.concatenate( + [cls_bboxes[:max_ind], cls_bboxes[max_ind + 1 :]] + ) + iou = self.bboxes_iou(best_bbox[np.newaxis, :4], cls_bboxes[:, :4]) + weight = np.ones((len(iou),), dtype=np.float32) + + iou_mask = iou > iou_threshold + weight[iou_mask] = 0.0 + + cls_bboxes[:, 4] = cls_bboxes[:, 4] * weight + score_mask = cls_bboxes[:, 4] > 0.0 + cls_bboxes = cls_bboxes[score_mask] + + if len(best_bboxes) == 0: + return np.empty((0, 6)) + + best_bboxes = np.vstack(best_bboxes) + best_bboxes = best_bboxes[best_bboxes[:, 4].argsort()[::-1]] + return best_bboxes + + def sigmoid(self, x): + return np.clip(0.2 * x + 0.5, 0, 1) + + def gen_proposals(self, outputs): + new_pred = [] + anchor_grid = np.array(ANCHORS).reshape(-1, 1, 1, 3, 2) + for i, pred in enumerate(outputs): + pred = self.sigmoid(pred) + n, h, w, c = pred.shape + + pred = pred.reshape(n, h, w, 3, 85) + conv_shape = pred.shape + output_size = conv_shape[1] + conv_raw_dxdy = pred[..., 0:2] + conv_raw_dwdh = pred[..., 2:4] + xy_grid = np.meshgrid(np.arange(output_size), np.arange(output_size)) + xy_grid = np.expand_dims(np.stack(xy_grid, axis=-1), axis=2) + + xy_grid = np.tile(np.expand_dims(xy_grid, axis=0), [1, 1, 1, 3, 1]) + xy_grid = xy_grid.astype(np.float32) + pred_xy = (conv_raw_dxdy * 2.0 - 0.5 + xy_grid) * STRIDES[i] + pred_wh = (conv_raw_dwdh * 2) ** 2 * anchor_grid[i] + pred[:, :, :, :, 0:4] = np.concatenate([pred_xy, pred_wh], axis=-1) + + new_pred.append(np.reshape(pred, (-1, np.shape(pred)[-1]))) + + return np.concatenate(new_pred, axis=0) + + def post_processing(self, outputs, input_shape, threshold=0.3): + proposals = self.gen_proposals(outputs) + bboxes = self.nms(proposals, IOU_THRESH, CONF_THRESH, multi_label=True) + + """ + bboxes: [x_min, y_min, x_max, y_max, probability, cls_id] format coordinates. + """ + + results = np.zeros((20, 6), np.float32) + + for i, bbox in enumerate(bboxes): + if i >= 20: + break + coor = np.array(bbox[:4], dtype=np.int32) + score = bbox[4] + if score < threshold: + continue + class_ind = int(bbox[5]) + results[i] = [ + class_ind, + score, + max(0, bbox[1]) / input_shape[1], + max(0, bbox[0]) / input_shape[0], + min(1, bbox[3] / input_shape[1]), + min(1, bbox[2] / input_shape[0]), + ] + return results + + def detect_raw(self, tensor_input): + results = None + results = self.session.run(None, {"images": tensor_input}) + return self.post_processing(results, (self.width, self.height)) From bb45483e9e1b0925475a59565a72b84bb4ff2992 Mon Sep 17 00:00:00 2001 From: ivanshi1108 Date: Tue, 28 Oct 2025 09:54:00 +0800 Subject: [PATCH 02/19] Modify AXERA section from hardware.md Modify AXERA section and related content from hardware documentation. --- docs/docs/frigate/hardware.md | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/docs/docs/frigate/hardware.md b/docs/docs/frigate/hardware.md index 731de0535..d70018b4a 100644 --- a/docs/docs/frigate/hardware.md +++ b/docs/docs/frigate/hardware.md @@ -110,19 +110,13 @@ Frigate supports multiple different detectors that work on different types of ha | ssd mobilenet | ~ 25 ms | | yolov5m | ~ 118 ms | -**Synaptics** - -- [Synaptics](#synaptics): synap models can run on Synaptics devices(e.g astra machina) with included NPUs to provide efficient object detection. 
- -::: - ### AXERA - **AXEngine** Default model is **yolov5s_320** | Name | AXERA AX650N/AX8850N Inference Time | | ---------------- | ----------------------------------- | -| yolov5s_320 | ~ 1.676 ms | +| yolov5s_320 | ~ 1.676 ms | ### Hailo-8 From 91e17e12b72202d236fa1d0676fc57e91ee383d1 Mon Sep 17 00:00:00 2001 From: shizhicheng Date: Sun, 9 Nov 2025 13:21:17 +0000 Subject: [PATCH 03/19] Change the default detection model to YOLOv9 --- docker/axcl/Dockerfile | 2 +- docs/docs/configuration/object_detectors.md | 6 +- docs/docs/frigate/hardware.md | 4 +- frigate/detectors/plugins/axengine.py | 236 +++++++------------- 4 files changed, 90 insertions(+), 158 deletions(-) diff --git a/docker/axcl/Dockerfile b/docker/axcl/Dockerfile index 86e868b61..4a16bffaf 100644 --- a/docker/axcl/Dockerfile +++ b/docker/axcl/Dockerfile @@ -13,7 +13,7 @@ ARG PIP_BREAK_SYSTEM_PACKAGES # Install axmodels RUN mkdir -p /axmodels \ - && wget https://github.com/ivanshi1108/assets/releases/download/v0.16.2/yolov5s_320.axmodel -O /axmodels/yolov5s_320.axmodel + && wget https://github.com/ivanshi1108/assets/releases/download/v0.16.2/yolov9_tiny_u16_npu3_bgr_320x320_nhwc.axmodel -O /axmodels/yolov9_320.axmodel # Install axpyengine RUN wget https://github.com/AXERA-TECH/pyaxengine/releases/download/0.1.3.rc1/axengine-0.1.3-py3-none-any.whl -O /axengine-0.1.3-py3-none-any.whl diff --git a/docs/docs/configuration/object_detectors.md b/docs/docs/configuration/object_detectors.md index 139f318d3..983e3e5e7 100644 --- a/docs/docs/configuration/object_detectors.md +++ b/docs/docs/configuration/object_detectors.md @@ -1119,9 +1119,9 @@ See the [installation docs](../frigate/installation.md#axera) for information on When configuring the AXEngine detector, you have to specify the model name. -#### yolov5s +#### yolov9 -A yolov5s model is provided in the container at /axmodels and is used by this detector type by default. +A yolov9 model is provided in the container at /axmodels and is used by this detector type by default. 
Use the model configuration shown below when using the axengine detector with the default axmodel: @@ -1131,7 +1131,7 @@ detectors: # required type: axengine # required model: # required - path: yolov5s_320 # required + path: yolov9_320 # required width: 320 # required height: 320 # required tensor_format: bgr # required diff --git a/docs/docs/frigate/hardware.md b/docs/docs/frigate/hardware.md index d70018b4a..1b6e425d8 100644 --- a/docs/docs/frigate/hardware.md +++ b/docs/docs/frigate/hardware.md @@ -112,11 +112,11 @@ Frigate supports multiple different detectors that work on different types of ha ### AXERA -- **AXEngine** Default model is **yolov5s_320** +- **AXEngine** Default model is **yolov9** | Name | AXERA AX650N/AX8850N Inference Time | | ---------------- | ----------------------------------- | -| yolov5s_320 | ~ 1.676 ms | +| yolov9 | ~ 1.012 ms | ### Hailo-8 diff --git a/frigate/detectors/plugins/axengine.py b/frigate/detectors/plugins/axengine.py index 206923093..333c61756 100644 --- a/frigate/detectors/plugins/axengine.py +++ b/frigate/detectors/plugins/axengine.py @@ -20,14 +20,9 @@ logger = logging.getLogger(__name__) DETECTOR_KEY = "axengine" +NUM_CLASSES = 80 CONF_THRESH = 0.65 IOU_THRESH = 0.45 -STRIDES = [8, 16, 32] -ANCHORS = [ - [10, 13, 16, 30, 33, 23], - [30, 61, 62, 45, 59, 119], - [116, 90, 156, 198, 373, 326], -] class AxengineDetectorConfig(BaseDetectorConfig): type: Literal[DETECTOR_KEY] @@ -39,161 +34,98 @@ class Axengine(DetectionApi): super().__init__(config) self.height = config.model.height self.width = config.model.width - model_path = config.model.path or "yolov5s_320" + model_path = config.model.path or "yolov9_320" self.session = axe.InferenceSession(f"/axmodels/{model_path}.axmodel") def __del__(self): pass - def xywh2xyxy(self, x): - # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right - y = np.copy(x) - y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x - y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y - y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x - y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y - return y - - def bboxes_iou(self, boxes1, boxes2): - """calculate the Intersection Over Union value""" - boxes1 = np.array(boxes1) - boxes2 = np.array(boxes2) - - boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * ( - boxes1[..., 3] - boxes1[..., 1] - ) - boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * ( - boxes2[..., 3] - boxes2[..., 1] - ) - - left_up = np.maximum(boxes1[..., :2], boxes2[..., :2]) - right_down = np.minimum(boxes1[..., 2:], boxes2[..., 2:]) - - inter_section = np.maximum(right_down - left_up, 0.0) - inter_area = inter_section[..., 0] * inter_section[..., 1] - union_area = boxes1_area + boxes2_area - inter_area - ious = np.maximum(1.0 * inter_area / union_area, np.finfo(np.float32).eps) - - return ious - - def nms(self, proposals, iou_threshold, conf_threshold, multi_label=False): + def post_processing(self, raw_output, input_shape): """ - :param bboxes: (xmin, ymin, xmax, ymax, score, class) - - Note: soft-nms, https://arxiv.org/pdf/1704.04503.pdf - https://github.com/bharatsingh430/soft-nms + raw_output: [1, 1, 84, 8400] + Returns: numpy array of shape (20, 6) [class_id, score, y_min, x_min, y_max, x_max] in normalized coordinates """ - xc = proposals[..., 4] > conf_threshold - proposals = proposals[xc] - proposals[:, 5:] *= proposals[:, 4:5] - bboxes = self.xywh2xyxy(proposals[:, :4]) - if multi_label: - mask = proposals[:, 5:] > conf_threshold - nonzero_indices = np.argwhere(mask) - if 
nonzero_indices.size < 0: - return - i, j = nonzero_indices.T - bboxes = np.hstack( - (bboxes[i], proposals[i, j + 5][:, None], j[:, None].astype(float)) - ) - else: - confidences = proposals[:, 5:] - conf = confidences.max(axis=1, keepdims=True) - j = confidences.argmax(axis=1)[:, None] - - new_x_parts = [bboxes, conf, j.astype(float)] - bboxes = np.hstack(new_x_parts) - - mask = conf.reshape(-1) > conf_threshold - bboxes = bboxes[mask] - - classes_in_img = list(set(bboxes[:, 5])) - bboxes = bboxes[bboxes[:, 4].argsort()[::-1][:300]] - best_bboxes = [] - - for cls in classes_in_img: - cls_mask = bboxes[:, 5] == cls - cls_bboxes = bboxes[cls_mask] - - while len(cls_bboxes) > 0: - max_ind = np.argmax(cls_bboxes[:, 4]) - best_bbox = cls_bboxes[max_ind] - best_bboxes.append(best_bbox) - cls_bboxes = np.concatenate( - [cls_bboxes[:max_ind], cls_bboxes[max_ind + 1 :]] - ) - iou = self.bboxes_iou(best_bbox[np.newaxis, :4], cls_bboxes[:, :4]) - weight = np.ones((len(iou),), dtype=np.float32) - - iou_mask = iou > iou_threshold - weight[iou_mask] = 0.0 - - cls_bboxes[:, 4] = cls_bboxes[:, 4] * weight - score_mask = cls_bboxes[:, 4] > 0.0 - cls_bboxes = cls_bboxes[score_mask] - - if len(best_bboxes) == 0: - return np.empty((0, 6)) - - best_bboxes = np.vstack(best_bboxes) - best_bboxes = best_bboxes[best_bboxes[:, 4].argsort()[::-1]] - return best_bboxes - - def sigmoid(self, x): - return np.clip(0.2 * x + 0.5, 0, 1) - - def gen_proposals(self, outputs): - new_pred = [] - anchor_grid = np.array(ANCHORS).reshape(-1, 1, 1, 3, 2) - for i, pred in enumerate(outputs): - pred = self.sigmoid(pred) - n, h, w, c = pred.shape - - pred = pred.reshape(n, h, w, 3, 85) - conv_shape = pred.shape - output_size = conv_shape[1] - conv_raw_dxdy = pred[..., 0:2] - conv_raw_dwdh = pred[..., 2:4] - xy_grid = np.meshgrid(np.arange(output_size), np.arange(output_size)) - xy_grid = np.expand_dims(np.stack(xy_grid, axis=-1), axis=2) - - xy_grid = np.tile(np.expand_dims(xy_grid, axis=0), [1, 1, 1, 3, 1]) - xy_grid = xy_grid.astype(np.float32) - pred_xy = (conv_raw_dxdy * 2.0 - 0.5 + xy_grid) * STRIDES[i] - pred_wh = (conv_raw_dwdh * 2) ** 2 * anchor_grid[i] - pred[:, :, :, :, 0:4] = np.concatenate([pred_xy, pred_wh], axis=-1) - - new_pred.append(np.reshape(pred, (-1, np.shape(pred)[-1]))) - - return np.concatenate(new_pred, axis=0) - - def post_processing(self, outputs, input_shape, threshold=0.3): - proposals = self.gen_proposals(outputs) - bboxes = self.nms(proposals, IOU_THRESH, CONF_THRESH, multi_label=True) - - """ - bboxes: [x_min, y_min, x_max, y_max, probability, cls_id] format coordinates. 
- """ - results = np.zeros((20, 6), np.float32) - for i, bbox in enumerate(bboxes): - if i >= 20: - break - coor = np.array(bbox[:4], dtype=np.int32) - score = bbox[4] - if score < threshold: - continue - class_ind = int(bbox[5]) - results[i] = [ - class_ind, - score, - max(0, bbox[1]) / input_shape[1], - max(0, bbox[0]) / input_shape[0], - min(1, bbox[3] / input_shape[1]), - min(1, bbox[2] / input_shape[0]), - ] - return results + try: + if not isinstance(raw_output, np.ndarray): + raw_output = np.array(raw_output) + + if len(raw_output.shape) == 4 and raw_output.shape[0] == 1 and raw_output.shape[1] == 1: + raw_output = raw_output.squeeze(1) + + pred = raw_output[0].transpose(1, 0) + + bxy = pred[:, :2] + bwh = pred[:, 2:4] + cls = pred[:, 4:4 + NUM_CLASSES] + + cx = bxy[:, 0] + cy = bxy[:, 1] + w = bwh[:, 0] + h = bwh[:, 1] + + x_min = cx - w / 2 + y_min = cy - h / 2 + x_max = cx + w / 2 + y_max = cy + h / 2 + + scores = np.max(cls, axis=1) + class_ids = np.argmax(cls, axis=1) + + mask = scores >= CONF_THRESH + boxes = np.stack([x_min, y_min, x_max, y_max], axis=1)[mask] + scores = scores[mask] + class_ids = class_ids[mask] + + if len(boxes) == 0: + return results + + boxes_nms = np.stack([x_min[mask], y_min[mask], + x_max[mask] - x_min[mask], + y_max[mask] - y_min[mask]], axis=1) + + indices = cv2.dnn.NMSBoxes( + boxes_nms.tolist(), + scores.tolist(), + score_threshold=CONF_THRESH, + nms_threshold=IOU_THRESH + ) + + if len(indices) == 0: + return results + + indices = indices.flatten() + + sorted_indices = sorted(indices, key=lambda idx: scores[idx], reverse=True) + indices = sorted_indices + + valid_detections = 0 + for i, idx in enumerate(indices): + if i >= 20: + break + + x_min_val, y_min_val, x_max_val, y_max_val = boxes[idx] + score = scores[idx] + class_id = class_ids[idx] + + if score < CONF_THRESH: + continue + + results[valid_detections] = [ + float(class_id), # class_id + float(score), # score + max(0, y_min_val) / input_shape[0], # y_min + max(0, x_min_val) / input_shape[1], # x_min + min(1, y_max_val / input_shape[0]), # y_max + min(1, x_max_val / input_shape[1]) # x_max + ] + valid_detections += 1 + + return results + + except Exception as e: + return results def detect_raw(self, tensor_input): results = None From 1dee548dbce18fd641b144eb4d4952a3fb40e6e1 Mon Sep 17 00:00:00 2001 From: shizhicheng Date: Tue, 11 Nov 2025 04:40:27 +0000 Subject: [PATCH 04/19] Modifications to the YOLOv9 object detection model: The model is now dynamically downloaded to the cache directory. Post-processing is now done using Frigate's built-in `post_process_yolo`. Configuration in the relevant documentation has been updated. 
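The resolve-or-download flow this message describes is condensed below as a hedged sketch; the names mirror the `parse_model_input`/`download_model` pair in the diff that follows, but the hard-coded cache path is an assumption for brevity (the real code derives it from Frigate's `MODEL_CACHE_DIR`):

```python
import os
import urllib.request

# Sketch of the cache-or-download logic added in this commit. The cache
# directory literal is an assumption; the detector builds it from
# MODEL_CACHE_DIR. The release URL matches the one used in the diff below.
MODEL_CACHE = "/config/model_cache/axengine_cache"

def resolve_model(name: str = "frigate-yolov9-tiny") -> str:
    path = os.path.join(MODEL_CACHE, f"{name}.axmodel")
    if not os.path.isfile(path):
        os.makedirs(MODEL_CACHE, exist_ok=True)
        # GITHUB_ENDPOINT lets users point at a mirror, as in the diff below.
        endpoint = os.environ.get("GITHUB_ENDPOINT", "https://github.com")
        urllib.request.urlretrieve(
            f"{endpoint}/ivanshi1108/assets/releases/download/v0.16.2/{name}.axmodel",
            path,
        )
    return path
```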
--- docker/axcl/Dockerfile | 4 - docs/docs/configuration/object_detectors.md | 3 +- docs/docs/frigate/hardware.md | 2 +- frigate/detectors/plugins/axengine.py | 132 +++++++------------- 4 files changed, 49 insertions(+), 92 deletions(-) diff --git a/docker/axcl/Dockerfile b/docker/axcl/Dockerfile index 4a16bffaf..83271bce8 100644 --- a/docker/axcl/Dockerfile +++ b/docker/axcl/Dockerfile @@ -11,10 +11,6 @@ FROM frigate AS frigate-axcl ARG TARGETARCH ARG PIP_BREAK_SYSTEM_PACKAGES -# Install axmodels -RUN mkdir -p /axmodels \ - && wget https://github.com/ivanshi1108/assets/releases/download/v0.16.2/yolov9_tiny_u16_npu3_bgr_320x320_nhwc.axmodel -O /axmodels/yolov9_320.axmodel - # Install axpyengine RUN wget https://github.com/AXERA-TECH/pyaxengine/releases/download/0.1.3.rc1/axengine-0.1.3-py3-none-any.whl -O /axengine-0.1.3-py3-none-any.whl RUN pip3 install -i https://mirrors.aliyun.com/pypi/simple/ /axengine-0.1.3-py3-none-any.whl \ diff --git a/docs/docs/configuration/object_detectors.md b/docs/docs/configuration/object_detectors.md index 983e3e5e7..88b015c34 100644 --- a/docs/docs/configuration/object_detectors.md +++ b/docs/docs/configuration/object_detectors.md @@ -1131,7 +1131,8 @@ detectors: # required type: axengine # required model: # required - path: yolov9_320 # required + path: frigate-yolov9-tiny # required + model_type: yolo-generic # required width: 320 # required height: 320 # required tensor_format: bgr # required diff --git a/docs/docs/frigate/hardware.md b/docs/docs/frigate/hardware.md index 1b6e425d8..cf7ebcdb8 100644 --- a/docs/docs/frigate/hardware.md +++ b/docs/docs/frigate/hardware.md @@ -116,7 +116,7 @@ Frigate supports multiple different detectors that work on different types of ha | Name | AXERA AX650N/AX8850N Inference Time | | ---------------- | ----------------------------------- | -| yolov9 | ~ 1.012 ms | +| yolov9-tiny | ~ 1.012 ms | ### Hailo-8 diff --git a/frigate/detectors/plugins/axengine.py b/frigate/detectors/plugins/axengine.py index 333c61756..3bbfead09 100644 --- a/frigate/detectors/plugins/axengine.py +++ b/frigate/detectors/plugins/axengine.py @@ -20,9 +20,12 @@ logger = logging.getLogger(__name__) DETECTOR_KEY = "axengine" -NUM_CLASSES = 80 -CONF_THRESH = 0.65 -IOU_THRESH = 0.45 +supported_models = { + ModelTypeEnum.yologeneric: "frigate-yolov9-tiny", +} + +model_cache_dir = os.path.join(MODEL_CACHE_DIR, "axengine_cache/") + class AxengineDetectorConfig(BaseDetectorConfig): type: Literal[DETECTOR_KEY] @@ -34,100 +37,57 @@ class Axengine(DetectionApi): super().__init__(config) self.height = config.model.height self.width = config.model.width - model_path = config.model.path or "yolov9_320" - self.session = axe.InferenceSession(f"/axmodels/{model_path}.axmodel") + model_path = config.model.path or "frigate-yolov9-tiny" + + model_props = self.parse_model_input(model_path) + + self.session = axe.InferenceSession(model_props["path"]) def __del__(self): pass - def post_processing(self, raw_output, input_shape): - """ - raw_output: [1, 1, 84, 8400] - Returns: numpy array of shape (20, 6) [class_id, score, y_min, x_min, y_max, x_max] in normalized coordinates - """ - results = np.zeros((20, 6), np.float32) + def parse_model_input(self, model_path): + model_props = {} + model_props["preset"] = True - try: - if not isinstance(raw_output, np.ndarray): - raw_output = np.array(raw_output) + model_matched = False + for model_type, pattern in supported_models.items(): + if re.match(pattern, model_path): + model_matched = True + model_props["model_type"] = 
model_type - if len(raw_output.shape) == 4 and raw_output.shape[0] == 1 and raw_output.shape[1] == 1: - raw_output = raw_output.squeeze(1) + if model_matched: + model_props["filename"] = model_path + f".axmodel" + model_props["path"] = model_cache_dir + model_props["filename"] - pred = raw_output[0].transpose(1, 0) - - bxy = pred[:, :2] - bwh = pred[:, 2:4] - cls = pred[:, 4:4 + NUM_CLASSES] - - cx = bxy[:, 0] - cy = bxy[:, 1] - w = bwh[:, 0] - h = bwh[:, 1] - - x_min = cx - w / 2 - y_min = cy - h / 2 - x_max = cx + w / 2 - y_max = cy + h / 2 - - scores = np.max(cls, axis=1) - class_ids = np.argmax(cls, axis=1) - - mask = scores >= CONF_THRESH - boxes = np.stack([x_min, y_min, x_max, y_max], axis=1)[mask] - scores = scores[mask] - class_ids = class_ids[mask] - - if len(boxes) == 0: - return results - - boxes_nms = np.stack([x_min[mask], y_min[mask], - x_max[mask] - x_min[mask], - y_max[mask] - y_min[mask]], axis=1) - - indices = cv2.dnn.NMSBoxes( - boxes_nms.tolist(), - scores.tolist(), - score_threshold=CONF_THRESH, - nms_threshold=IOU_THRESH + if not os.path.isfile(model_props["path"]): + self.download_model(model_props["filename"]) + else: + supported_models_str = ", ".join( + model[1:-1] for model in supported_models ) + raise Exception( + f"Model {model_path} is unsupported. Provide your own model or choose one of the following: {supported_models_str}" + ) + return model_props - if len(indices) == 0: - return results + def download_model(self, filename): + if not os.path.isdir(model_cache_dir): + os.mkdir(model_cache_dir) - indices = indices.flatten() - - sorted_indices = sorted(indices, key=lambda idx: scores[idx], reverse=True) - indices = sorted_indices - - valid_detections = 0 - for i, idx in enumerate(indices): - if i >= 20: - break - - x_min_val, y_min_val, x_max_val, y_max_val = boxes[idx] - score = scores[idx] - class_id = class_ids[idx] - - if score < CONF_THRESH: - continue - - results[valid_detections] = [ - float(class_id), # class_id - float(score), # score - max(0, y_min_val) / input_shape[0], # y_min - max(0, x_min_val) / input_shape[1], # x_min - min(1, y_max_val / input_shape[0]), # y_max - min(1, x_max_val / input_shape[1]) # x_max - ] - valid_detections += 1 - - return results - - except Exception as e: - return results + GITHUB_ENDPOINT = os.environ.get("GITHUB_ENDPOINT", "https://github.com") + urllib.request.urlretrieve( + f"{GITHUB_ENDPOINT}/ivanshi1108/assets/releases/download/v0.16.2/{filename}", + model_cache_dir + filename, + ) def detect_raw(self, tensor_input): results = None results = self.session.run(None, {"images": tensor_input}) - return self.post_processing(results, (self.width, self.height)) + if self.detector_config.model.model_type == ModelTypeEnum.yologeneric: + return post_process_yolo(results, self.width, self.height) + else: + raise ValueError( + f'Model type "{self.detector_config.model.model_type}" is currently not supported.' 
+ ) + From e27a94ae0b0055b763d904c6434c669f76476e13 Mon Sep 17 00:00:00 2001 From: shizhicheng Date: Tue, 11 Nov 2025 05:54:19 +0000 Subject: [PATCH 05/19] Fix logical errors caused by code formatting --- frigate/detectors/plugins/axengine.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/frigate/detectors/plugins/axengine.py b/frigate/detectors/plugins/axengine.py index 3bbfead09..9cde9841b 100644 --- a/frigate/detectors/plugins/axengine.py +++ b/frigate/detectors/plugins/axengine.py @@ -21,7 +21,7 @@ logger = logging.getLogger(__name__) DETECTOR_KEY = "axengine" supported_models = { - ModelTypeEnum.yologeneric: "frigate-yolov9-tiny", + ModelTypeEnum.yologeneric: "frigate-yolov9-.*$", } model_cache_dir = os.path.join(MODEL_CACHE_DIR, "axengine_cache/") @@ -38,9 +38,7 @@ class Axengine(DetectionApi): self.height = config.model.height self.width = config.model.width model_path = config.model.path or "frigate-yolov9-tiny" - model_props = self.parse_model_input(model_path) - self.session = axe.InferenceSession(model_props["path"]) def __del__(self): @@ -51,6 +49,7 @@ class Axengine(DetectionApi): model_props["preset"] = True model_matched = False + for model_type, pattern in supported_models.items(): if re.match(pattern, model_path): model_matched = True @@ -60,8 +59,8 @@ class Axengine(DetectionApi): model_props["filename"] = model_path + f".axmodel" model_props["path"] = model_cache_dir + model_props["filename"] - if not os.path.isfile(model_props["path"]): - self.download_model(model_props["filename"]) + if not os.path.isfile(model_props["path"]): + self.download_model(model_props["filename"]) else: supported_models_str = ", ".join( model[1:-1] for model in supported_models From 438df7d48429dc400ad7ac1c4223d9354e28c419 Mon Sep 17 00:00:00 2001 From: shizhicheng Date: Sun, 16 Nov 2025 22:22:34 +0800 Subject: [PATCH 06/19] The model inference time has been changed to the time displayed on the Frigate UI --- docs/docs/frigate/hardware.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/docs/frigate/hardware.md b/docs/docs/frigate/hardware.md index cf7ebcdb8..6cce97b3b 100644 --- a/docs/docs/frigate/hardware.md +++ b/docs/docs/frigate/hardware.md @@ -116,7 +116,7 @@ Frigate supports multiple different detectors that work on different types of ha | Name | AXERA AX650N/AX8850N Inference Time | | ---------------- | ----------------------------------- | -| yolov9-tiny | ~ 1.012 ms | +| yolov9-tiny | ~ 4 ms | ### Hailo-8 From b4abbd7d3b3dd8e2da538bb9ac88dc3d1c8453df Mon Sep 17 00:00:00 2001 From: shizhicheng Date: Mon, 24 Nov 2025 02:17:52 +0000 Subject: [PATCH 07/19] Modify the document based on review suggestions --- docs/docs/configuration/object_detectors.md | 70 ++++++++++----------- docs/docs/frigate/installation.md | 4 +- 2 files changed, 38 insertions(+), 36 deletions(-) diff --git a/docs/docs/configuration/object_detectors.md b/docs/docs/configuration/object_detectors.md index 88b015c34..7351ef6f4 100644 --- a/docs/docs/configuration/object_detectors.md +++ b/docs/docs/configuration/object_detectors.md @@ -1104,41 +1104,6 @@ model: # required labelmap_path: /labelmap/coco-80.txt # required ``` -## AXERA - -Hardware accelerated object detection is supported on the following SoCs: - -- AX650N -- AX8850N - -This implementation uses the [AXera Pulsar2 Toolchain](https://huggingface.co/AXERA-TECH/Pulsar2). - -See the [installation docs](../frigate/installation.md#axera) for information on configuring the AXEngine hardware. 
- -### Configuration - -When configuring the AXEngine detector, you have to specify the model name. - -#### yolov9 - -A yolov9 model is provided in the container at /axmodels and is used by this detector type by default. - -Use the model configuration shown below when using the axengine detector with the default axmodel: - -```yaml -detectors: # required - axengine: # required - type: axengine # required - -model: # required - path: frigate-yolov9-tiny # required - model_type: yolo-generic # required - width: 320 # required - height: 320 # required - tensor_format: bgr # required - labelmap_path: /labelmap/coco-80.txt # required -``` - ## Rockchip platform Hardware accelerated object detection is supported on the following SoCs: @@ -1403,6 +1368,41 @@ model: input_pixel_format: rgb/bgr # look at the model.json to figure out which to put here ``` +## AXERA + +Hardware accelerated object detection is supported on the following SoCs: + +- AX650N +- AX8850N + +This implementation uses the [AXera Pulsar2 Toolchain](https://huggingface.co/AXERA-TECH/Pulsar2). + +See the [installation docs](../frigate/installation.md#axera) for information on configuring the AXEngine hardware. + +### Configuration + +When configuring the AXEngine detector, you have to specify the model name. + +#### yolov9 + +A yolov9 model is provided in the container at /axmodels and is used by this detector type by default. + +Use the model configuration shown below when using the axengine detector with the default axmodel: + +```yaml +detectors: + axengine: + type: axengine + +model: + path: frigate-yolov9-tiny + model_type: yolo-generic + width: 320 + height: 320 + tensor_format: bgr + labelmap_path: /labelmap/coco-80.txt +``` + # Models Some model types are not included in Frigate by default. diff --git a/docs/docs/frigate/installation.md b/docs/docs/frigate/installation.md index 281f87956..4622f68be 100644 --- a/docs/docs/frigate/installation.md +++ b/docs/docs/frigate/installation.md @@ -289,6 +289,8 @@ Next, you should configure [hardware object detection](/configuration/object_det ### AXERA +
<details>
+<summary>AXERA accelerators</summary>
 AXERA accelerators are available in an M.2 form factor, compatible with both Raspberry Pi and Orange Pi. This form factor has also been successfully tested on x86 platforms, making it a versatile choice for various computing environments.
 
 #### Installation
 
@@ -319,7 +321,7 @@ If you are using `docker run`, add this option to your command `--device /dev/ax
 
 #### Configuration
 
 Finally, configure [hardware object detection](/configuration/object_detectors#axera) to complete the setup.
-
+</details>
## Docker From f134796913c536f470a22c4fa6c78d3e90ad50c6 Mon Sep 17 00:00:00 2001 From: shizhicheng Date: Mon, 24 Nov 2025 02:42:04 +0000 Subject: [PATCH 08/19] format code with ruff --- frigate/detectors/plugins/axengine.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/frigate/detectors/plugins/axengine.py b/frigate/detectors/plugins/axengine.py index 9cde9841b..507fb582c 100644 --- a/frigate/detectors/plugins/axengine.py +++ b/frigate/detectors/plugins/axengine.py @@ -4,18 +4,13 @@ import re import urllib.request from typing import Literal -import cv2 -import numpy as np -from pydantic import Field +import axengine as axe from frigate.const import MODEL_CACHE_DIR from frigate.detectors.detection_api import DetectionApi from frigate.detectors.detector_config import BaseDetectorConfig, ModelTypeEnum from frigate.util.model import post_process_yolo -import axengine as axe -from axengine import axclrt_provider_name, axengine_provider_name - logger = logging.getLogger(__name__) DETECTOR_KEY = "axengine" @@ -56,7 +51,7 @@ class Axengine(DetectionApi): model_props["model_type"] = model_type if model_matched: - model_props["filename"] = model_path + f".axmodel" + model_props["filename"] = model_path + ".axmodel" model_props["path"] = model_cache_dir + model_props["filename"] if not os.path.isfile(model_props["path"]): From 2eef58aa1d521617710bb0c9888588a0a4d726e1 Mon Sep 17 00:00:00 2001 From: shizhicheng Date: Mon, 24 Nov 2025 06:57:32 +0000 Subject: [PATCH 09/19] Modify the description of AXERA in the documentation. --- docs/docs/frigate/hardware.md | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/docs/docs/frigate/hardware.md b/docs/docs/frigate/hardware.md index 8d999fb85..26b9b0706 100644 --- a/docs/docs/frigate/hardware.md +++ b/docs/docs/frigate/hardware.md @@ -104,6 +104,10 @@ Frigate supports multiple different detectors that work on different types of ha - [Synaptics](#synaptics): synap models can run on Synaptics devices(e.g astra machina) with included NPUs to provide efficient object detection. +**AXERA** + +- [AXEngine](#axera): axera models can run on AXERA NPUs via AXEngine, delivering highly efficient object detection. + ::: ### Hailo-8 @@ -287,6 +291,14 @@ The inference time of a rk3588 with all 3 cores enabled is typically 25-30 ms fo | ssd mobilenet | ~ 25 ms | | yolov5m | ~ 118 ms | +### AXERA + +- **AXEngine** Default model is **yolov9** + +| Name | AXERA AX650N/AX8850N Inference Time | +| ---------------- | ----------------------------------- | +| yolov9-tiny | ~ 4 ms | + ## What does Frigate use the CPU for and what does it use a detector for? (ELI5 Version) This is taken from a [user question on reddit](https://www.reddit.com/r/homeassistant/comments/q8mgau/comment/hgqbxh5/?utm_source=share&utm_medium=web2x&context=3). Modified slightly for clarity. @@ -307,12 +319,4 @@ Basically - When you increase the resolution and/or the frame rate of the stream YES! The Coral does not help with decoding video streams. -Decompressing video streams takes a significant amount of CPU power. Video compression uses key frames (also known as I-frames) to send a full frame in the video stream. The following frames only include the difference from the key frame, and the CPU has to compile each frame by merging the differences with the key frame. [More detailed explanation](https://support.video.ibm.com/hc/en-us/articles/18106203580316-Keyframes-InterFrame-Video-Compression). 
Higher resolutions and frame rates mean more processing power is needed to decode the video stream, so try to set them on the camera to avoid unnecessary decoding work.
\ No newline at end of file

From 7933a83a429305ca8837c5b1fd318e3412b71334 Mon Sep 17 00:00:00 2001
From: ivanshi1108
Date: Mon, 24 Nov 2025 23:04:19 +0800
Subject: [PATCH 10/19] Update docs/docs/configuration/object_detectors.md

Co-authored-by: Nicolas Mowen
---
 docs/docs/configuration/object_detectors.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/docs/configuration/object_detectors.md b/docs/docs/configuration/object_detectors.md
index e7f0bc685..6227d9711 100644
--- a/docs/docs/configuration/object_detectors.md
+++ b/docs/docs/configuration/object_detectors.md
@@ -49,7 +49,7 @@ Frigate supports multiple different detectors that work on different types of ha
 
-**AXERA** 
+**AXERA**
 
 - [AXEngine](#axera): axmodels can run on AXERA AI acceleration.
 

From acb17a7b50c9fafad25f1fa87faa028ce2998b33 Mon Sep 17 00:00:00 2001
From: shizhicheng
Date: Mon, 1 Dec 2025 04:47:35 +0000
Subject: [PATCH 11/19] Format code based on the results of Python checks

---
 frigate/detectors/plugins/axengine.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/frigate/detectors/plugins/axengine.py b/frigate/detectors/plugins/axengine.py
index 507fb582c..39c4d1a98 100644
--- a/frigate/detectors/plugins/axengine.py
+++ b/frigate/detectors/plugins/axengine.py
@@ -25,8 +25,10 @@ model_cache_dir = os.path.join(MODEL_CACHE_DIR, "axengine_cache/")
 class AxengineDetectorConfig(BaseDetectorConfig):
     type: Literal[DETECTOR_KEY]
 
+
 class Axengine(DetectionApi):
     type_key = DETECTOR_KEY
+
     def __init__(self, config: AxengineDetectorConfig):
         logger.info("__init__ axengine")
         super().__init__(config)
@@ -57,9 +59,7 @@
             if not os.path.isfile(model_props["path"]):
                 self.download_model(model_props["filename"])
         else:
-            supported_models_str = ", ".join(
-                model[1:-1] for model in supported_models
-            )
+            supported_models_str = ", ".join(model[1:-1] for model in supported_models)
             raise Exception(
                 f"Model {model_path} is unsupported. Provide your own model or choose one of the following: {supported_models_str}"
) - From 3a0b020f0c2f12802c7064cfe79f719fe47145a5 Mon Sep 17 00:00:00 2001 From: shizhicheng Date: Sun, 15 Feb 2026 02:34:16 +0800 Subject: [PATCH 12/19] Integrate for the Axera accelerator card --- docker/axcl/Dockerfile | 30 -- docker/axcl/rk-axcl.hcl | 7 + docker/axcl/rpi-axcl.hcl | 7 + docker/axcl/user_installation.sh | 49 ++- docker/axcl/x86-axcl.hcl | 13 + frigate/config/classification.py | 1 + frigate/embeddings/embeddings.py | 13 + .../embeddings/onnx/jina_v2_embedding_ax.py | 281 ++++++++++++++++++ web/src/pages/Explore.tsx | 63 +++- web/src/types/frigateConfig.ts | 2 +- 10 files changed, 416 insertions(+), 50 deletions(-) create mode 100644 docker/axcl/rk-axcl.hcl create mode 100644 docker/axcl/rpi-axcl.hcl create mode 100644 docker/axcl/x86-axcl.hcl create mode 100644 frigate/embeddings/onnx/jina_v2_embedding_ax.py diff --git a/docker/axcl/Dockerfile b/docker/axcl/Dockerfile index 83271bce8..e67046055 100644 --- a/docker/axcl/Dockerfile +++ b/docker/axcl/Dockerfile @@ -6,7 +6,6 @@ ARG DEBIAN_FRONTEND=noninteractive # Globally set pip break-system-packages option to avoid having to specify it every time ARG PIP_BREAK_SYSTEM_PACKAGES=1 - FROM frigate AS frigate-axcl ARG TARGETARCH ARG PIP_BREAK_SYSTEM_PACKAGES @@ -16,35 +15,6 @@ RUN wget https://github.com/AXERA-TECH/pyaxengine/releases/download/0.1.3.rc1/ax RUN pip3 install -i https://mirrors.aliyun.com/pypi/simple/ /axengine-0.1.3-py3-none-any.whl \ && rm /axengine-0.1.3-py3-none-any.whl -# Install axcl -RUN if [ "$TARGETARCH" = "amd64" ]; then \ - echo "Installing x86_64 version of axcl"; \ - wget https://github.com/ivanshi1108/assets/releases/download/v0.16.2/axcl_host_x86_64_V3.6.5_20250908154509_NO4973.deb -O /axcl.deb; \ - else \ - echo "Installing aarch64 version of axcl"; \ - wget https://github.com/ivanshi1108/assets/releases/download/v0.16.2/axcl_host_aarch64_V3.6.5_20250908154509_NO4973.deb -O /axcl.deb; \ - fi - -RUN mkdir /unpack_axcl && \ - dpkg-deb -x /axcl.deb /unpack_axcl && \ - cp -R /unpack_axcl/usr/bin/axcl /usr/bin/ && \ - cp -R /unpack_axcl/usr/lib/axcl /usr/lib/ && \ - rm -rf /unpack_axcl /axcl.deb - - -# Install axcl ffmpeg -RUN mkdir -p /usr/lib/ffmpeg/axcl - -RUN if [ "$TARGETARCH" = "amd64" ]; then \ - wget https://github.com/ivanshi1108/assets/releases/download/v0.16.2/ffmpeg-x64 -O /usr/lib/ffmpeg/axcl/ffmpeg && \ - wget https://github.com/ivanshi1108/assets/releases/download/v0.16.2/ffprobe-x64 -O /usr/lib/ffmpeg/axcl/ffprobe; \ - else \ - wget https://github.com/ivanshi1108/assets/releases/download/v0.16.2/ffmpeg-aarch64 -O /usr/lib/ffmpeg/axcl/ffmpeg && \ - wget https://github.com/ivanshi1108/assets/releases/download/v0.16.2/ffprobe-aarch64 -O /usr/lib/ffmpeg/axcl/ffprobe; \ - fi - -RUN chmod +x /usr/lib/ffmpeg/axcl/ffmpeg /usr/lib/ffmpeg/axcl/ffprobe - # Set ldconfig path RUN echo "/usr/lib/axcl" > /etc/ld.so.conf.d/ax.conf diff --git a/docker/axcl/rk-axcl.hcl b/docker/axcl/rk-axcl.hcl new file mode 100644 index 000000000..eea2bd93d --- /dev/null +++ b/docker/axcl/rk-axcl.hcl @@ -0,0 +1,7 @@ +target rk-axcl { + dockerfile = "docker/axcl/Dockerfile" + contexts = { + frigate = "target:rk", + } + platforms = ["linux/arm64"] +} \ No newline at end of file diff --git a/docker/axcl/rpi-axcl.hcl b/docker/axcl/rpi-axcl.hcl new file mode 100644 index 000000000..72cdc71c0 --- /dev/null +++ b/docker/axcl/rpi-axcl.hcl @@ -0,0 +1,7 @@ +target rpi-axcl { + dockerfile = "docker/axcl/Dockerfile" + contexts = { + frigate = "target:rpi", + } + platforms = ["linux/arm64"] +} \ No newline at end of file diff --git 
a/docker/axcl/user_installation.sh b/docker/axcl/user_installation.sh index e053a5faf..4a36a99e1 100755 --- a/docker/axcl/user_installation.sh +++ b/docker/axcl/user_installation.sh @@ -1,14 +1,25 @@ #!/bin/bash +set -e + +# Function to clean up on error +cleanup() { + echo "Cleaning up temporary files..." + rm -f "$deb_file" +} + +trap cleanup ERR +trap 'echo "Script interrupted by user (Ctrl+C)"; cleanup; exit 130' INT + # Update package list and install dependencies +echo "Updating package list and installing dependencies..." sudo apt-get update sudo apt-get install -y build-essential cmake git wget pciutils kmod udev # Check if gcc-12 is needed +echo "Checking GCC version..." current_gcc_version=$(gcc --version | head -n1 | awk '{print $NF}') -gcc_major_version=$(echo $current_gcc_version | cut -d'.' -f1) - -if [[ $gcc_major_version -lt 12 ]]; then +if ! dpkg --compare-versions "$current_gcc_version" ge "12" 2>/dev/null; then echo "Current GCC version ($current_gcc_version) is lower than 12, installing gcc-12..." sudo apt-get install -y gcc-12 sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 12 @@ -18,26 +29,37 @@ else fi # Determine architecture +echo "Determining system architecture..." arch=$(uname -m) download_url="" if [[ $arch == "x86_64" ]]; then - download_url="https://github.com/ivanshi1108/assets/releases/download/v0.16.2/axcl_host_x86_64_V3.6.5_20250908154509_NO4973.deb" - deb_file="axcl_host_x86_64_V3.6.5_20250908154509_NO4973.deb" + download_url="https://github.com/ivanshi1108/assets/releases/download/v0.17/axcl_host_x86_64_V3.10.2_20251111020143_NO5046.deb" + deb_file="axcl.deb" elif [[ $arch == "aarch64" ]]; then - download_url="https://github.com/ivanshi1108/assets/releases/download/v0.16.2/axcl_host_aarch64_V3.6.5_20250908154509_NO4973.deb" - deb_file="axcl_host_aarch64_V3.6.5_20250908154509_NO4973.deb" + download_url="https://github.com/ivanshi1108/assets/releases/download/v0.17/axcl_host_aarch64_V3.10.2_20251111020143_NO5046.deb" + deb_file="axcl.deb" else echo "Unsupported architecture: $arch" exit 1 fi +# Check for required Linux headers before downloading +echo "Checking for required Linux headers..." +kernel_version=$(uname -r) +if dpkg -l | grep -q "linux-headers-${kernel_version}" || [ -d "/lib/modules/${kernel_version}/build" ]; then + echo "Linux headers or kernel modules directory found for kernel ${kernel_version}/build." +else + echo "Linux headers for kernel ${kernel_version} not found. Please install them first: sudo apt-get install linux-headers-${kernel_version}" + exit 1 +fi + # Download AXCL driver echo "Downloading AXCL driver for $arch..." -wget "$download_url" -O "$deb_file" +wget --timeout=30 --tries=3 "$download_url" -O "$deb_file" if [ $? -ne 0 ]; then - echo "Failed to download AXCL driver" + echo "Failed to download AXCL driver after retries" exit 1 fi @@ -51,7 +73,7 @@ if [ $? -ne 0 ]; then sudo dpkg -i "$deb_file" if [ $? -ne 0 ]; then - echo "AXCL driver installation failed" + echo "AXCL driver installation failed after dependency fix" exit 1 fi fi @@ -80,4 +102,9 @@ if command -v axcl-smi &> /dev/null; then else echo "axcl-smi command not found. AXCL driver installation may have failed." exit 1 -fi \ No newline at end of file +fi + +# Clean up +echo "Cleaning up temporary files..." +rm -f "$deb_file" +echo "Installation script completed." 
\ No newline at end of file diff --git a/docker/axcl/x86-axcl.hcl b/docker/axcl/x86-axcl.hcl new file mode 100644 index 000000000..78546be1a --- /dev/null +++ b/docker/axcl/x86-axcl.hcl @@ -0,0 +1,13 @@ +target frigate { + dockerfile = "docker/main/Dockerfile" + platforms = ["linux/amd64"] + target = "frigate" +} + +target x86-axcl { + dockerfile = "docker/axcl/Dockerfile" + contexts = { + frigate = "target:frigate", + } + platforms = ["linux/amd64"] +} \ No newline at end of file diff --git a/frigate/config/classification.py b/frigate/config/classification.py index fb8e3de29..9d5b16561 100644 --- a/frigate/config/classification.py +++ b/frigate/config/classification.py @@ -19,6 +19,7 @@ __all__ = [ class SemanticSearchModelEnum(str, Enum): jinav1 = "jinav1" jinav2 = "jinav2" + ax_jinav2 = "ax_jinav2" class EnrichmentsDeviceEnum(str, Enum): diff --git a/frigate/embeddings/embeddings.py b/frigate/embeddings/embeddings.py index 8d7bcd235..835986a58 100644 --- a/frigate/embeddings/embeddings.py +++ b/frigate/embeddings/embeddings.py @@ -30,6 +30,7 @@ from frigate.util.file import get_event_thumbnail_bytes from .onnx.jina_v1_embedding import JinaV1ImageEmbedding, JinaV1TextEmbedding from .onnx.jina_v2_embedding import JinaV2Embedding +from .onnx.jina_v2_embedding_ax import AXJinaV2Embedding logger = logging.getLogger(__name__) @@ -118,6 +119,18 @@ class Embeddings: self.vision_embedding = lambda input_data: self.embedding( input_data, embedding_type="vision" ) + elif self.config.semantic_search.model == SemanticSearchModelEnum.ax_jinav2: + # AXJinaV2Embedding instance for both text and vision + self.embedding = AXJinaV2Embedding( + model_size=self.config.semantic_search.model_size, + requestor=self.requestor, + ) + self.text_embedding = lambda input_data: self.embedding( + input_data, embedding_type="text" + ) + self.vision_embedding = lambda input_data: self.embedding( + input_data, embedding_type="vision" + ) else: # Default to jinav1 self.text_embedding = JinaV1TextEmbedding( model_size=config.semantic_search.model_size, diff --git a/frigate/embeddings/onnx/jina_v2_embedding_ax.py b/frigate/embeddings/onnx/jina_v2_embedding_ax.py new file mode 100644 index 000000000..1d39ce014 --- /dev/null +++ b/frigate/embeddings/onnx/jina_v2_embedding_ax.py @@ -0,0 +1,281 @@ +"""AX JinaV2 Embeddings.""" + +import io +import logging +import os +import threading +from typing import Any + +import numpy as np +from PIL import Image +from transformers import AutoTokenizer +from transformers.utils.logging import disable_progress_bar, set_verbosity_error + +from frigate.const import MODEL_CACHE_DIR +from frigate.embeddings.onnx.base_embedding import BaseEmbedding +from frigate.comms.inter_process import InterProcessRequestor +from frigate.util.downloader import ModelDownloader +from frigate.types import ModelStatusTypesEnum +from frigate.const import MODEL_CACHE_DIR, UPDATE_MODEL_STATE + +import axengine as axe + +# disables the progress bar and download logging for downloading tokenizers and image processors +disable_progress_bar() +set_verbosity_error() +logger = logging.getLogger(__name__) + + +class AXClipRunner: + def __init__(self, image_encoder_path: str, text_encoder_path: str): + self.image_encoder_path = image_encoder_path + self.text_encoder_path = text_encoder_path + self.image_encoder_runner = axe.InferenceSession(image_encoder_path) + self.text_encoder_runner = axe.InferenceSession(text_encoder_path) + + for input in self.image_encoder_runner.get_inputs(): + logger.info(f"{input.name} 
{input.shape} {input.dtype}") + + for output in self.image_encoder_runner.get_outputs(): + logger.info(f"{output.name} {output.shape} {output.dtype}") + + for input in self.text_encoder_runner.get_inputs(): + logger.info(f"{input.name} {input.shape} {input.dtype}") + + for output in self.text_encoder_runner.get_outputs(): + logger.info(f"{output.name} {output.shape} {output.dtype}") + + def run(self, onnx_inputs): + text_embeddings = [] + image_embeddings = [] + if "input_ids" in onnx_inputs: + for input_ids in onnx_inputs["input_ids"]: + input_ids = input_ids.reshape(1, -1) + text_embeddings.append( + self.text_encoder_runner.run(None, {"inputs_id": input_ids})[0][0] + ) + if "pixel_values" in onnx_inputs: + for pixel_values in onnx_inputs["pixel_values"]: + if len(pixel_values.shape) == 3: + pixel_values = pixel_values[None, ...] + image_embeddings.append( + self.image_encoder_runner.run(None, {"pixel_values": pixel_values})[ + 0 + ][0] + ) + return np.array(text_embeddings), np.array(image_embeddings) + + +class AXJinaV2Embedding(BaseEmbedding): + def __init__( + self, + model_size: str, + requestor: InterProcessRequestor, + device: str = "AUTO", + embedding_type: str = None, + ): + HF_ENDPOINT = os.environ.get("HF_ENDPOINT", "https://huggingface.co") + super().__init__( + model_name="AXERA-TECH/jina-clip-v2", + model_file=None, + download_urls={ + "image_encoder.axmodel": f"{HF_ENDPOINT}/AXERA-TECH/jina-clip-v2/resolve/main/image_encoder.axmodel", + "text_encoder.axmodel": f"{HF_ENDPOINT}/AXERA-TECH/jina-clip-v2/resolve/main/text_encoder.axmodel", + }, + ) + + self.tokenizer_source = "jinaai/jina-clip-v2" + self.tokenizer_file = "tokenizer" + self.embedding_type = embedding_type + self.requestor = requestor + self.model_size = model_size + self.device = device + self.download_path = os.path.join(MODEL_CACHE_DIR, self.model_name) + self.tokenizer = None + self.image_processor = None + self.runner = None + self.mean = np.array([0.48145466, 0.4578275, 0.40821073], dtype=np.float32) + self.std = np.array([0.26862954, 0.26130258, 0.27577711], dtype=np.float32) + + # Lock to prevent concurrent calls (text and vision share this instance) + self._call_lock = threading.Lock() + + # download the model and tokenizer + files_names = list(self.download_urls.keys()) + [self.tokenizer_file] + if not all( + os.path.exists(os.path.join(self.download_path, n)) for n in files_names + ): + logger.debug(f"starting model download for {self.model_name}") + self.downloader = ModelDownloader( + model_name=self.model_name, + download_path=self.download_path, + file_names=files_names, + download_func=self._download_model, + ) + self.downloader.ensure_model_files() + # Avoid lazy loading in worker threads: block until downloads complete + # and load the model on the main thread during initialization. 
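+ # _load_model_and_utils pulls the jinaai/jina-clip-v2 tokenizer and opens one axengine InferenceSession per encoder (image and text)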
+ self._load_model_and_utils() + else: + self.downloader = None + ModelDownloader.mark_files_state( + self.requestor, + self.model_name, + files_names, + ModelStatusTypesEnum.downloaded, + ) + self._load_model_and_utils() + logger.debug(f"models are already downloaded for {self.model_name}") + + def _download_model(self, path: str): + try: + file_name = os.path.basename(path) + + if file_name in self.download_urls: + ModelDownloader.download_from_url(self.download_urls[file_name], path) + elif file_name == self.tokenizer_file: + tokenizer = AutoTokenizer.from_pretrained( + self.tokenizer_source, + trust_remote_code=True, + cache_dir=os.path.join( + MODEL_CACHE_DIR, self.model_name, "tokenizer" + ), + clean_up_tokenization_spaces=True, + ) + tokenizer.save_pretrained(path) + self.requestor.send_data( + UPDATE_MODEL_STATE, + { + "model": f"{self.model_name}-{file_name}", + "state": ModelStatusTypesEnum.downloaded, + }, + ) + except Exception: + self.requestor.send_data( + UPDATE_MODEL_STATE, + { + "model": f"{self.model_name}-{file_name}", + "state": ModelStatusTypesEnum.error, + }, + ) + + def _load_model_and_utils(self): + if self.runner is None: + if self.downloader: + self.downloader.wait_for_download() + + self.tokenizer = AutoTokenizer.from_pretrained( + self.tokenizer_source, + cache_dir=os.path.join(MODEL_CACHE_DIR, self.model_name, "tokenizer"), + trust_remote_code=True, + clean_up_tokenization_spaces=True, + ) + + self.runner = AXClipRunner( + os.path.join(self.download_path, "image_encoder.axmodel"), + os.path.join(self.download_path, "text_encoder.axmodel"), + ) + + def _preprocess_image(self, image_data: bytes | Image.Image): + """ + Manually preprocess a single image from bytes or PIL.Image to (3, 512, 512). + """ + if isinstance(image_data, bytes): + image = Image.open(io.BytesIO(image_data)) + else: + image = image_data + + if image.mode != "RGB": + image = image.convert("RGB") + + image = image.resize((512, 512), Image.Resampling.LANCZOS) + + # Convert to numpy array, normalize to [0, 1], and transpose to (channels, height, width) + image_array = np.array(image, dtype=np.float32) / 255.0 + # Normalize using mean and std + image_array = (image_array - self.mean) / self.std + + image_array = np.transpose(image_array, (2, 0, 1)) # (H, W, C) -> (C, H, W) + + return image_array + + def _preprocess_inputs(self, raw_inputs): + """ + Preprocess inputs into a list of real input tensors (no dummies). + - For text: Returns list of input_ids. + - For vision: Returns list of pixel_values. + """ + if not isinstance(raw_inputs, list): + raw_inputs = [raw_inputs] + + processed = [] + if self.embedding_type == "text": + for text in raw_inputs: + input_ids = self.tokenizer( + [text], return_tensors="np", padding="max_length", max_length=50 + )["input_ids"] + input_ids = input_ids.astype(np.int32) + processed.append(input_ids) + elif self.embedding_type == "vision": + for img in raw_inputs: + pixel_values = self._preprocess_image(img) + processed.append( + pixel_values[np.newaxis, ...] + ) # Add batch dim: (1, 3, 512, 512) + else: + raise ValueError( + f"Invalid embedding_type: {self.embedding_type}. Must be 'text' or 'vision'." + ) + return processed + + def _postprocess_outputs(self, outputs): + """ + Process ONNX model outputs, truncating each embedding in the array to truncate_dim. + - outputs: NumPy array of embeddings. + - Returns: List of truncated embeddings. 
+ """ + # size of vector in database + truncate_dim = 768 + + # jina v2 defaults to 1024 and uses Matryoshka representation, so + # truncating only causes an extremely minor decrease in retrieval accuracy + if outputs.shape[-1] > truncate_dim: + outputs = outputs[..., :truncate_dim] + + return outputs + + def __call__( + self, inputs: list[str] | list[Image.Image] | list[str], embedding_type=None + ): + # Lock the entire call to prevent race conditions when text and vision + # embeddings are called concurrently from different threads + with self._call_lock: + self.embedding_type = embedding_type + if not self.embedding_type: + raise ValueError( + "embedding_type must be specified either in __init__ or __call__" + ) + + self._load_model_and_utils() + processed = self._preprocess_inputs(inputs) + + # Prepare ONNX inputs with matching batch sizes + onnx_inputs = {} + if self.embedding_type == "text": + onnx_inputs["input_ids"] = np.stack([x[0] for x in processed]) + elif self.embedding_type == "vision": + onnx_inputs["pixel_values"] = np.stack([x[0] for x in processed]) + else: + raise ValueError("Invalid embedding type") + + # Run inference + text_embeddings, image_embeddings = self.runner.run(onnx_inputs) + if self.embedding_type == "text": + embeddings = text_embeddings # text embeddings + elif self.embedding_type == "vision": + embeddings = image_embeddings # image embeddings + else: + raise ValueError("Invalid embedding type") + + embeddings = self._postprocess_outputs(embeddings) + return [embedding for embedding in embeddings] diff --git a/web/src/pages/Explore.tsx b/web/src/pages/Explore.tsx index 8f50e982e..4ff0a2020 100644 --- a/web/src/pages/Explore.tsx +++ b/web/src/pages/Explore.tsx @@ -292,10 +292,13 @@ export default function Explore() { const modelVersion = config?.semantic_search.model || "jinav1"; const modelSize = config?.semantic_search.model_size || "small"; + const isAxJinaV2 = modelVersion === "ax_jinav2"; // Text model state const { payload: textModelState } = useModelState( - modelVersion === "jinav1" + isAxJinaV2 + ? "AXERA-TECH/jina-clip-v2-text_encoder.axmodel" + : modelVersion === "jinav1" ? "jinaai/jina-clip-v1-text_model_fp16.onnx" : modelSize === "large" ? "jinaai/jina-clip-v2-model_fp16.onnx" @@ -304,14 +307,18 @@ export default function Explore() { // Tokenizer state const { payload: textTokenizerState } = useModelState( - modelVersion === "jinav1" + isAxJinaV2 + ? "AXERA-TECH/jina-clip-v2-tokenizer" + : modelVersion === "jinav1" ? "jinaai/jina-clip-v1-tokenizer" : "jinaai/jina-clip-v2-tokenizer", ); // Vision model state (same as text model for jinav2) const visionModelFile = - modelVersion === "jinav1" + isAxJinaV2 + ? "AXERA-TECH/jina-clip-v2-image_encoder.axmodel" + : modelVersion === "jinav1" ? modelSize === "large" ? "jinaai/jina-clip-v1-vision_model_fp16.onnx" : "jinaai/jina-clip-v1-vision_model_quantized.onnx" @@ -321,13 +328,49 @@ export default function Explore() { const { payload: visionModelState } = useModelState(visionModelFile); // Preprocessor/feature extractor state - const { payload: visionFeatureExtractorState } = useModelState( + const { payload: visionFeatureExtractorStateRaw } = useModelState( modelVersion === "jinav1" ? "jinaai/jina-clip-v1-preprocessor_config.json" : "jinaai/jina-clip-v2-preprocessor_config.json", ); + + const visionFeatureExtractorState = useMemo(() => { + if (isAxJinaV2) { + return visionModelState ?? 
"downloading"; + } + return visionFeatureExtractorStateRaw; + }, [isAxJinaV2, visionModelState, visionFeatureExtractorStateRaw]); + + const effectiveTextModelState = useMemo(() => { + if (isAxJinaV2) { + return textModelState ?? "downloading"; + } + return textModelState; + }, [isAxJinaV2, textModelState]); + + const effectiveTextTokenizerState = useMemo(() => { + if (isAxJinaV2) { + return textTokenizerState ?? "downloading"; + } + return textTokenizerState; + }, [isAxJinaV2, textTokenizerState]); + + const effectiveVisionModelState = useMemo(() => { + if (isAxJinaV2) { + return visionModelState ?? "downloading"; + } + return visionModelState; + }, [isAxJinaV2, visionModelState]); + const allModelsLoaded = useMemo(() => { + if (isAxJinaV2) { + return ( + effectiveTextModelState === "downloaded" && + effectiveTextTokenizerState === "downloaded" && + effectiveVisionModelState === "downloaded" + ); + } return ( textModelState === "downloaded" && textTokenizerState === "downloaded" && @@ -335,6 +378,10 @@ export default function Explore() { visionFeatureExtractorState === "downloaded" ); }, [ + isAxJinaV2, + effectiveTextModelState, + effectiveTextTokenizerState, + effectiveVisionModelState, textModelState, textTokenizerState, visionModelState, @@ -358,10 +405,10 @@ export default function Explore() { !defaultViewLoaded || (config?.semantic_search.enabled && (!reindexState || - !textModelState || - !textTokenizerState || - !visionModelState || - !visionFeatureExtractorState)) + !(isAxJinaV2 ? effectiveTextModelState : textModelState) || + !(isAxJinaV2 ? effectiveTextTokenizerState : textTokenizerState) || + !(isAxJinaV2 ? effectiveVisionModelState : visionModelState) || + (!isAxJinaV2 && !visionFeatureExtractorState))) ) { return ( diff --git a/web/src/types/frigateConfig.ts b/web/src/types/frigateConfig.ts index 94c9ba6e9..369160319 100644 --- a/web/src/types/frigateConfig.ts +++ b/web/src/types/frigateConfig.ts @@ -28,7 +28,7 @@ export interface FaceRecognitionConfig { recognition_threshold: number; } -export type SearchModel = "jinav1" | "jinav2"; +export type SearchModel = "jinav1" | "jinav2" | "ax_jinav2"; export type SearchModelSize = "small" | "large"; export interface CameraConfig { From 176f5cce66c25f8ce99000c1e6c9495634dca220 Mon Sep 17 00:00:00 2001 From: shizhicheng Date: Sun, 15 Feb 2026 01:51:05 +0800 Subject: [PATCH 13/19] Add workflow for Axera --- .github/workflows/ax.yml | 143 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 143 insertions(+) create mode 100644 .github/workflows/ax.yml diff --git a/.github/workflows/ax.yml b/.github/workflows/ax.yml new file mode 100644 index 000000000..82721eb3c --- /dev/null +++ b/.github/workflows/ax.yml @@ -0,0 +1,143 @@ +name: AXERA + +on: + workflow_dispatch: + +concurrency: + group: ${{ github.ref }} + cancel-in-progress: true + +env: + PYTHON_VERSION: 3.9 + +jobs: + x86_axcl_builds: + runs-on: ubuntu-22.04 + name: x86_AXCL Build + steps: + - name: Check out code + uses: actions/checkout@v4 + with: + persist-credentials: false + + - name: Set x86_AXCL_TAG + run: echo "x86_AXCL_TAG=x86-axcl-${GITHUB_SHA:0:7}" >> $GITHUB_ENV + + - name: Set Version + run: make version + + - name: Build + uses: docker/bake-action@v6 + with: + source: . 
+ push: false + targets: x86-axcl + files: docker/axcl/x86-axcl.hcl + no-cache: true + set: | + x86-axcl.tags=frigate:${{ env.x86_AXCL_TAG }} + + - name: Clean up disk space + run: | + docker system prune -f + + - name: Save Docker image as tar file + run: | + docker save frigate:${{ env.x86_AXCL_TAG }} -o frigate-${{ env.x86_AXCL_TAG }}.tar + ls -lh frigate-${{ env.x86_AXCL_TAG }}.tar + + - name: Upload Docker image artifact + uses: actions/upload-artifact@v4 + with: + name: x86-axcl-docker-image + path: frigate-${{ env.x86_AXCL_TAG }}.tar + retention-days: 7 + + rk_axcl_builds: + runs-on: ubuntu-22.04-arm + name: rk_AXCL Build + steps: + - name: Check out code + uses: actions/checkout@v4 + with: + persist-credentials: false + + - name: Set RK_AXCL_TAG + run: echo "RK_AXCL_TAG=rk-axcl-${GITHUB_SHA:0:7}" >> $GITHUB_ENV + + - name: Set Version + run: make version + + - name: Build + uses: docker/bake-action@v6 + with: + source: . + push: false + targets: rk-axcl + files: | + docker/rockchip/rk.hcl + docker/axcl/rk-axcl.hcl + no-cache: true + set: | + rk-axcl.tags=frigate:${{ env.RK_AXCL_TAG }} + + - name: Clean up disk space + run: | + docker system prune -f + + - name: Save Docker image as tar file + run: | + docker save frigate:${{ env.RK_AXCL_TAG }} -o frigate-${{ env.RK_AXCL_TAG }}.tar + ls -lh frigate-${{ env.RK_AXCL_TAG }}.tar + + - name: Upload Docker image artifact + uses: actions/upload-artifact@v4 + with: + name: rk-axcl-docker-image + path: frigate-${{ env.RK_AXCL_TAG }}.tar + retention-days: 7 + + + rpi_axcl_builds: + runs-on: ubuntu-22.04-arm + name: RPi_AXCL Build + steps: + - name: Check out code + uses: actions/checkout@v4 + with: + persist-credentials: false + + - name: Set RPi_AXCL_TAG + run: echo "RPi_AXCL_TAG=rpi-axcl-${GITHUB_SHA:0:7}" >> $GITHUB_ENV + + - name: Set Version + run: make version + + - name: Build + uses: docker/bake-action@v6 + with: + source: . + push: false + targets: rpi-axcl + files: | + docker/rpi/rpi.hcl + docker/axcl/rpi-axcl.hcl + no-cache: true + set: | + rpi-axcl.tags=frigate:${{ env.RPi_AXCL_TAG }} + + - name: Clean up disk space + run: | + docker system prune -f + + - name: Save Docker image as tar file + run: | + docker save frigate:${{ env.RPi_AXCL_TAG }} -o frigate-${{ env.RPi_AXCL_TAG }}.tar + ls -lh frigate-${{ env.RPi_AXCL_TAG }}.tar + + - name: Upload Docker image artifact + uses: actions/upload-artifact@v4 + with: + name: rpi-axcl-docker-image + path: frigate-${{ env.RPi_AXCL_TAG }}.tar + retention-days: 7 From 984d654c408aa716771208aecd4bbedd9e3442fe Mon Sep 17 00:00:00 2001 From: Meow Date: Mon, 23 Feb 2026 14:45:49 +0100 Subject: [PATCH 14/19] Update line breaks in video_pipeline.md diagram (#21919) Mermaid compatible newlines (
)

---
 docs/docs/frigate/video_pipeline.md | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/docs/docs/frigate/video_pipeline.md b/docs/docs/frigate/video_pipeline.md
index ba9365650..74b804b16 100644
--- a/docs/docs/frigate/video_pipeline.md
+++ b/docs/docs/frigate/video_pipeline.md
@@ -37,18 +37,18 @@ The following diagram adds a lot more detail than the simple view explained before
 %%{init: {"themeVariables": {"edgeLabelBackground": "transparent"}}}%%
 flowchart TD
-    RecStore[(Recording\nstore)]
-    SnapStore[(Snapshot\nstore)]
+    RecStore[(Recording<br/>store)]
+    SnapStore[(Snapshot<br/>store)]
 
     subgraph Acquisition
         Cam["Camera"] -->|FFmpeg supported| Stream
-        Cam -->|"Other streaming\nprotocols"| go2rtc
+        Cam -->|"Other streaming<br/>protocols"| go2rtc
         go2rtc("go2rtc") --> Stream
-        Stream[Capture main and\nsub streams] --> |detect stream|Decode(Decode and\ndownscale)
+        Stream[Capture main and<br/>sub streams] --> |detect stream|Decode(Decode and<br/>downscale)
     end
     subgraph Motion
-        Decode --> MotionM(Apply\nmotion masks)
-        MotionM --> MotionD(Motion\ndetection)
+        Decode --> MotionM(Apply<br/>motion masks)
+        MotionM --> MotionD(Motion<br/>detection)
     end
     subgraph Detection
         MotionD --> |motion regions| ObjectD(Object detection)
@@ -60,8 +60,8 @@ flowchart TD
     MotionD --> |motion event|Birdseye
     ObjectZ --> |object event|Birdseye
 
-    MotionD --> |"video segments\n(retain motion)"|RecStore
+    MotionD --> |"video segments<br/>(retain motion)"|RecStore
     ObjectZ --> |detection clip|RecStore
-    Stream -->|"video segments\n(retain all)"| RecStore
+    Stream -->|"video segments<br/>(retain all)"| RecStore
     ObjectZ --> |detection snapshot|SnapStore
 ```

From dd8282ff3c0a1ef8e6418f5ec0cb3a88cba199dc Mon Sep 17 00:00:00 2001
From: Bart Nagel
Date: Tue, 24 Feb 2026 06:38:04 -0800
Subject: [PATCH 15/19] Docs: fix YOLOv9 onnx export (#22107)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Docs: fix missing dependency in YOLOv9 build script

I had this command fail because it didn't have cmake available. This change fixes that problem.

* Docs: avoid failure in YOLOv9 build script

Pinning to 0.4.36 avoids this error:

```
10.58 Downloading onnx
12.87 Building onnxsim==0.5.0
1029.4   × Failed to download and build `onnxsim==0.5.0`
1029.4   ╰─▶ Package metadata version `0.4.36` does not match given version `0.5.0`
1029.4   help: `onnxsim` (v0.5.0) was included because `onnx-simplifier` (v0.5.0)
1029.4   depends on `onnxsim`
```

* Update Dockerfile instructions for object detectors

---------

Co-authored-by: Nicolas Mowen
---
 docs/docs/configuration/object_detectors.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/docs/configuration/object_detectors.md b/docs/docs/configuration/object_detectors.md
index 7016bf4b6..5db813d29 100644
--- a/docs/docs/configuration/object_detectors.md
+++ b/docs/docs/configuration/object_detectors.md
@@ -1057,12 +1057,12 @@ YOLOv9 model can be exported as ONNX using the command below. You can copy and paste
 ```sh
 docker build . --build-arg MODEL_SIZE=t --build-arg IMG_SIZE=320 --output . -f- <<'EOF'
 FROM python:3.11 AS build
-RUN apt-get update && apt-get install --no-install-recommends -y libgl1 && rm -rf /var/lib/apt/lists/*
-COPY --from=ghcr.io/astral-sh/uv:0.8.0 /uv /bin/
+RUN apt-get update && apt-get install --no-install-recommends -y cmake libgl1 && rm -rf /var/lib/apt/lists/*
+COPY --from=ghcr.io/astral-sh/uv:0.10.4 /uv /bin/
 WORKDIR /yolov9
 ADD https://github.com/WongKinYiu/yolov9.git .
RUN uv pip install --system -r requirements.txt -RUN uv pip install --system onnx==1.18.0 onnxruntime onnx-simplifier>=0.4.1 onnxscript +RUN uv pip install --system onnx==1.18.0 onnxruntime onnx-simplifier==0.4.* onnxscript ARG MODEL_SIZE ARG IMG_SIZE ADD https://github.com/WongKinYiu/yolov9/releases/download/v0.1/yolov9-${MODEL_SIZE}-converted.pt yolov9-${MODEL_SIZE}.pt From c174956b2939752518f90d749ea6a5c7820fb12b Mon Sep 17 00:00:00 2001 From: shizhicheng Date: Thu, 5 Mar 2026 22:11:10 +0800 Subject: [PATCH 16/19] Refactor: Replace the ax_jinav2 model type with the axengine detector and jinav2 --- frigate/config/classification.py | 1 - frigate/embeddings/embeddings.py | 58 ++++++++++--------- web/src/pages/Explore.tsx | 98 +++++++++++--------------------- web/src/types/frigateConfig.ts | 2 +- 4 files changed, 67 insertions(+), 92 deletions(-) diff --git a/frigate/config/classification.py b/frigate/config/classification.py index 9d5b16561..fb8e3de29 100644 --- a/frigate/config/classification.py +++ b/frigate/config/classification.py @@ -19,7 +19,6 @@ __all__ = [ class SemanticSearchModelEnum(str, Enum): jinav1 = "jinav1" jinav2 = "jinav2" - ax_jinav2 = "ax_jinav2" class EnrichmentsDeviceEnum(str, Enum): diff --git a/frigate/embeddings/embeddings.py b/frigate/embeddings/embeddings.py index 835986a58..f306c0982 100644 --- a/frigate/embeddings/embeddings.py +++ b/frigate/embeddings/embeddings.py @@ -94,6 +94,10 @@ class Embeddings: # Create tables if they don't exist self.db.create_embeddings_tables() + self.has_axengine = any( + d.type == "axengine" for d in self.config.detectors.values() + ) + models = self.get_model_definitions() for model in models: @@ -106,25 +110,20 @@ class Embeddings: ) if self.config.semantic_search.model == SemanticSearchModelEnum.jinav2: - # Single JinaV2Embedding instance for both text and vision - self.embedding = JinaV2Embedding( - model_size=self.config.semantic_search.model_size, - requestor=self.requestor, - device=config.semantic_search.device - or ("GPU" if config.semantic_search.model_size == "large" else "CPU"), - ) - self.text_embedding = lambda input_data: self.embedding( - input_data, embedding_type="text" - ) - self.vision_embedding = lambda input_data: self.embedding( - input_data, embedding_type="vision" - ) - elif self.config.semantic_search.model == SemanticSearchModelEnum.ax_jinav2: - # AXJinaV2Embedding instance for both text and vision - self.embedding = AXJinaV2Embedding( - model_size=self.config.semantic_search.model_size, - requestor=self.requestor, - ) + if self.has_axengine: + # AXJinaV2Embedding instance for both text and vision on Axera NPU + self.embedding = AXJinaV2Embedding( + model_size=self.config.semantic_search.model_size, + requestor=self.requestor, + ) + else: + # Single JinaV2Embedding instance for both text and vision + self.embedding = JinaV2Embedding( + model_size=self.config.semantic_search.model_size, + requestor=self.requestor, + device=config.semantic_search.device + or ("GPU" if config.semantic_search.model_size == "large" else "CPU"), + ) self.text_embedding = lambda input_data: self.embedding( input_data, embedding_type="text" ) @@ -151,13 +150,20 @@ class Embeddings: def get_model_definitions(self): # Version-specific models if self.config.semantic_search.model == SemanticSearchModelEnum.jinav2: - models = [ - "jinaai/jina-clip-v2-tokenizer", - "jinaai/jina-clip-v2-model_fp16.onnx" - if self.config.semantic_search.model_size == "large" - else "jinaai/jina-clip-v2-model_quantized.onnx", - 
"jinaai/jina-clip-v2-preprocessor_config.json", - ] + if self.has_axengine: + models = [ + "AXERA-TECH/jina-clip-v2-text_encoder.axmodel", + "AXERA-TECH/jina-clip-v2-image_encoder.axmodel", + "AXERA-TECH/jina-clip-v2-tokenizer", + ] + else: + models = [ + "jinaai/jina-clip-v2-tokenizer", + "jinaai/jina-clip-v2-model_fp16.onnx" + if self.config.semantic_search.model_size == "large" + else "jinaai/jina-clip-v2-model_quantized.onnx", + "jinaai/jina-clip-v2-preprocessor_config.json", + ] else: # Default to jinav1 models = [ "jinaai/jina-clip-v1-text_model_fp16.onnx", diff --git a/web/src/pages/Explore.tsx b/web/src/pages/Explore.tsx index 4ff0a2020..98c1a653c 100644 --- a/web/src/pages/Explore.tsx +++ b/web/src/pages/Explore.tsx @@ -292,17 +292,24 @@ export default function Explore() { const modelVersion = config?.semantic_search.model || "jinav1"; const modelSize = config?.semantic_search.model_size || "small"; - const isAxJinaV2 = modelVersion === "ax_jinav2"; + const isAxJinaV2 = useMemo( + () => + modelVersion === "jinav2" && + Object.values( + (config?.detectors ?? {}) as Record, + ).some((detector) => detector?.type === "axengine"), + [modelVersion, config?.detectors], + ); // Text model state const { payload: textModelState } = useModelState( isAxJinaV2 ? "AXERA-TECH/jina-clip-v2-text_encoder.axmodel" : modelVersion === "jinav1" - ? "jinaai/jina-clip-v1-text_model_fp16.onnx" - : modelSize === "large" - ? "jinaai/jina-clip-v2-model_fp16.onnx" - : "jinaai/jina-clip-v2-model_quantized.onnx", + ? "jinaai/jina-clip-v1-text_model_fp16.onnx" + : modelSize === "large" + ? "jinaai/jina-clip-v2-model_fp16.onnx" + : "jinaai/jina-clip-v2-model_quantized.onnx", ); // Tokenizer state @@ -310,8 +317,8 @@ export default function Explore() { isAxJinaV2 ? "AXERA-TECH/jina-clip-v2-tokenizer" : modelVersion === "jinav1" - ? "jinaai/jina-clip-v1-tokenizer" - : "jinaai/jina-clip-v2-tokenizer", + ? "jinaai/jina-clip-v1-tokenizer" + : "jinaai/jina-clip-v2-tokenizer", ); // Vision model state (same as text model for jinav2) @@ -319,69 +326,30 @@ export default function Explore() { isAxJinaV2 ? "AXERA-TECH/jina-clip-v2-image_encoder.axmodel" : modelVersion === "jinav1" - ? modelSize === "large" - ? "jinaai/jina-clip-v1-vision_model_fp16.onnx" - : "jinaai/jina-clip-v1-vision_model_quantized.onnx" - : modelSize === "large" - ? "jinaai/jina-clip-v2-model_fp16.onnx" - : "jinaai/jina-clip-v2-model_quantized.onnx"; + ? modelSize === "large" + ? "jinaai/jina-clip-v1-vision_model_fp16.onnx" + : "jinaai/jina-clip-v1-vision_model_quantized.onnx" + : modelSize === "large" + ? "jinaai/jina-clip-v2-model_fp16.onnx" + : "jinaai/jina-clip-v2-model_quantized.onnx"; const { payload: visionModelState } = useModelState(visionModelFile); // Preprocessor/feature extractor state - const { payload: visionFeatureExtractorStateRaw } = useModelState( + const { payload: visionFeatureExtractorState } = useModelState( modelVersion === "jinav1" ? "jinaai/jina-clip-v1-preprocessor_config.json" : "jinaai/jina-clip-v2-preprocessor_config.json", ); - - const visionFeatureExtractorState = useMemo(() => { - if (isAxJinaV2) { - return visionModelState ?? "downloading"; - } - return visionFeatureExtractorStateRaw; - }, [isAxJinaV2, visionModelState, visionFeatureExtractorStateRaw]); - - const effectiveTextModelState = useMemo(() => { - if (isAxJinaV2) { - return textModelState ?? 
"downloading"; - } - return textModelState; - }, [isAxJinaV2, textModelState]); - - const effectiveTextTokenizerState = useMemo(() => { - if (isAxJinaV2) { - return textTokenizerState ?? "downloading"; - } - return textTokenizerState; - }, [isAxJinaV2, textTokenizerState]); - - const effectiveVisionModelState = useMemo(() => { - if (isAxJinaV2) { - return visionModelState ?? "downloading"; - } - return visionModelState; - }, [isAxJinaV2, visionModelState]); - const allModelsLoaded = useMemo(() => { - if (isAxJinaV2) { - return ( - effectiveTextModelState === "downloaded" && - effectiveTextTokenizerState === "downloaded" && - effectiveVisionModelState === "downloaded" - ); - } return ( textModelState === "downloaded" && textTokenizerState === "downloaded" && visionModelState === "downloaded" && - visionFeatureExtractorState === "downloaded" + (isAxJinaV2 || visionFeatureExtractorState === "downloaded") ); }, [ isAxJinaV2, - effectiveTextModelState, - effectiveTextTokenizerState, - effectiveVisionModelState, textModelState, textTokenizerState, visionModelState, @@ -405,9 +373,9 @@ export default function Explore() { !defaultViewLoaded || (config?.semantic_search.enabled && (!reindexState || - !(isAxJinaV2 ? effectiveTextModelState : textModelState) || - !(isAxJinaV2 ? effectiveTextTokenizerState : textTokenizerState) || - !(isAxJinaV2 ? effectiveVisionModelState : visionModelState) || + !textModelState || + !textTokenizerState || + !visionModelState || (!isAxJinaV2 && !visionFeatureExtractorState))) ) { return ( @@ -498,12 +466,14 @@ export default function Explore() { "exploreIsUnavailable.downloadingModels.setup.visionModel", )} -
- {renderModelStateIcon(visionFeatureExtractorState)} - {t( - "exploreIsUnavailable.downloadingModels.setup.visionModelFeatureExtractor", - )} -
+ {!isAxJinaV2 && ( +
+ {renderModelStateIcon(visionFeatureExtractorState)} + {t( + "exploreIsUnavailable.downloadingModels.setup.visionModelFeatureExtractor", + )} +
+ )}
{renderModelStateIcon(textModelState)} {t( @@ -520,7 +490,7 @@ export default function Explore() { {(textModelState === "error" || textTokenizerState === "error" || visionModelState === "error" || - visionFeatureExtractorState === "error") && ( + (!isAxJinaV2 && visionFeatureExtractorState === "error")) && (
{t("exploreIsUnavailable.downloadingModels.error")}
diff --git a/web/src/types/frigateConfig.ts b/web/src/types/frigateConfig.ts index 369160319..94c9ba6e9 100644 --- a/web/src/types/frigateConfig.ts +++ b/web/src/types/frigateConfig.ts @@ -28,7 +28,7 @@ export interface FaceRecognitionConfig { recognition_threshold: number; } -export type SearchModel = "jinav1" | "jinav2" | "ax_jinav2"; +export type SearchModel = "jinav1" | "jinav2"; export type SearchModelSize = "small" | "large"; export interface CameraConfig { From 8073d618eb2c694be409c6110a2097c9e1a08601 Mon Sep 17 00:00:00 2001 From: shizhicheng Date: Fri, 6 Mar 2026 13:23:38 +0800 Subject: [PATCH 17/19] format file by ruff --- frigate/embeddings/embeddings.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/frigate/embeddings/embeddings.py b/frigate/embeddings/embeddings.py index f306c0982..95ff886a1 100644 --- a/frigate/embeddings/embeddings.py +++ b/frigate/embeddings/embeddings.py @@ -122,7 +122,9 @@ class Embeddings: model_size=self.config.semantic_search.model_size, requestor=self.requestor, device=config.semantic_search.device - or ("GPU" if config.semantic_search.model_size == "large" else "CPU"), + or ( + "GPU" if config.semantic_search.model_size == "large" else "CPU" + ), ) self.text_embedding = lambda input_data: self.embedding( input_data, embedding_type="text" From f955e6d661c86f30baf06a828c426152fa80d9e4 Mon Sep 17 00:00:00 2001 From: shizhicheng Date: Fri, 6 Mar 2026 13:38:41 +0800 Subject: [PATCH 18/19] modify jina_v2_embedding_ax.py by ruff check result --- frigate/embeddings/onnx/jina_v2_embedding_ax.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/frigate/embeddings/onnx/jina_v2_embedding_ax.py b/frigate/embeddings/onnx/jina_v2_embedding_ax.py index 1d39ce014..be222c306 100644 --- a/frigate/embeddings/onnx/jina_v2_embedding_ax.py +++ b/frigate/embeddings/onnx/jina_v2_embedding_ax.py @@ -4,21 +4,18 @@ import io import logging import os import threading -from typing import Any +import axengine as axe import numpy as np from PIL import Image from transformers import AutoTokenizer from transformers.utils.logging import disable_progress_bar, set_verbosity_error -from frigate.const import MODEL_CACHE_DIR -from frigate.embeddings.onnx.base_embedding import BaseEmbedding from frigate.comms.inter_process import InterProcessRequestor -from frigate.util.downloader import ModelDownloader -from frigate.types import ModelStatusTypesEnum from frigate.const import MODEL_CACHE_DIR, UPDATE_MODEL_STATE - -import axengine as axe +from frigate.embeddings.onnx.base_embedding import BaseEmbedding +from frigate.types import ModelStatusTypesEnum +from frigate.util.downloader import ModelDownloader # disables the progress bar and download logging for downloading tokenizers and image processors disable_progress_bar() From 4eae551341e3a0055facab20534acef07cb6a224 Mon Sep 17 00:00:00 2001 From: shizhicheng Date: Sat, 7 Mar 2026 01:58:11 +0800 Subject: [PATCH 19/19] Refactor: Reimplement the clip function in axengine --- frigate/detectors/detection_runners.py | 127 ++++++++ frigate/embeddings/embeddings.py | 49 +-- frigate/embeddings/onnx/jina_v2_embedding.py | 19 +- .../embeddings/onnx/jina_v2_embedding_ax.py | 278 ------------------ frigate/util/axengine_converter.py | 190 ++++++++++++ web/src/pages/Explore.tsx | 65 ++-- 6 files changed, 369 insertions(+), 359 deletions(-) delete mode 100644 frigate/embeddings/onnx/jina_v2_embedding_ax.py create mode 100644 frigate/util/axengine_converter.py diff --git 
a/frigate/detectors/detection_runners.py b/frigate/detectors/detection_runners.py index fcbb41e66..7565c9a3d 100644 --- a/frigate/detectors/detection_runners.py +++ b/frigate/detectors/detection_runners.py @@ -10,6 +10,10 @@ from typing import Any import numpy as np import onnxruntime as ort +from frigate.util.axengine_converter import ( + auto_convert_model as auto_load_axengine_model, +) +from frigate.util.axengine_converter import is_axengine_compatible from frigate.util.model import get_ort_providers from frigate.util.rknn_converter import auto_convert_model, is_rknn_compatible @@ -548,12 +552,135 @@ class RKNNModelRunner(BaseModelRunner): pass +class AXEngineModelRunner(BaseModelRunner): + """Run AXEngine models for embeddings.""" + + _mean = np.array([0.48145466, 0.4578275, 0.40821073], dtype=np.float32).reshape( + 1, 3, 1, 1 + ) + _std = np.array([0.26862954, 0.26130258, 0.27577711], dtype=np.float32).reshape( + 1, 3, 1, 1 + ) + + def __init__(self, model_path: str, model_type: str | None = None): + self.model_path = model_path + self.model_type = model_type + self._inference_lock = threading.Lock() + self.image_session = None + self.text_session = None + self.text_pad_token_id = 0 + self._load_model() + + def _load_model(self): + try: + import axengine as axe + from transformers import AutoTokenizer + except ImportError: + logger.error("AXEngine is not available") + raise ImportError("AXEngine is not available") + + model_dir = os.path.dirname(self.model_path) + image_model_path = os.path.join(model_dir, "image_encoder.axmodel") + text_model_path = os.path.join(model_dir, "text_encoder.axmodel") + tokenizer_path = os.path.join(model_dir, "tokenizer") + + self.image_session = axe.InferenceSession(image_model_path) + self.text_session = axe.InferenceSession(text_model_path) + + try: + tokenizer = AutoTokenizer.from_pretrained( + tokenizer_path, + trust_remote_code=True, + clean_up_tokenization_spaces=True, + ) + if tokenizer.pad_token_id is not None: + self.text_pad_token_id = int(tokenizer.pad_token_id) + except Exception: + logger.warning( + "Failed to load tokenizer from %s for AXEngine padding, using 0", + tokenizer_path, + ) + + def get_input_names(self) -> list[str]: + return ["input_ids", "pixel_values"] + + def get_input_width(self) -> int: + return 512 + + @staticmethod + def _has_real_text_inputs(inputs: dict[str, Any]) -> bool: + input_ids = inputs.get("input_ids") + + if input_ids is None: + return False + + if input_ids.ndim < 2: + return False + + return input_ids.shape[-1] != 16 or np.any(input_ids) + + @staticmethod + def _has_real_image_inputs(inputs: dict[str, Any]) -> bool: + pixel_values = inputs.get("pixel_values") + + return pixel_values is not None and np.any(pixel_values) + + def _prepare_text_inputs(self, input_ids: np.ndarray) -> np.ndarray: + padded_input_ids = np.full((1, 50), self.text_pad_token_id, dtype=np.int32) + truncated_input_ids = input_ids.reshape(1, -1)[:, :50].astype(np.int32) + padded_input_ids[:, : truncated_input_ids.shape[1]] = truncated_input_ids + return padded_input_ids + + @classmethod + def _prepare_pixel_values(cls, pixel_values: np.ndarray) -> np.ndarray: + if len(pixel_values.shape) == 3: + pixel_values = pixel_values[None, ...] 
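+ # _mean and _std are stored as (1, 3, 1, 1), so the normalization below broadcasts across an NCHW batch; the values are the standard CLIP constants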
+ + pixel_values = pixel_values.astype(np.float32) + return (pixel_values - cls._mean) / cls._std + + def run(self, inputs: dict[str, Any]) -> list[np.ndarray | None]: + outputs: list[np.ndarray | None] = [None, None, None, None] + + with self._inference_lock: + if self._has_real_text_inputs(inputs): + text_embeddings = [] + for input_ids in inputs["input_ids"]: + text_embeddings.append( + self.text_session.run( + None, + {"inputs_id": self._prepare_text_inputs(input_ids)}, + )[0][0] + ) + outputs[2] = np.array(text_embeddings) + + if self._has_real_image_inputs(inputs): + image_embeddings = [] + for pixel_values in inputs["pixel_values"]: + image_embeddings.append( + self.image_session.run( + None, + {"pixel_values": self._prepare_pixel_values(pixel_values)}, + )[0][0] + ) + + outputs[3] = np.array(image_embeddings) + + return outputs + + def get_optimized_runner( model_path: str, device: str | None, model_type: str, **kwargs ) -> BaseModelRunner: """Get an optimized runner for the hardware.""" device = device or "AUTO" + if is_axengine_compatible(model_path, device, model_type): + axmodel_path = auto_load_axengine_model(model_path, model_type) + + if axmodel_path: + return AXEngineModelRunner(axmodel_path, model_type) + if device != "CPU" and is_rknn_compatible(model_path): rknn_path = auto_convert_model(model_path) diff --git a/frigate/embeddings/embeddings.py b/frigate/embeddings/embeddings.py index 95ff886a1..8d7bcd235 100644 --- a/frigate/embeddings/embeddings.py +++ b/frigate/embeddings/embeddings.py @@ -30,7 +30,6 @@ from frigate.util.file import get_event_thumbnail_bytes from .onnx.jina_v1_embedding import JinaV1ImageEmbedding, JinaV1TextEmbedding from .onnx.jina_v2_embedding import JinaV2Embedding -from .onnx.jina_v2_embedding_ax import AXJinaV2Embedding logger = logging.getLogger(__name__) @@ -94,10 +93,6 @@ class Embeddings: # Create tables if they don't exist self.db.create_embeddings_tables() - self.has_axengine = any( - d.type == "axengine" for d in self.config.detectors.values() - ) - models = self.get_model_definitions() for model in models: @@ -110,22 +105,13 @@ class Embeddings: ) if self.config.semantic_search.model == SemanticSearchModelEnum.jinav2: - if self.has_axengine: - # AXJinaV2Embedding instance for both text and vision on Axera NPU - self.embedding = AXJinaV2Embedding( - model_size=self.config.semantic_search.model_size, - requestor=self.requestor, - ) - else: - # Single JinaV2Embedding instance for both text and vision - self.embedding = JinaV2Embedding( - model_size=self.config.semantic_search.model_size, - requestor=self.requestor, - device=config.semantic_search.device - or ( - "GPU" if config.semantic_search.model_size == "large" else "CPU" - ), - ) + # Single JinaV2Embedding instance for both text and vision + self.embedding = JinaV2Embedding( + model_size=self.config.semantic_search.model_size, + requestor=self.requestor, + device=config.semantic_search.device + or ("GPU" if config.semantic_search.model_size == "large" else "CPU"), + ) self.text_embedding = lambda input_data: self.embedding( input_data, embedding_type="text" ) @@ -152,20 +138,13 @@ class Embeddings: def get_model_definitions(self): # Version-specific models if self.config.semantic_search.model == SemanticSearchModelEnum.jinav2: - if self.has_axengine: - models = [ - "AXERA-TECH/jina-clip-v2-text_encoder.axmodel", - "AXERA-TECH/jina-clip-v2-image_encoder.axmodel", - "AXERA-TECH/jina-clip-v2-tokenizer", - ] - else: - models = [ - "jinaai/jina-clip-v2-tokenizer", - 
"jinaai/jina-clip-v2-model_fp16.onnx" - if self.config.semantic_search.model_size == "large" - else "jinaai/jina-clip-v2-model_quantized.onnx", - "jinaai/jina-clip-v2-preprocessor_config.json", - ] + models = [ + "jinaai/jina-clip-v2-tokenizer", + "jinaai/jina-clip-v2-model_fp16.onnx" + if self.config.semantic_search.model_size == "large" + else "jinaai/jina-clip-v2-model_quantized.onnx", + "jinaai/jina-clip-v2-preprocessor_config.json", + ] else: # Default to jinav1 models = [ "jinaai/jina-clip-v1-text_model_fp16.onnx", diff --git a/frigate/embeddings/onnx/jina_v2_embedding.py b/frigate/embeddings/onnx/jina_v2_embedding.py index 1abd968c9..aa3947943 100644 --- a/frigate/embeddings/onnx/jina_v2_embedding.py +++ b/frigate/embeddings/onnx/jina_v2_embedding.py @@ -37,13 +37,18 @@ class JinaV2Embedding(BaseEmbedding): "model_fp16.onnx" if model_size == "large" else "model_quantized.onnx" ) HF_ENDPOINT = os.environ.get("HF_ENDPOINT", "https://huggingface.co") + use_axengine = (device or "").upper() == "AXENGINE" super().__init__( model_name="jinaai/jina-clip-v2", model_file=model_file, - download_urls={ - model_file: f"{HF_ENDPOINT}/jinaai/jina-clip-v2/resolve/main/onnx/{model_file}", - "preprocessor_config.json": f"{HF_ENDPOINT}/jinaai/jina-clip-v2/resolve/main/preprocessor_config.json", - }, + download_urls=( + {} + if use_axengine + else { + model_file: f"{HF_ENDPOINT}/jinaai/jina-clip-v2/resolve/main/onnx/{model_file}", + "preprocessor_config.json": f"{HF_ENDPOINT}/jinaai/jina-clip-v2/resolve/main/preprocessor_config.json", + } + ), ) self.tokenizer_file = "tokenizer" self.embedding_type = embedding_type @@ -59,7 +64,11 @@ class JinaV2Embedding(BaseEmbedding): self._call_lock = threading.Lock() # download the model and tokenizer - files_names = list(self.download_urls.keys()) + [self.tokenizer_file] + files_names = ( + [self.tokenizer_file] + if use_axengine + else list(self.download_urls.keys()) + [self.tokenizer_file] + ) if not all( os.path.exists(os.path.join(self.download_path, n)) for n in files_names ): diff --git a/frigate/embeddings/onnx/jina_v2_embedding_ax.py b/frigate/embeddings/onnx/jina_v2_embedding_ax.py deleted file mode 100644 index be222c306..000000000 --- a/frigate/embeddings/onnx/jina_v2_embedding_ax.py +++ /dev/null @@ -1,278 +0,0 @@ -"""AX JinaV2 Embeddings.""" - -import io -import logging -import os -import threading - -import axengine as axe -import numpy as np -from PIL import Image -from transformers import AutoTokenizer -from transformers.utils.logging import disable_progress_bar, set_verbosity_error - -from frigate.comms.inter_process import InterProcessRequestor -from frigate.const import MODEL_CACHE_DIR, UPDATE_MODEL_STATE -from frigate.embeddings.onnx.base_embedding import BaseEmbedding -from frigate.types import ModelStatusTypesEnum -from frigate.util.downloader import ModelDownloader - -# disables the progress bar and download logging for downloading tokenizers and image processors -disable_progress_bar() -set_verbosity_error() -logger = logging.getLogger(__name__) - - -class AXClipRunner: - def __init__(self, image_encoder_path: str, text_encoder_path: str): - self.image_encoder_path = image_encoder_path - self.text_encoder_path = text_encoder_path - self.image_encoder_runner = axe.InferenceSession(image_encoder_path) - self.text_encoder_runner = axe.InferenceSession(text_encoder_path) - - for input in self.image_encoder_runner.get_inputs(): - logger.info(f"{input.name} {input.shape} {input.dtype}") - - for output in self.image_encoder_runner.get_outputs(): 
- logger.info(f"{output.name} {output.shape} {output.dtype}") - - for input in self.text_encoder_runner.get_inputs(): - logger.info(f"{input.name} {input.shape} {input.dtype}") - - for output in self.text_encoder_runner.get_outputs(): - logger.info(f"{output.name} {output.shape} {output.dtype}") - - def run(self, onnx_inputs): - text_embeddings = [] - image_embeddings = [] - if "input_ids" in onnx_inputs: - for input_ids in onnx_inputs["input_ids"]: - input_ids = input_ids.reshape(1, -1) - text_embeddings.append( - self.text_encoder_runner.run(None, {"inputs_id": input_ids})[0][0] - ) - if "pixel_values" in onnx_inputs: - for pixel_values in onnx_inputs["pixel_values"]: - if len(pixel_values.shape) == 3: - pixel_values = pixel_values[None, ...] - image_embeddings.append( - self.image_encoder_runner.run(None, {"pixel_values": pixel_values})[ - 0 - ][0] - ) - return np.array(text_embeddings), np.array(image_embeddings) - - -class AXJinaV2Embedding(BaseEmbedding): - def __init__( - self, - model_size: str, - requestor: InterProcessRequestor, - device: str = "AUTO", - embedding_type: str = None, - ): - HF_ENDPOINT = os.environ.get("HF_ENDPOINT", "https://huggingface.co") - super().__init__( - model_name="AXERA-TECH/jina-clip-v2", - model_file=None, - download_urls={ - "image_encoder.axmodel": f"{HF_ENDPOINT}/AXERA-TECH/jina-clip-v2/resolve/main/image_encoder.axmodel", - "text_encoder.axmodel": f"{HF_ENDPOINT}/AXERA-TECH/jina-clip-v2/resolve/main/text_encoder.axmodel", - }, - ) - - self.tokenizer_source = "jinaai/jina-clip-v2" - self.tokenizer_file = "tokenizer" - self.embedding_type = embedding_type - self.requestor = requestor - self.model_size = model_size - self.device = device - self.download_path = os.path.join(MODEL_CACHE_DIR, self.model_name) - self.tokenizer = None - self.image_processor = None - self.runner = None - self.mean = np.array([0.48145466, 0.4578275, 0.40821073], dtype=np.float32) - self.std = np.array([0.26862954, 0.26130258, 0.27577711], dtype=np.float32) - - # Lock to prevent concurrent calls (text and vision share this instance) - self._call_lock = threading.Lock() - - # download the model and tokenizer - files_names = list(self.download_urls.keys()) + [self.tokenizer_file] - if not all( - os.path.exists(os.path.join(self.download_path, n)) for n in files_names - ): - logger.debug(f"starting model download for {self.model_name}") - self.downloader = ModelDownloader( - model_name=self.model_name, - download_path=self.download_path, - file_names=files_names, - download_func=self._download_model, - ) - self.downloader.ensure_model_files() - # Avoid lazy loading in worker threads: block until downloads complete - # and load the model on the main thread during initialization. 
- self._load_model_and_utils() - else: - self.downloader = None - ModelDownloader.mark_files_state( - self.requestor, - self.model_name, - files_names, - ModelStatusTypesEnum.downloaded, - ) - self._load_model_and_utils() - logger.debug(f"models are already downloaded for {self.model_name}") - - def _download_model(self, path: str): - try: - file_name = os.path.basename(path) - - if file_name in self.download_urls: - ModelDownloader.download_from_url(self.download_urls[file_name], path) - elif file_name == self.tokenizer_file: - tokenizer = AutoTokenizer.from_pretrained( - self.tokenizer_source, - trust_remote_code=True, - cache_dir=os.path.join( - MODEL_CACHE_DIR, self.model_name, "tokenizer" - ), - clean_up_tokenization_spaces=True, - ) - tokenizer.save_pretrained(path) - self.requestor.send_data( - UPDATE_MODEL_STATE, - { - "model": f"{self.model_name}-{file_name}", - "state": ModelStatusTypesEnum.downloaded, - }, - ) - except Exception: - self.requestor.send_data( - UPDATE_MODEL_STATE, - { - "model": f"{self.model_name}-{file_name}", - "state": ModelStatusTypesEnum.error, - }, - ) - - def _load_model_and_utils(self): - if self.runner is None: - if self.downloader: - self.downloader.wait_for_download() - - self.tokenizer = AutoTokenizer.from_pretrained( - self.tokenizer_source, - cache_dir=os.path.join(MODEL_CACHE_DIR, self.model_name, "tokenizer"), - trust_remote_code=True, - clean_up_tokenization_spaces=True, - ) - - self.runner = AXClipRunner( - os.path.join(self.download_path, "image_encoder.axmodel"), - os.path.join(self.download_path, "text_encoder.axmodel"), - ) - - def _preprocess_image(self, image_data: bytes | Image.Image): - """ - Manually preprocess a single image from bytes or PIL.Image to (3, 512, 512). - """ - if isinstance(image_data, bytes): - image = Image.open(io.BytesIO(image_data)) - else: - image = image_data - - if image.mode != "RGB": - image = image.convert("RGB") - - image = image.resize((512, 512), Image.Resampling.LANCZOS) - - # Convert to numpy array, normalize to [0, 1], and transpose to (channels, height, width) - image_array = np.array(image, dtype=np.float32) / 255.0 - # Normalize using mean and std - image_array = (image_array - self.mean) / self.std - - image_array = np.transpose(image_array, (2, 0, 1)) # (H, W, C) -> (C, H, W) - - return image_array - - def _preprocess_inputs(self, raw_inputs): - """ - Preprocess inputs into a list of real input tensors (no dummies). - - For text: Returns list of input_ids. - - For vision: Returns list of pixel_values. - """ - if not isinstance(raw_inputs, list): - raw_inputs = [raw_inputs] - - processed = [] - if self.embedding_type == "text": - for text in raw_inputs: - input_ids = self.tokenizer( - [text], return_tensors="np", padding="max_length", max_length=50 - )["input_ids"] - input_ids = input_ids.astype(np.int32) - processed.append(input_ids) - elif self.embedding_type == "vision": - for img in raw_inputs: - pixel_values = self._preprocess_image(img) - processed.append( - pixel_values[np.newaxis, ...] - ) # Add batch dim: (1, 3, 512, 512) - else: - raise ValueError( - f"Invalid embedding_type: {self.embedding_type}. Must be 'text' or 'vision'." - ) - return processed - - def _postprocess_outputs(self, outputs): - """ - Process ONNX model outputs, truncating each embedding in the array to truncate_dim. - - outputs: NumPy array of embeddings. - - Returns: List of truncated embeddings. 
- """ - # size of vector in database - truncate_dim = 768 - - # jina v2 defaults to 1024 and uses Matryoshka representation, so - # truncating only causes an extremely minor decrease in retrieval accuracy - if outputs.shape[-1] > truncate_dim: - outputs = outputs[..., :truncate_dim] - - return outputs - - def __call__( - self, inputs: list[str] | list[Image.Image] | list[str], embedding_type=None - ): - # Lock the entire call to prevent race conditions when text and vision - # embeddings are called concurrently from different threads - with self._call_lock: - self.embedding_type = embedding_type - if not self.embedding_type: - raise ValueError( - "embedding_type must be specified either in __init__ or __call__" - ) - - self._load_model_and_utils() - processed = self._preprocess_inputs(inputs) - - # Prepare ONNX inputs with matching batch sizes - onnx_inputs = {} - if self.embedding_type == "text": - onnx_inputs["input_ids"] = np.stack([x[0] for x in processed]) - elif self.embedding_type == "vision": - onnx_inputs["pixel_values"] = np.stack([x[0] for x in processed]) - else: - raise ValueError("Invalid embedding type") - - # Run inference - text_embeddings, image_embeddings = self.runner.run(onnx_inputs) - if self.embedding_type == "text": - embeddings = text_embeddings # text embeddings - elif self.embedding_type == "vision": - embeddings = image_embeddings # image embeddings - else: - raise ValueError("Invalid embedding type") - - embeddings = self._postprocess_outputs(embeddings) - return [embedding for embedding in embeddings] diff --git a/frigate/util/axengine_converter.py b/frigate/util/axengine_converter.py new file mode 100644 index 000000000..ab465df4f --- /dev/null +++ b/frigate/util/axengine_converter.py @@ -0,0 +1,190 @@ +"""AXEngine model loading utility for Frigate.""" + +import logging +import os +import time +from pathlib import Path + +from frigate.comms.inter_process import InterProcessRequestor +from frigate.const import UPDATE_MODEL_STATE +from frigate.types import ModelStatusTypesEnum +from frigate.util.downloader import ModelDownloader +from frigate.util.file import FileLock + +logger = logging.getLogger(__name__) + +AXENGINE_JINA_V2_MODEL = "jina_v2" +AXENGINE_JINA_V2_REPO = "AXERA-TECH/jina-clip-v2" + + +def get_axengine_model_type(model_path: str) -> str | None: + if "jina-clip-v2" in str(model_path): + return AXENGINE_JINA_V2_MODEL + + return None + + +def is_axengine_compatible( + model_path: str, device: str | None, model_type: str | None = None +) -> bool: + if (device or "").upper() != "AXENGINE": + return False + + if not model_type: + model_type = get_axengine_model_type(model_path) + + return model_type == AXENGINE_JINA_V2_MODEL + + +def wait_for_download_completion( + image_model_path: Path, + text_model_path: Path, + lock_path: Path, + timeout: int = 300, +) -> bool: + start_time = time.time() + + while time.time() - start_time < timeout: + if image_model_path.exists() and text_model_path.exists(): + return True + + if not lock_path.exists(): + return image_model_path.exists() and text_model_path.exists() + + time.sleep(1) + + logger.warning("Timeout waiting for AXEngine model files: %s", image_model_path) + return False + + +def auto_convert_model(model_path: str, model_type: str | None = None) -> str | None: + """Prepare AXEngine model files and return the image encoder path.""" + if not is_axengine_compatible(model_path, "AXENGINE", model_type): + return None + + model_dir = Path(model_path).parent + ui_model_key = 
f"jinaai/jina-clip-v2-{Path(model_path).name}" + ui_preprocessor_key = "jinaai/jina-clip-v2-preprocessor_config.json" + image_model_path = model_dir / "image_encoder.axmodel" + text_model_path = model_dir / "text_encoder.axmodel" + model_repo = os.environ.get("AXENGINE_JINA_V2_REPO", AXENGINE_JINA_V2_REPO) + hf_endpoint = os.environ.get("HF_ENDPOINT", "https://huggingface.co") + requestor = InterProcessRequestor() + + download_targets = { + "image_encoder.axmodel": f"{hf_endpoint}/{model_repo}/resolve/main/image_encoder.axmodel", + "text_encoder.axmodel": f"{hf_endpoint}/{model_repo}/resolve/main/text_encoder.axmodel", + } + + if image_model_path.exists() and text_model_path.exists(): + requestor.send_data( + UPDATE_MODEL_STATE, + { + "model": ui_preprocessor_key, + "state": ModelStatusTypesEnum.downloaded, + }, + ) + requestor.send_data( + UPDATE_MODEL_STATE, + { + "model": ui_model_key, + "state": ModelStatusTypesEnum.downloaded, + }, + ) + requestor.stop() + return str(image_model_path) + + lock_path = model_dir / ".axengine.download.lock" + lock = FileLock(lock_path, timeout=300, cleanup_stale_on_init=True) + + if lock.acquire(): + try: + requestor.send_data( + UPDATE_MODEL_STATE, + { + "model": ui_preprocessor_key, + "state": ModelStatusTypesEnum.downloaded, + }, + ) + requestor.send_data( + UPDATE_MODEL_STATE, + { + "model": ui_model_key, + "state": ModelStatusTypesEnum.downloading, + }, + ) + + for file_name, url in download_targets.items(): + target_path = model_dir / file_name + if target_path.exists(): + continue + + target_path.parent.mkdir(parents=True, exist_ok=True) + ModelDownloader.download_from_url(url, str(target_path)) + + requestor.send_data( + UPDATE_MODEL_STATE, + { + "model": ui_model_key, + "state": ModelStatusTypesEnum.downloaded, + }, + ) + + return str(image_model_path) + except Exception: + requestor.send_data( + UPDATE_MODEL_STATE, + { + "model": ui_model_key, + "state": ModelStatusTypesEnum.error, + }, + ) + logger.exception( + "Failed to prepare AXEngine model files for %s", model_repo + ) + return None + finally: + requestor.stop() + lock.release() + + logger.info("Another process is preparing AXEngine models, waiting for completion") + requestor.send_data( + UPDATE_MODEL_STATE, + { + "model": ui_preprocessor_key, + "state": ModelStatusTypesEnum.downloaded, + }, + ) + requestor.send_data( + UPDATE_MODEL_STATE, + { + "model": ui_model_key, + "state": ModelStatusTypesEnum.downloading, + }, + ) + requestor.stop() + + if wait_for_download_completion(image_model_path, text_model_path, lock_path): + if image_model_path.exists() and text_model_path.exists(): + requestor = InterProcessRequestor() + requestor.send_data( + UPDATE_MODEL_STATE, + { + "model": ui_model_key, + "state": ModelStatusTypesEnum.downloaded, + }, + ) + requestor.stop() + return str(image_model_path) + + logger.error("Timeout waiting for AXEngine model download lock for %s", model_dir) + requestor = InterProcessRequestor() + requestor.send_data( + UPDATE_MODEL_STATE, + { + "model": ui_model_key, + "state": ModelStatusTypesEnum.error, + }, + ) + requestor.stop() + return None diff --git a/web/src/pages/Explore.tsx b/web/src/pages/Explore.tsx index 98c1a653c..8f50e982e 100644 --- a/web/src/pages/Explore.tsx +++ b/web/src/pages/Explore.tsx @@ -292,46 +292,32 @@ export default function Explore() { const modelVersion = config?.semantic_search.model || "jinav1"; const modelSize = config?.semantic_search.model_size || "small"; - const isAxJinaV2 = useMemo( - () => - modelVersion === "jinav2" && - 
Object.values( - (config?.detectors ?? {}) as Record, - ).some((detector) => detector?.type === "axengine"), - [modelVersion, config?.detectors], - ); // Text model state const { payload: textModelState } = useModelState( - isAxJinaV2 - ? "AXERA-TECH/jina-clip-v2-text_encoder.axmodel" - : modelVersion === "jinav1" - ? "jinaai/jina-clip-v1-text_model_fp16.onnx" - : modelSize === "large" - ? "jinaai/jina-clip-v2-model_fp16.onnx" - : "jinaai/jina-clip-v2-model_quantized.onnx", + modelVersion === "jinav1" + ? "jinaai/jina-clip-v1-text_model_fp16.onnx" + : modelSize === "large" + ? "jinaai/jina-clip-v2-model_fp16.onnx" + : "jinaai/jina-clip-v2-model_quantized.onnx", ); // Tokenizer state const { payload: textTokenizerState } = useModelState( - isAxJinaV2 - ? "AXERA-TECH/jina-clip-v2-tokenizer" - : modelVersion === "jinav1" - ? "jinaai/jina-clip-v1-tokenizer" - : "jinaai/jina-clip-v2-tokenizer", + modelVersion === "jinav1" + ? "jinaai/jina-clip-v1-tokenizer" + : "jinaai/jina-clip-v2-tokenizer", ); // Vision model state (same as text model for jinav2) const visionModelFile = - isAxJinaV2 - ? "AXERA-TECH/jina-clip-v2-image_encoder.axmodel" - : modelVersion === "jinav1" - ? modelSize === "large" - ? "jinaai/jina-clip-v1-vision_model_fp16.onnx" - : "jinaai/jina-clip-v1-vision_model_quantized.onnx" - : modelSize === "large" - ? "jinaai/jina-clip-v2-model_fp16.onnx" - : "jinaai/jina-clip-v2-model_quantized.onnx"; + modelVersion === "jinav1" + ? modelSize === "large" + ? "jinaai/jina-clip-v1-vision_model_fp16.onnx" + : "jinaai/jina-clip-v1-vision_model_quantized.onnx" + : modelSize === "large" + ? "jinaai/jina-clip-v2-model_fp16.onnx" + : "jinaai/jina-clip-v2-model_quantized.onnx"; const { payload: visionModelState } = useModelState(visionModelFile); // Preprocessor/feature extractor state @@ -346,10 +332,9 @@ export default function Explore() { textModelState === "downloaded" && textTokenizerState === "downloaded" && visionModelState === "downloaded" && - (isAxJinaV2 || visionFeatureExtractorState === "downloaded") + visionFeatureExtractorState === "downloaded" ); }, [ - isAxJinaV2, textModelState, textTokenizerState, visionModelState, @@ -376,7 +361,7 @@ export default function Explore() { !textModelState || !textTokenizerState || !visionModelState || - (!isAxJinaV2 && !visionFeatureExtractorState))) + !visionFeatureExtractorState)) ) { return ( @@ -466,14 +451,12 @@ export default function Explore() { "exploreIsUnavailable.downloadingModels.setup.visionModel", )}
- {!isAxJinaV2 && ( -
- {renderModelStateIcon(visionFeatureExtractorState)} - {t( - "exploreIsUnavailable.downloadingModels.setup.visionModelFeatureExtractor", - )} -
- )} +
+ {renderModelStateIcon(visionFeatureExtractorState)} + {t( + "exploreIsUnavailable.downloadingModels.setup.visionModelFeatureExtractor", + )} +
{renderModelStateIcon(textModelState)} {t( @@ -490,7 +473,7 @@ export default function Explore() { {(textModelState === "error" || textTokenizerState === "error" || visionModelState === "error" || - (!isAxJinaV2 && visionFeatureExtractorState === "error")) && ( + visionFeatureExtractorState === "error") && (
{t("exploreIsUnavailable.downloadingModels.error")}