Initial commit for AXERA AI accelerators

2026-03-10 02:29:19 +03:00 · 2025-10-24 08:22:56 +00:00 · 2025-10-24 08:22:56 +00:00 · 7b4eaf2d10
commit 7b4eaf2d10
parent 4e99ee0c33
9 changed files with 484 additions and 0 deletions
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@ -225,3 +225,29 @@ jobs:
          sources: |
            ghcr.io/${{ steps.lowercaseRepo.outputs.lowercase }}:${{ env.SHORT_SHA }}-amd64
            ghcr.io/${{ steps.lowercaseRepo.outputs.lowercase }}:${{ env.SHORT_SHA }}-rpi
+  # Builds and pushes the AXERA (axcl) image via docker bake; runs only after
+  # the amd64_build and arm64_build jobs.
+  axera_build:
+    name: AXERA Build
+    runs-on: ubuntu-22.04
+    needs: [amd64_build, arm64_build]
+    steps:
+      - name: Check out code
+        uses: actions/checkout@v5
+        with:
+          persist-credentials: false
+      - name: Set up QEMU and Buildx
+        id: setup
+        uses: ./.github/actions/setup
+        with:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      - name: Build and push Axera build
+        uses: docker/bake-action@v6
+        with:
+          source: .
+          files: docker/axcl/axcl.hcl
+          targets: axcl
+          push: true
+          # One override per line: the image tag for the axcl target, and the
+          # GitHub Actions cache as cache source for every target.
+          set: |
+            axcl.tags=${{ steps.setup.outputs.image-name }}-axcl
+            *.cache-from=type=gha
--- a/docker/axcl/Dockerfile
+++ b/docker/axcl/Dockerfile
@ -0,0 +1,59 @@
+# syntax=docker/dockerfile:1.6
+
+# https://askubuntu.com/questions/972516/debian-frontend-environment-variable
+# NOTE(review): this ARG is declared before FROM and is not re-declared inside
+# the stage below, so it does not appear to reach that stage's RUN steps - confirm.
+ARG DEBIAN_FRONTEND=noninteractive
+
+# Globally set pip break-system-packages option to avoid having to specify it every time
+ARG PIP_BREAK_SYSTEM_PACKAGES=1
+
+
+# Start from the build context named "frigate" and add the AXERA runtime on top.
+FROM frigate AS frigate-axcl
+# Re-declare the build args this stage reads; TARGETARCH picks the per-architecture downloads.
+ARG TARGETARCH
+ARG PIP_BREAK_SYSTEM_PACKAGES
+
+# Install axmodels
+# The default detector model is stored at /axmodels/yolov5s_320.axmodel.
+RUN mkdir -p /axmodels \
+    && wget https://github.com/ivanshi1108/assets/releases/download/v0.16.2/yolov5s_320.axmodel -O /axmodels/yolov5s_320.axmodel
+
+# Install axpyengine
+# Download, install and delete the wheel in a single RUN: a file removed in a
+# later RUN still occupies space in the earlier layer.
+RUN wget https://github.com/AXERA-TECH/pyaxengine/releases/download/0.1.3.rc1/axengine-0.1.3-py3-none-any.whl -O /axengine-0.1.3-py3-none-any.whl \
+    && pip3 install -i https://mirrors.aliyun.com/pypi/simple/ /axengine-0.1.3-py3-none-any.whl \
+    && rm /axengine-0.1.3-py3-none-any.whl
+
+# Install axcl
+# Everything happens in one RUN so the downloaded .deb and the unpack directory
+# never persist in an image layer. Only /usr/bin/axcl and /usr/lib/axcl are
+# copied out of the package; the package itself is not installed with dpkg.
+# An architecture without a published package fails the build instead of
+# silently receiving the aarch64 binaries.
+RUN set -e; \
+    case "${TARGETARCH:-}" in \
+        amd64) axcl_arch="x86_64" ;; \
+        arm64) axcl_arch="aarch64" ;; \
+        *) echo "Unsupported TARGETARCH for axcl: '${TARGETARCH:-}'" >&2; exit 1 ;; \
+    esac; \
+    echo "Installing ${axcl_arch} version of axcl"; \
+    wget "https://github.com/ivanshi1108/assets/releases/download/v0.16.2/axcl_host_${axcl_arch}_V3.6.5_20250908154509_NO4973.deb" -O /axcl.deb; \
+    mkdir /unpack_axcl; \
+    dpkg-deb -x /axcl.deb /unpack_axcl; \
+    cp -R /unpack_axcl/usr/bin/axcl /usr/bin/; \
+    cp -R /unpack_axcl/usr/lib/axcl /usr/lib/; \
+    rm -rf /unpack_axcl /axcl.deb
+
+
+
+# Install axcl ffmpeg
+# amd64 uses the "x64" release assets; every other architecture uses "aarch64".
+RUN mkdir -p /usr/lib/ffmpeg/axcl \
+    && if [ "$TARGETARCH" = "amd64" ]; then ffmpeg_arch="x64"; else ffmpeg_arch="aarch64"; fi \
+    && wget "https://github.com/ivanshi1108/assets/releases/download/v0.16.2/ffmpeg-${ffmpeg_arch}" -O /usr/lib/ffmpeg/axcl/ffmpeg \
+    && wget "https://github.com/ivanshi1108/assets/releases/download/v0.16.2/ffprobe-${ffmpeg_arch}" -O /usr/lib/ffmpeg/axcl/ffprobe \
+    && chmod +x /usr/lib/ffmpeg/axcl/ffmpeg /usr/lib/ffmpeg/axcl/ffprobe
+
+# Set ldconfig path
+RUN echo "/usr/lib/axcl" > /etc/ld.so.conf.d/ax.conf
+
+# Set env
+ENV PATH="$PATH:/usr/bin/axcl"
+# Prepend the inherited value only when it is non-empty. A plain
+# "$LD_LIBRARY_PATH:/usr/lib/axcl" yields a leading empty entry when the
+# variable is unset, and the dynamic loader reads an empty entry as the
+# current working directory.
+ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}/usr/lib/axcl"
+
+# Refresh the linker cache at container start, then hand over to /init.
+ENTRYPOINT ["sh", "-c", "ldconfig && exec /init"]
--- a/docker/axcl/axcl.hcl
+++ b/docker/axcl/axcl.hcl
@ -0,0 +1,13 @@
+# "frigate" stage of the main Dockerfile; consumed below as a named build context.
+target frigate {
+  dockerfile = "docker/main/Dockerfile"
+  target = "frigate"
+  platforms = ["linux/amd64", "linux/arm64"]
+}
+
+# AXERA image: docker/axcl/Dockerfile built with the frigate target as its "frigate" context.
+target axcl {
+  dockerfile = "docker/axcl/Dockerfile"
+  platforms = ["linux/amd64", "linux/arm64"]
+  contexts = {
+    frigate = "target:frigate"
+  }
+}
--- a/docker/axcl/axcl.mk
+++ b/docker/axcl/axcl.mk
@ -0,0 +1,15 @@
+BOARDS += axcl
+
+# Bake invocation and release tag shared by the axcl targets below.
+AXCL_BAKE := docker buildx bake --file=docker/axcl/axcl.hcl axcl
+AXCL_TAG = $(IMAGE_REPO):$(GITHUB_REF_NAME)-$(COMMIT_HASH)-axcl
+
+# Build the image and load it into the local Docker image store.
+local-axcl: version
+	$(AXCL_BAKE) --set axcl.tags=frigate:latest-axcl --load
+
+# Build the image under the release tag without pushing it.
+build-axcl: version
+	$(AXCL_BAKE) --set axcl.tags=$(AXCL_TAG)
+
+# Build under the release tag and push it.
+push-axcl: build-axcl
+	$(AXCL_BAKE) --set axcl.tags=$(AXCL_TAG) --push
--- a/docker/axcl/user_installation.sh
+++ b/docker/axcl/user_installation.sh
@ -0,0 +1,83 @@
+#!/bin/bash
+
+# Update package list and install dependencies
+sudo apt-get update
+sudo apt-get install -y build-essential cmake git wget pciutils kmod udev
+
+# Check if gcc-12 is needed
+# Ask gcc for its version number directly instead of taking the last field of
+# the `gcc --version` banner: some builds end that banner with a build date or
+# a distro suffix, which would be misread as the version.
+current_gcc_version=$(gcc -dumpfullversion -dumpversion 2>/dev/null)
+gcc_major_version=${current_gcc_version%%.*}
+
+# An empty major version (gcc missing or unreadable) is treated as "too old".
+if [[ -z "$gcc_major_version" || "$gcc_major_version" -lt 12 ]]; then
+    echo "Current GCC version ($current_gcc_version) is lower than 12, installing gcc-12..."
+    sudo apt-get install -y gcc-12
+    sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 12
+    echo "GCC-12 installed and set as default"
+else
+    echo "Current GCC version ($current_gcc_version) is sufficient, skipping GCC installation"
+fi
+
+# Determine architecture
+# Pick the driver package that matches the machine architecture; anything
+# other than x86_64 or aarch64 aborts the script.
+arch=$(uname -m)
+download_url=""
+
+case "$arch" in
+    x86_64)
+        download_url="https://github.com/ivanshi1108/assets/releases/download/v0.16.2/axcl_host_x86_64_V3.6.5_20250908154509_NO4973.deb"
+        deb_file="axcl_host_x86_64_V3.6.5_20250908154509_NO4973.deb"
+        ;;
+    aarch64)
+        download_url="https://github.com/ivanshi1108/assets/releases/download/v0.16.2/axcl_host_aarch64_V3.6.5_20250908154509_NO4973.deb"
+        deb_file="axcl_host_aarch64_V3.6.5_20250908154509_NO4973.deb"
+        ;;
+    *)
+        echo "Unsupported architecture: $arch"
+        exit 1
+        ;;
+esac
+
+# Download AXCL driver
+echo "Downloading AXCL driver for $arch..."
+if ! wget "$download_url" -O "$deb_file"; then
+    echo "Failed to download AXCL driver"
+    exit 1
+fi
+
+# Install AXCL driver
+# On a failed first attempt, let apt resolve missing dependencies and retry once.
+echo "Installing AXCL driver..."
+if ! sudo dpkg -i "$deb_file"; then
+    echo "Failed to install AXCL driver, attempting to fix dependencies..."
+    sudo apt-get install -f -y
+
+    if ! sudo dpkg -i "$deb_file"; then
+        echo "AXCL driver installation failed"
+        exit 1
+    fi
+fi
+
+# Update environment
+echo "Updating environment..."
+source /etc/profile
+
+# Verify installation
+# The tool must be on PATH and must exit with status 0; its output is always
+# printed so the user sees the reported accelerator status.
+echo "Verifying AXCL installation..."
+if ! command -v axcl-smi &> /dev/null; then
+    echo "axcl-smi command not found. AXCL driver installation may have failed."
+    exit 1
+fi
+
+echo "AXCL driver detected, checking AI accelerator status..."
+
+axcl_output=$(axcl-smi 2>&1)
+axcl_exit_code=$?
+
+echo "$axcl_output"
+
+if [ $axcl_exit_code -ne 0 ]; then
+    echo "AXCL driver installed but no AI accelerator detected or communication failed."
+    echo "Please check if the AI accelerator is properly connected and powered on."
+    exit 1
+fi
+
+echo "AXCL driver installation completed successfully!"
--- a/docs/docs/configuration/object_detectors.md
+++ b/docs/docs/configuration/object_detectors.md
@ -47,6 +47,11 @@ Frigate supports multiple different detectors that work on different types of ha

 - [Synaptics](#synaptics): synap models can run on Synaptics devices(e.g astra machina) with included NPUs.

+**AXERA**
+
+- [AXEngine](#axera): axmodels can run on AXERA AI accelerators.
+
+
 **For Testing**

 - [CPU Detector (not recommended for actual use](#cpu-detector-not-recommended): Use a CPU to run tflite model, this is not recommended and in most cases OpenVINO can be used in CPU mode with better results.
@ -1099,6 +1104,40 @@ model:  # required
  labelmap_path: /labelmap/coco-80.txt  # required
 ```

+## AXERA
+
+Hardware accelerated object detection is supported on the following SoCs:
+
+- AX650N
+- AX8850N
+
+This implementation uses the [AXera Pulsar2 Toolchain](https://huggingface.co/AXERA-TECH/Pulsar2).
+
+See the [installation docs](../frigate/installation.md#axera) for information on configuring the AXEngine hardware.
+
+### Configuration
+
+When configuring the AXEngine detector, you have to specify the model name.
+
+#### yolov5s
+
+A yolov5s model is provided in the container at `/axmodels` and is used by this detector type by default.
+
+Use the model configuration shown below when using the axengine detector with the default axmodel:
+
+```yaml
+detectors:  # required
+  axengine:  # required
+    type: axengine  # required
+
+model:  # required
+  path: yolov5s_320  # required
+  width: 320  # required
+  height: 320  # required
+  tensor_format: bgr  # required
+  labelmap_path: /labelmap/coco-80.txt  # required
+```
+
 ## Rockchip platform

 Hardware accelerated object detection is supported on the following SoCs:
--- a/docs/docs/frigate/hardware.md
+++ b/docs/docs/frigate/hardware.md
@ -110,6 +110,20 @@ Frigate supports multiple different detectors that work on different types of ha
 | ssd mobilenet    | ~ 25 ms                         |
 | yolov5m          | ~ 118 ms                        |

+**Synaptics**
+
+- [Synaptics](#synaptics): synap models can run on Synaptics devices(e.g astra machina) with included NPUs to provide efficient object detection.
+
+:::
+
+### AXERA
+
+- **AXEngine** Default model is **yolov5s_320**
+
+| Name             | AXERA AX650N/AX8850N Inference Time |
+| ---------------- | ----------------------------------- |
+| yolov5s_320      | ~ 1.676 ms                          |
+
 ### Hailo-8

 Frigate supports both the Hailo-8 and Hailo-8L AI Acceleration Modules on compatible hardware platforms—including the Raspberry Pi 5 with the PCIe hat from the AI kit. The Hailo detector integration in Frigate automatically identifies your hardware type and selects the appropriate default model when a custom model isn’t provided.
--- a/docs/docs/frigate/installation.md
+++ b/docs/docs/frigate/installation.md
@ -287,6 +287,40 @@ or add these options to your `docker run` command:

 Next, you should configure [hardware object detection](/configuration/object_detectors#synaptics) and [hardware video processing](/configuration/hardware_acceleration_video#synaptics).

+### AXERA
+
+AXERA accelerators are available in an M.2 form factor, compatible with both Raspberry Pi and Orange Pi. This form factor has also been successfully tested on x86 platforms, making it a versatile choice for various computing environments.
+
+#### Installation
+
+Using AXERA accelerators requires the installation of the AXCL driver. We provide a convenient Linux script to complete this installation.
+
+Follow these steps for installation:
+
+1. Copy or download [this script](https://github.com/ivanshi1108/assets/releases/download/v0.16.2/user_installation.sh).
+2. Ensure it has execution permissions with `sudo chmod +x user_installation.sh`
+3. Run the script with `./user_installation.sh`
+
+#### Setup
+
+To set up Frigate, follow the default installation instructions, for example: `ghcr.io/blakeblackshear/frigate:stable`
+
+Next, grant Docker permissions to access your hardware by adding the following lines to your `docker-compose.yml` file:
+
+```yaml
+devices:
+  - /dev/axcl_host
+  - /dev/ax_mmb_dev
+  - /dev/msg_userdev
+```
+
+If you are using `docker run`, add these options to your command: `--device /dev/axcl_host --device /dev/ax_mmb_dev --device /dev/msg_userdev`.
+
+#### Configuration
+
+Finally, configure [hardware object detection](/configuration/object_detectors#axera) to complete the setup.
+
+
 ## Docker

 Running through Docker with Docker Compose is the recommended install method.
--- a/frigate/detectors/plugins/axengine.py
+++ b/frigate/detectors/plugins/axengine.py
@ -0,0 +1,201 @@
+import logging
+import os.path
+import re
+import urllib.request
+from typing import Literal
+
+import cv2
+import numpy as np
+from pydantic import Field
+
+from frigate.const import MODEL_CACHE_DIR
+from frigate.detectors.detection_api import DetectionApi
+from frigate.detectors.detector_config import BaseDetectorConfig, ModelTypeEnum
+from frigate.util.model import post_process_yolo
+
+import axengine as axe
+from axengine import axclrt_provider_name, axengine_provider_name
+
+logger = logging.getLogger(__name__)
+
+# Detector type key: the literal accepted by the config `type` field and the
+# plugin's `type_key`.
+DETECTOR_KEY = "axengine"
+
+# Score threshold handed to the NMS step as `conf_threshold`.
+CONF_THRESH = 0.65
+# IoU above which a lower-scoring box of the same class is suppressed.
+IOU_THRESH = 0.45
+# Factor applied to the decoded grid coordinates of each model output, in output order.
+STRIDES = [8, 16, 32]
+# One row per model output; each row is reshaped into three value pairs that
+# scale the decoded box width/height.
+ANCHORS = [
+    [10, 13, 16, 30, 33, 23],
+    [30, 61, 62, 45, 59, 119],
+    [116, 90, 156, 198, 373, 326],
+]
+
+class AxengineDetectorConfig(BaseDetectorConfig):
+    """Detector configuration whose ``type`` must equal the ``axengine`` detector key."""
+
+    type: Literal[DETECTOR_KEY]
+
+class Axengine(DetectionApi):
+    """Detector plugin that runs an ``.axmodel`` through an ``axengine`` session.
+
+    The raw model outputs are decoded as anchor-based YOLO heads (see
+    ``gen_proposals``), filtered with per-class NMS and returned as a fixed
+    ``(20, 6)`` array of ``[class_id, score, y_min, x_min, y_max, x_max]`` rows.
+    """
+
+    type_key = DETECTOR_KEY
+
+    def __init__(self, config: AxengineDetectorConfig):
+        """Store the model input size and open the inference session.
+
+        ``config.model.path`` is used as a bare model name and resolved to
+        ``/axmodels/<name>.axmodel``; when it is empty, ``yolov5s_320`` is used.
+        """
+        logger.info("__init__ axengine")
+        super().__init__(config)
+        self.height = config.model.height
+        self.width = config.model.width
+        model_path = config.model.path or "yolov5s_320"
+        self.session = axe.InferenceSession(f"/axmodels/{model_path}.axmodel")
+
+    def __del__(self):
+        # Nothing is released explicitly here.
+        pass
+
+    def xywh2xyxy(self, x):
+        """Convert an ``(n, 4)`` array of ``[cx, cy, w, h]`` boxes to ``[x1, y1, x2, y2]``.
+
+        ``(x1, y1)`` is the top-left and ``(x2, y2)`` the bottom-right corner.
+        The input is not modified; a converted copy is returned.
+        """
+        y = np.copy(x)
+        y[:, 0] = x[:, 0] - x[:, 2] / 2  # top left x
+        y[:, 1] = x[:, 1] - x[:, 3] / 2  # top left y
+        y[:, 2] = x[:, 0] + x[:, 2] / 2  # bottom right x
+        y[:, 3] = x[:, 1] + x[:, 3] / 2  # bottom right y
+        return y
+
+    def bboxes_iou(self, boxes1, boxes2):
+        """Return the element-wise (broadcast) IoU of two ``[x1, y1, x2, y2]`` box arrays.
+
+        The result is floored at float32 machine epsilon, so it is never exactly 0.
+        """
+        boxes1 = np.array(boxes1)
+        boxes2 = np.array(boxes2)
+
+        boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * (
+            boxes1[..., 3] - boxes1[..., 1]
+        )
+        boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * (
+            boxes2[..., 3] - boxes2[..., 1]
+        )
+
+        # Intersection rectangle; negative extents (no overlap) are clamped to 0.
+        left_up = np.maximum(boxes1[..., :2], boxes2[..., :2])
+        right_down = np.minimum(boxes1[..., 2:], boxes2[..., 2:])
+
+        inter_section = np.maximum(right_down - left_up, 0.0)
+        inter_area = inter_section[..., 0] * inter_section[..., 1]
+        union_area = boxes1_area + boxes2_area - inter_area
+        ious = np.maximum(1.0 * inter_area / union_area, np.finfo(np.float32).eps)
+
+        return ious
+
+    def nms(self, proposals, iou_threshold, conf_threshold, multi_label=False):
+        """Filter decoded proposals by score and apply greedy per-class NMS.
+
+        :param proposals: 2-D array whose rows are
+            ``[cx, cy, w, h, objectness, class_score_0, ...]``.
+        :param iou_threshold: a box is dropped when its IoU with an already kept
+            box of the same class exceeds this value.
+        :param conf_threshold: minimum objectness, and minimum
+            ``objectness * class_score``, for a box to be considered.
+        :param multi_label: when true, every class whose combined score passes
+            the threshold yields its own box; otherwise only the best class does.
+        :return: ``(k, 6)`` array of ``[x1, y1, x2, y2, score, class_id]`` rows
+            sorted by descending score; ``k`` may be 0.
+        """
+        xc = proposals[..., 4] > conf_threshold
+        proposals = proposals[xc]  # boolean indexing copies, so the caller's array is untouched
+        # Combine objectness with the per-class scores.
+        proposals[:, 5:] *= proposals[:, 4:5]
+        bboxes = self.xywh2xyxy(proposals[:, :4])
+        if multi_label:
+            mask = proposals[:, 5:] > conf_threshold
+            nonzero_indices = np.argwhere(mask)
+            if nonzero_indices.size == 0:
+                # Nothing passed the threshold: same empty shape as the final return.
+                return np.empty((0, 6))
+            i, j = nonzero_indices.T
+            bboxes = np.hstack(
+                (bboxes[i], proposals[i, j + 5][:, None], j[:, None].astype(float))
+            )
+        else:
+            confidences = proposals[:, 5:]
+            conf = confidences.max(axis=1, keepdims=True)
+            j = confidences.argmax(axis=1)[:, None]
+
+            bboxes = np.hstack([bboxes, conf, j.astype(float)])
+            bboxes = bboxes[conf.reshape(-1) > conf_threshold]
+
+        classes_in_img = np.unique(bboxes[:, 5])
+        # Keep at most the 300 highest-scoring candidates.
+        bboxes = bboxes[bboxes[:, 4].argsort()[::-1][:300]]
+        best_bboxes = []
+
+        for cls in classes_in_img:
+            cls_bboxes = bboxes[bboxes[:, 5] == cls]
+
+            while len(cls_bboxes) > 0:
+                # Keep the best remaining box, then drop everything overlapping it.
+                max_ind = np.argmax(cls_bboxes[:, 4])
+                best_bbox = cls_bboxes[max_ind]
+                best_bboxes.append(best_bbox)
+                cls_bboxes = np.concatenate(
+                    [cls_bboxes[:max_ind], cls_bboxes[max_ind + 1 :]]
+                )
+                iou = self.bboxes_iou(best_bbox[np.newaxis, :4], cls_bboxes[:, :4])
+                weight = np.ones((len(iou),), dtype=np.float32)
+                weight[iou > iou_threshold] = 0.0
+
+                cls_bboxes[:, 4] = cls_bboxes[:, 4] * weight
+                cls_bboxes = cls_bboxes[cls_bboxes[:, 4] > 0.0]
+
+        if len(best_bboxes) == 0:
+            return np.empty((0, 6))
+
+        best_bboxes = np.vstack(best_bboxes)
+        best_bboxes = best_bboxes[best_bboxes[:, 4].argsort()[::-1]]
+        return best_bboxes
+
+    def sigmoid(self, x):
+        """Return ``clip(0.2 * x + 0.5, 0, 1)``.
+
+        NOTE(review): this is a piecewise-linear stand-in, not the logistic
+        function ``1 / (1 + exp(-x))`` - confirm it is what the model expects.
+        """
+        return np.clip(0.2 * x + 0.5, 0, 1)
+
+    def gen_proposals(self, outputs):
+        """Decode the raw head outputs into one flat proposal array.
+
+        Each entry of ``outputs`` is unpacked as ``(n, h, w, c)`` and split into
+        one group of channels per anchor. Box centres are offset by the grid
+        cell and scaled by ``STRIDES[i]``; box sizes are scaled by the matching
+        ``ANCHORS`` pairs.
+
+        :return: 2-D array whose rows are
+            ``[cx, cy, w, h, objectness, class_score_0, ...]``.
+        """
+        new_pred = []
+        anchor_grid = np.array(ANCHORS).reshape(-1, 1, 1, 3, 2)
+        num_anchors = anchor_grid.shape[3]
+        for i, pred in enumerate(outputs):
+            pred = self.sigmoid(pred)
+            n, h, w, c = pred.shape
+
+            # Derive the per-anchor channel count from the tensor rather than
+            # assuming 85, so models with a different class count decode too.
+            pred = pred.reshape(n, h, w, num_anchors, c // num_anchors)
+            conv_raw_dxdy = pred[..., 0:2]
+            conv_raw_dwdh = pred[..., 2:4]
+
+            # (1, h, w, 1, 2) grid of (x, y) cell indices; broadcasts over the
+            # batch and anchor axes and also supports non-square grids.
+            grid_x, grid_y = np.meshgrid(np.arange(w), np.arange(h))
+            xy_grid = np.stack((grid_x, grid_y), axis=-1)
+            xy_grid = xy_grid[np.newaxis, :, :, np.newaxis, :].astype(np.float32)
+
+            pred_xy = (conv_raw_dxdy * 2.0 - 0.5 + xy_grid) * STRIDES[i]
+            pred_wh = (conv_raw_dwdh * 2) ** 2 * anchor_grid[i]
+            pred[:, :, :, :, 0:4] = np.concatenate([pred_xy, pred_wh], axis=-1)
+
+            new_pred.append(np.reshape(pred, (-1, np.shape(pred)[-1])))
+
+        return np.concatenate(new_pred, axis=0)
+
+    def post_processing(self, outputs, input_shape, threshold=0.3):
+        """Turn raw model outputs into the fixed-size detection array.
+
+        :param outputs: raw head outputs, as accepted by ``gen_proposals``.
+        :param input_shape: ``(width, height)`` used to normalise coordinates.
+        :param threshold: detections scoring below this are skipped.
+        :return: ``(20, 6)`` float32 array of
+            ``[class_id, score, y_min, x_min, y_max, x_max]`` rows with
+            coordinates divided by the input size; unused rows stay zero.
+        """
+        proposals = self.gen_proposals(outputs)
+        # bboxes rows: [x_min, y_min, x_max, y_max, probability, cls_id]
+        bboxes = self.nms(proposals, IOU_THRESH, CONF_THRESH, multi_label=True)
+
+        max_detections = 20
+        results = np.zeros((max_detections, 6), np.float32)
+
+        # Rows are written by a separate counter so a skipped detection never
+        # leaves a zero row between two valid ones.
+        count = 0
+        for bbox in bboxes:
+            if count >= max_detections:
+                break
+            score = bbox[4]
+            if score < threshold:
+                continue
+            results[count] = [
+                int(bbox[5]),
+                score,
+                max(0, bbox[1]) / input_shape[1],
+                max(0, bbox[0]) / input_shape[0],
+                min(1, bbox[3] / input_shape[1]),
+                min(1, bbox[2] / input_shape[0]),
+            ]
+            count += 1
+        return results
+
+    def detect_raw(self, tensor_input):
+        """Run the session on ``tensor_input`` (fed as input ``"images"``) and post-process."""
+        outputs = self.session.run(None, {"images": tensor_input})
+        return self.post_processing(outputs, (self.width, self.height))