Compare commits


15 Commits

Author SHA1 Message Date
ivanshi1108
859a10c4e5
Merge acb17a7b50 into 9ab78f496c 2025-12-03 09:55:51 +08:00
Dan Brown
9ab78f496c
Adds support for YOLO v9 models running on Google Coral (#21124)
* Adds support for YOLO v9 models running on Google Coral

* fix format by using ruff instead of black

* Remove comment

Co-authored-by: Nicolas Mowen <nickmowen213@gmail.com>

* Remove log message

Co-authored-by: Nicolas Mowen <nickmowen213@gmail.com>

* revert to hard-coded settings. use ModelTypeEnum directly

* remove log messages. detect invalid output tensor count

* remove 1-tensor processing. add pre_process() function

* check for valid model type

* fix formatting

* remove unused import and variable

* remove tip that indicates other YOLO models may be supported.

---------

Co-authored-by: Nicolas Mowen <nickmowen213@gmail.com>
2025-12-02 13:26:57 -07:00
Nicolas Mowen
8a360eecf8
Refactor ROCm Support (#21132)
* Remove gfx 900 support and only keep ROCm build with all variants by default

* Include C++ for JIT header compilation
2025-12-02 09:41:02 -07:00
shizhicheng
acb17a7b50 Format code based on the results of Python Checks
2025-12-01 04:47:39 +00:00
ivanshi1108
7933a83a42
Update docs/docs/configuration/object_detectors.md
Co-authored-by: Nicolas Mowen <nickmowen213@gmail.com>
2025-11-24 23:04:19 +08:00
shizhicheng
2eef58aa1d Modify the description of AXERA in the documentation. 2025-11-24 07:04:42 +00:00
ivanshi1108
6659b7cb0f
Merge branch 'dev' into AXERA-axcl 2025-11-24 10:55:09 +08:00
shizhicheng
f134796913 format code with ruff 2025-11-24 02:42:04 +00:00
shizhicheng
b4abbd7d3b Modify the document based on review suggestions 2025-11-24 02:20:40 +00:00
shizhicheng
438df7d484 The model inference time has been changed to the time displayed on the Frigate UI 2025-11-16 22:22:38 +08:00
shizhicheng
e27a94ae0b Fix logical errors caused by code formatting 2025-11-11 05:54:19 +00:00
shizhicheng
1dee548dbc Modifications to the YOLOv9 object detection model:
The model is now dynamically downloaded to the cache directory.
Post-processing is now done using Frigate's built-in `post_process_yolo`.
Configuration in the relevant documentation has been updated.
2025-11-11 05:42:28 +00:00
shizhicheng
91e17e12b7 Change the default detection model to YOLOv9 2025-11-09 13:21:17 +00:00
ivanshi1108
bb45483e9e
Modify AXERA section from hardware.md
Modify AXERA section and related content from hardware documentation.
2025-10-28 09:54:00 +08:00
shizhicheng
7b4eaf2d10 Initial commit for AXERA AI accelerators 2025-10-24 09:03:13 +00:00
13 changed files with 713 additions and 75 deletions


@@ -136,7 +136,6 @@ jobs:
*.cache-to=type=registry,ref=${{ steps.setup.outputs.cache-name }}-tensorrt,mode=max
- name: AMD/ROCm general build
env:
AMDGPU: gfx
HSA_OVERRIDE: 0
uses: docker/bake-action@v6
with:
@@ -225,3 +224,29 @@ jobs:
sources: |
ghcr.io/${{ steps.lowercaseRepo.outputs.lowercase }}:${{ env.SHORT_SHA }}-amd64
ghcr.io/${{ steps.lowercaseRepo.outputs.lowercase }}:${{ env.SHORT_SHA }}-rpi
axera_build:
runs-on: ubuntu-22.04
name: AXERA Build
needs:
- amd64_build
- arm64_build
steps:
- name: Check out code
uses: actions/checkout@v5
with:
persist-credentials: false
- name: Set up QEMU and Buildx
id: setup
uses: ./.github/actions/setup
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Build and push Axera build
uses: docker/bake-action@v6
with:
source: .
push: true
targets: axcl
files: docker/axcl/axcl.hcl
set: |
axcl.tags=${{ steps.setup.outputs.image-name }}-axcl
*.cache-from=type=gha

docker/axcl/Dockerfile (new file, 55 lines)

@@ -0,0 +1,55 @@
# syntax=docker/dockerfile:1.6
# https://askubuntu.com/questions/972516/debian-frontend-environment-variable
ARG DEBIAN_FRONTEND=noninteractive
# Globally set pip break-system-packages option to avoid having to specify it every time
ARG PIP_BREAK_SYSTEM_PACKAGES=1
FROM frigate AS frigate-axcl
ARG TARGETARCH
ARG PIP_BREAK_SYSTEM_PACKAGES
# Install pyaxengine
RUN wget https://github.com/AXERA-TECH/pyaxengine/releases/download/0.1.3.rc1/axengine-0.1.3-py3-none-any.whl -O /axengine-0.1.3-py3-none-any.whl
RUN pip3 install -i https://mirrors.aliyun.com/pypi/simple/ /axengine-0.1.3-py3-none-any.whl \
&& rm /axengine-0.1.3-py3-none-any.whl
# Install axcl
RUN if [ "$TARGETARCH" = "amd64" ]; then \
echo "Installing x86_64 version of axcl"; \
wget https://github.com/ivanshi1108/assets/releases/download/v0.16.2/axcl_host_x86_64_V3.6.5_20250908154509_NO4973.deb -O /axcl.deb; \
else \
echo "Installing aarch64 version of axcl"; \
wget https://github.com/ivanshi1108/assets/releases/download/v0.16.2/axcl_host_aarch64_V3.6.5_20250908154509_NO4973.deb -O /axcl.deb; \
fi
RUN mkdir /unpack_axcl && \
dpkg-deb -x /axcl.deb /unpack_axcl && \
cp -R /unpack_axcl/usr/bin/axcl /usr/bin/ && \
cp -R /unpack_axcl/usr/lib/axcl /usr/lib/ && \
rm -rf /unpack_axcl /axcl.deb
# Install axcl ffmpeg
RUN mkdir -p /usr/lib/ffmpeg/axcl
RUN if [ "$TARGETARCH" = "amd64" ]; then \
wget https://github.com/ivanshi1108/assets/releases/download/v0.16.2/ffmpeg-x64 -O /usr/lib/ffmpeg/axcl/ffmpeg && \
wget https://github.com/ivanshi1108/assets/releases/download/v0.16.2/ffprobe-x64 -O /usr/lib/ffmpeg/axcl/ffprobe; \
else \
wget https://github.com/ivanshi1108/assets/releases/download/v0.16.2/ffmpeg-aarch64 -O /usr/lib/ffmpeg/axcl/ffmpeg && \
wget https://github.com/ivanshi1108/assets/releases/download/v0.16.2/ffprobe-aarch64 -O /usr/lib/ffmpeg/axcl/ffprobe; \
fi
RUN chmod +x /usr/lib/ffmpeg/axcl/ffmpeg /usr/lib/ffmpeg/axcl/ffprobe
# Set ldconfig path
RUN echo "/usr/lib/axcl" > /etc/ld.so.conf.d/ax.conf
# Set env
ENV PATH="$PATH:/usr/bin/axcl"
ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/lib/axcl"
ENTRYPOINT ["sh", "-c", "ldconfig && exec /init"]

docker/axcl/axcl.hcl (new file, 13 lines)

@@ -0,0 +1,13 @@
target frigate {
dockerfile = "docker/main/Dockerfile"
platforms = ["linux/amd64", "linux/arm64"]
target = "frigate"
}
target axcl {
dockerfile = "docker/axcl/Dockerfile"
contexts = {
frigate = "target:frigate",
}
platforms = ["linux/amd64", "linux/arm64"]
}

docker/axcl/axcl.mk (new file, 15 lines)

@@ -0,0 +1,15 @@
BOARDS += axcl
local-axcl: version
docker buildx bake --file=docker/axcl/axcl.hcl axcl \
--set axcl.tags=frigate:latest-axcl \
--load
build-axcl: version
docker buildx bake --file=docker/axcl/axcl.hcl axcl \
--set axcl.tags=$(IMAGE_REPO):${GITHUB_REF_NAME}-$(COMMIT_HASH)-axcl
push-axcl: build-axcl
docker buildx bake --file=docker/axcl/axcl.hcl axcl \
--set axcl.tags=$(IMAGE_REPO):${GITHUB_REF_NAME}-$(COMMIT_HASH)-axcl \
--push


@@ -0,0 +1,83 @@
#!/bin/bash
# Update package list and install dependencies
sudo apt-get update
sudo apt-get install -y build-essential cmake git wget pciutils kmod udev
# Check if gcc-12 is needed
current_gcc_version=$(gcc --version | head -n1 | awk '{print $NF}')
gcc_major_version=$(echo $current_gcc_version | cut -d'.' -f1)
if [[ $gcc_major_version -lt 12 ]]; then
echo "Current GCC version ($current_gcc_version) is lower than 12, installing gcc-12..."
sudo apt-get install -y gcc-12
sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 12
echo "GCC-12 installed and set as default"
else
echo "Current GCC version ($current_gcc_version) is sufficient, skipping GCC installation"
fi
# Determine architecture
arch=$(uname -m)
download_url=""
if [[ $arch == "x86_64" ]]; then
download_url="https://github.com/ivanshi1108/assets/releases/download/v0.16.2/axcl_host_x86_64_V3.6.5_20250908154509_NO4973.deb"
deb_file="axcl_host_x86_64_V3.6.5_20250908154509_NO4973.deb"
elif [[ $arch == "aarch64" ]]; then
download_url="https://github.com/ivanshi1108/assets/releases/download/v0.16.2/axcl_host_aarch64_V3.6.5_20250908154509_NO4973.deb"
deb_file="axcl_host_aarch64_V3.6.5_20250908154509_NO4973.deb"
else
echo "Unsupported architecture: $arch"
exit 1
fi
# Download AXCL driver
echo "Downloading AXCL driver for $arch..."
wget "$download_url" -O "$deb_file"
if [ $? -ne 0 ]; then
echo "Failed to download AXCL driver"
exit 1
fi
# Install AXCL driver
echo "Installing AXCL driver..."
sudo dpkg -i "$deb_file"
if [ $? -ne 0 ]; then
echo "Failed to install AXCL driver, attempting to fix dependencies..."
sudo apt-get install -f -y
sudo dpkg -i "$deb_file"
if [ $? -ne 0 ]; then
echo "AXCL driver installation failed"
exit 1
fi
fi
# Update environment
echo "Updating environment..."
source /etc/profile
# Verify installation
echo "Verifying AXCL installation..."
if command -v axcl-smi &> /dev/null; then
echo "AXCL driver detected, checking AI accelerator status..."
axcl_output=$(axcl-smi 2>&1)
axcl_exit_code=$?
echo "$axcl_output"
if [ $axcl_exit_code -eq 0 ]; then
echo "AXCL driver installation completed successfully!"
else
echo "AXCL driver installed but no AI accelerator detected or communication failed."
echo "Please check if the AI accelerator is properly connected and powered on."
exit 1
fi
else
echo "axcl-smi command not found. AXCL driver installation may have failed."
exit 1
fi


@@ -3,7 +3,6 @@
# https://askubuntu.com/questions/972516/debian-frontend-environment-variable
ARG DEBIAN_FRONTEND=noninteractive
ARG ROCM=1
ARG AMDGPU=gfx900
ARG HSA_OVERRIDE_GFX_VERSION
ARG HSA_OVERRIDE
@@ -11,7 +10,6 @@ ARG HSA_OVERRIDE
FROM wget AS rocm
ARG ROCM
ARG AMDGPU
RUN apt update -qq && \
apt install -y wget gpg && \
@@ -36,7 +34,10 @@ FROM deps AS deps-prelim
COPY docker/rocm/debian-backports.sources /etc/apt/sources.list.d/debian-backports.sources
RUN apt-get update && \
apt-get install -y libnuma1 && \
apt-get install -qq -y -t bookworm-backports mesa-va-drivers mesa-vulkan-drivers
apt-get install -qq -y -t bookworm-backports mesa-va-drivers mesa-vulkan-drivers && \
# Install C++ standard library headers for HIPRTC kernel compilation fallback
apt-get install -qq -y libstdc++-12-dev && \
rm -rf /var/lib/apt/lists/*
WORKDIR /opt/frigate
COPY --from=rootfs / /
@@ -54,12 +55,14 @@ RUN pip3 uninstall -y onnxruntime \
FROM scratch AS rocm-dist
ARG ROCM
ARG AMDGPU
COPY --from=rocm /opt/rocm-$ROCM/bin/rocminfo /opt/rocm-$ROCM/bin/migraphx-driver /opt/rocm-$ROCM/bin/
COPY --from=rocm /opt/rocm-$ROCM/share/miopen/db/*$AMDGPU* /opt/rocm-$ROCM/share/miopen/db/
COPY --from=rocm /opt/rocm-$ROCM/share/miopen/db/*gfx908* /opt/rocm-$ROCM/share/miopen/db/
COPY --from=rocm /opt/rocm-$ROCM/lib/rocblas/library/*$AMDGPU* /opt/rocm-$ROCM/lib/rocblas/library/
# Copy MIOpen database files for gfx10xx and gfx11xx only (RDNA2/RDNA3)
COPY --from=rocm /opt/rocm-$ROCM/share/miopen/db/*gfx10* /opt/rocm-$ROCM/share/miopen/db/
COPY --from=rocm /opt/rocm-$ROCM/share/miopen/db/*gfx11* /opt/rocm-$ROCM/share/miopen/db/
# Copy rocBLAS library files for gfx10xx and gfx11xx only
COPY --from=rocm /opt/rocm-$ROCM/lib/rocblas/library/*gfx10* /opt/rocm-$ROCM/lib/rocblas/library/
COPY --from=rocm /opt/rocm-$ROCM/lib/rocblas/library/*gfx11* /opt/rocm-$ROCM/lib/rocblas/library/
COPY --from=rocm /opt/rocm-dist/ /
#######################################################################


@@ -1,6 +1,3 @@
variable "AMDGPU" {
default = "gfx900"
}
variable "ROCM" {
default = "7.1.1"
}
@@ -38,7 +35,6 @@ target rocm {
}
platforms = ["linux/amd64"]
args = {
AMDGPU = AMDGPU,
ROCM = ROCM,
HSA_OVERRIDE_GFX_VERSION = HSA_OVERRIDE_GFX_VERSION,
HSA_OVERRIDE = HSA_OVERRIDE


@@ -1,53 +1,15 @@
BOARDS += rocm
# AMD/ROCm is chunky so we build couple of smaller images for specific chipsets
ROCM_CHIPSETS:=gfx900:9.0.0 gfx1030:10.3.0 gfx1100:11.0.0
local-rocm: version
$(foreach chipset,$(ROCM_CHIPSETS), \
AMDGPU=$(word 1,$(subst :, ,$(chipset))) \
HSA_OVERRIDE_GFX_VERSION=$(word 2,$(subst :, ,$(chipset))) \
HSA_OVERRIDE=1 \
docker buildx bake --file=docker/rocm/rocm.hcl rocm \
--set rocm.tags=frigate:latest-rocm-$(word 1,$(subst :, ,$(chipset))) \
--load \
&&) true
unset HSA_OVERRIDE_GFX_VERSION && \
HSA_OVERRIDE=0 \
AMDGPU=gfx \
docker buildx bake --file=docker/rocm/rocm.hcl rocm \
--set rocm.tags=frigate:latest-rocm \
--load
build-rocm: version
$(foreach chipset,$(ROCM_CHIPSETS), \
AMDGPU=$(word 1,$(subst :, ,$(chipset))) \
HSA_OVERRIDE_GFX_VERSION=$(word 2,$(subst :, ,$(chipset))) \
HSA_OVERRIDE=1 \
docker buildx bake --file=docker/rocm/rocm.hcl rocm \
--set rocm.tags=$(IMAGE_REPO):${GITHUB_REF_NAME}-$(COMMIT_HASH)-rocm-$(chipset) \
&&) true
unset HSA_OVERRIDE_GFX_VERSION && \
HSA_OVERRIDE=0 \
AMDGPU=gfx \
docker buildx bake --file=docker/rocm/rocm.hcl rocm \
--set rocm.tags=$(IMAGE_REPO):${GITHUB_REF_NAME}-$(COMMIT_HASH)-rocm
push-rocm: build-rocm
$(foreach chipset,$(ROCM_CHIPSETS), \
AMDGPU=$(word 1,$(subst :, ,$(chipset))) \
HSA_OVERRIDE_GFX_VERSION=$(word 2,$(subst :, ,$(chipset))) \
HSA_OVERRIDE=1 \
docker buildx bake --file=docker/rocm/rocm.hcl rocm \
--set rocm.tags=$(IMAGE_REPO):${GITHUB_REF_NAME}-$(COMMIT_HASH)-rocm-$(chipset) \
--push \
&&) true
unset HSA_OVERRIDE_GFX_VERSION && \
HSA_OVERRIDE=0 \
AMDGPU=gfx \
docker buildx bake --file=docker/rocm/rocm.hcl rocm \
--set rocm.tags=$(IMAGE_REPO):${GITHUB_REF_NAME}-$(COMMIT_HASH)-rocm \
--push


@@ -13,7 +13,7 @@ Frigate supports multiple different detectors that work on different types of ha
**Most Hardware**
- [Coral EdgeTPU](#edge-tpu-detector): The Google Coral EdgeTPU is available in USB and m.2 format allowing for a wide range of compatibility with devices.
- [Coral EdgeTPU](#edge-tpu-detector): The Google Coral EdgeTPU is available in USB, Mini PCIe, and m.2 formats allowing for a wide range of compatibility with devices.
- [Hailo](#hailo-8): The Hailo8 and Hailo8L AI Acceleration module is available in m.2 format with a HAT for RPi devices, offering a wide range of compatibility with devices.
- <CommunityBadge /> [MemryX](#memryx-mx3): The MX3 Acceleration module is available in m.2 format, offering broad compatibility across various platforms.
- <CommunityBadge /> [DeGirum](#degirum): Service for using hardware devices in the cloud or locally. Hardware and models provided on the cloud on [their website](https://hub.degirum.com).
@@ -49,6 +49,11 @@ Frigate supports multiple different detectors that work on different types of ha
- [Synaptics](#synaptics): synap models can run on Synaptics devices(e.g astra machina) with included NPUs.
**AXERA** <CommunityBadge />
- [AXEngine](#axera): axmodel files can run on AXERA AI accelerators.
**For Testing**
- [CPU Detector (not recommended for actual use](#cpu-detector-not-recommended): Use a CPU to run tflite model, this is not recommended and in most cases OpenVINO can be used in CPU mode with better results.
@@ -69,12 +74,10 @@ Frigate provides the following builtin detector types: `cpu`, `edgetpu`, `hailo8
## Edge TPU Detector
The Edge TPU detector type runs a TensorFlow Lite model utilizing the Google Coral delegate for hardware acceleration. To configure an Edge TPU detector, set the `"type"` attribute to `"edgetpu"`.
The Edge TPU detector type runs TensorFlow Lite models utilizing the Google Coral delegate for hardware acceleration. To configure an Edge TPU detector, set the `"type"` attribute to `"edgetpu"`.
The Edge TPU device can be specified using the `"device"` attribute according to the [Documentation for the TensorFlow Lite Python API](https://coral.ai/docs/edgetpu/multiple-edgetpu/#using-the-tensorflow-lite-python-api). If not set, the delegate will use the first device it finds.
A TensorFlow Lite model is provided in the container at `/edgetpu_model.tflite` and is used by this detector type by default. To provide your own model, bind mount the file into the container and provide the path with `model.path`.
:::tip
See [common Edge TPU troubleshooting steps](/troubleshooting/edgetpu) if the Edge TPU is not detected.
@@ -146,6 +149,52 @@ detectors:
device: pci
```
### EdgeTPU Supported Models
| Model | Notes |
| ------------------------------------- | ------------------------------------------- |
| [MobileNet v2](#ssdlite-mobilenet-v2) | Default model |
| [YOLOv9](#yolo-v9) | More accurate but slower than default model |
#### SSDLite MobileNet v2
A TensorFlow Lite model is provided in the container at `/edgetpu_model.tflite` and is used by this detector type by default. To provide your own model, bind mount the file into the container and provide the path with `model.path`.
The model comes from Intel's Open Model Zoo [SSDLite MobileNet V2](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/ssdlite_mobilenet_v2) and is converted to an INT8 precision model.
#### YOLO v9
[YOLOv9](https://github.com/dbro/frigate-detector-edgetpu-yolo9/releases/download/v1.0/yolov9-s-relu6-best_320_int8_edgetpu.tflite) models that are compiled for TensorFlow Lite and properly quantized are supported, but not included by default. To provide your own model, bind mount the file into the container and provide the path with `model.path`. Note that the model may require a custom label file (e.g. [use this 17 label file](https://raw.githubusercontent.com/dbro/frigate-detector-edgetpu-yolo9/refs/heads/main/labels-coco17.txt) for the model linked above).
<details>
<summary>YOLOv9 Setup & Config</summary>
:::warning
If you are using a Frigate+ YOLOv9 model, you should not define any of the below `model` parameters in your config except for `path`. See [the Frigate+ model docs](/plus/first_model#step-3-set-your-model-id-in-the-config) for more information on setting up your model.
:::
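The model and label files linked above can be downloaded with any tool; the short sketch below is one way to fetch them into the folder that is mapped to `/config` in the container (the destination paths are assumptions about your volume mapping):

```python
# Hypothetical helper: fetch the example YOLOv9 Edge TPU model and its 17-label
# file referenced in this section into Frigate's model cache folder.
import urllib.request

urllib.request.urlretrieve(
    "https://github.com/dbro/frigate-detector-edgetpu-yolo9/releases/download/v1.0/yolov9-s-relu6-best_320_int8_edgetpu.tflite",
    "/config/model_cache/yolov9-s-relu6-best_320_int8_edgetpu.tflite",
)
urllib.request.urlretrieve(
    "https://raw.githubusercontent.com/dbro/frigate-detector-edgetpu-yolo9/refs/heads/main/labels-coco17.txt",
    "/config/model_cache/labels-coco17.txt",
)
```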
After placing the downloaded files for the tflite model and labels in your config folder, you can use the following configuration:
```yaml
detectors:
  coral:
    type: edgetpu
    device: usb

model:
  model_type: yolo-generic
  width: 320 # <--- should match the imgsize of the model, typically 320
  height: 320 # <--- should match the imgsize of the model, typically 320
  path: /config/model_cache/yolov9-s-relu6-best_320_int8_edgetpu.tflite
  labelmap_path: /labelmap/labels-coco-17.txt
```
Note that the labelmap uses a subset of the complete COCO label set that has only 17 objects.
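If the model does not load, it can help to confirm the file really is an Edge TPU compiled, fully quantized build with the two or three output tensors this detector expects. A minimal sketch using the same TensorFlow Lite runtime the detector imports (the file name is an assumption, and the Coral `libedgetpu` runtime must be installed on the host):

```python
# Standalone sanity check for a YOLOv9 Edge TPU tflite file (not part of Frigate).
from tflite_runtime.interpreter import Interpreter, load_delegate

interpreter = Interpreter(
    model_path="yolov9-s-relu6-best_320_int8_edgetpu.tflite",
    experimental_delegates=[load_delegate("libedgetpu.so.1.0")],
)
interpreter.allocate_tensors()

# Expect an int8/uint8 input at the configured width/height and 2-3 outputs.
inp = interpreter.get_input_details()[0]
print("input:", inp["shape"], inp["dtype"])
for out in interpreter.get_output_details():
    print("output:", out["shape"], out["dtype"], out["quantization"])
```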
</details>
---
## Hailo-8
@@ -1438,6 +1487,41 @@ model:
input_pixel_format: rgb/bgr # look at the model.json to figure out which to put here
```
## AXERA
Hardware accelerated object detection is supported on the following SoCs:
- AX650N
- AX8850N
This implementation uses the [AXera Pulsar2 Toolchain](https://huggingface.co/AXERA-TECH/Pulsar2).
See the [installation docs](../frigate/installation.md#axera) for information on configuring the AXEngine hardware.
### Configuration
When configuring the AXEngine detector, you must specify the model name.
#### YOLOv9
A YOLOv9 model is provided in the container at `/axmodels` and is used by this detector type by default.
Use the model configuration shown below when using the axengine detector with the default axmodel:
```yaml
detectors:
  axengine:
    type: axengine

model:
  path: frigate-yolov9-tiny
  model_type: yolo-generic
  width: 320
  height: 320
  tensor_format: bgr
  labelmap_path: /labelmap/coco-80.txt
```
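To check an AXERA accelerator and a downloaded axmodel outside of Frigate, here is a minimal sketch using the same `pyaxengine` calls the detector plugin makes (the cache path, NHWC layout, and 320x320 uint8 input are assumptions; adjust them to your model):

```python
# Standalone smoke test for an .axmodel (not part of Frigate).
import time

import axengine as axe
import numpy as np

session = axe.InferenceSession(
    "/config/model_cache/axengine_cache/frigate-yolov9-tiny.axmodel"
)
dummy = np.zeros((1, 320, 320, 3), dtype=np.uint8)  # assumed NHWC uint8 input

start = time.monotonic()
outputs = session.run(None, {"images": dummy})
print(f"inference took {(time.monotonic() - start) * 1000:.1f} ms")
print([o.shape for o in outputs])
```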
# Models # Models
Some model types are not included in Frigate by default. Some model types are not included in Frigate by default.


@@ -104,6 +104,10 @@ Frigate supports multiple different detectors that work on different types of ha
- [Synaptics](#synaptics): synap models can run on Synaptics devices(e.g astra machina) with included NPUs to provide efficient object detection.
**AXERA** <CommunityBadge />
- [AXEngine](#axera): axera models can run on AXERA NPUs via AXEngine, delivering highly efficient object detection.
:::
### Hailo-8
@@ -287,6 +291,14 @@ The inference time of a rk3588 with all 3 cores enabled is typically 25-30 ms fo
| ssd mobilenet | ~ 25 ms |
| yolov5m | ~ 118 ms |
### AXERA
- **AXEngine**: default model is **yolov9**
| Name | AXERA AX650N/AX8850N Inference Time |
| ---------------- | ----------------------------------- |
| yolov9-tiny | ~ 4 ms |
## What does Frigate use the CPU for and what does it use a detector for? (ELI5 Version)
This is taken from a [user question on reddit](https://www.reddit.com/r/homeassistant/comments/q8mgau/comment/hgqbxh5/?utm_source=share&utm_medium=web2x&context=3). Modified slightly for clarity.


@@ -287,6 +287,42 @@ or add these options to your `docker run` command:
Next, you should configure [hardware object detection](/configuration/object_detectors#synaptics) and [hardware video processing](/configuration/hardware_acceleration_video#synaptics).
### AXERA
<details>
<summary>AXERA accelerators</summary>
AXERA accelerators are available in an M.2 form factor, compatible with both Raspberry Pi and Orange Pi. This form factor has also been successfully tested on x86 platforms, making it a versatile choice for various computing environments.
#### Installation
Using AXERA accelerators requires the installation of the AXCL driver. We provide a convenient Linux script to complete this installation.
Follow these steps for installation:
1. Copy or download [this script](https://github.com/ivanshi1108/assets/releases/download/v0.16.2/user_installation.sh).
2. Ensure it has execution permissions with `sudo chmod +x user_installation.sh`
3. Run the script with `./user_installation.sh`
#### Setup
To set up Frigate, follow the default installation instructions and use the standard image, for example `ghcr.io/blakeblackshear/frigate:stable`.
Next, grant Docker permissions to access your hardware by adding the following lines to your `docker-compose.yml` file:
```yaml
devices:
  - /dev/axcl_host
  - /dev/ax_mmb_dev
  - /dev/msg_userdev
```
If you are using `docker run`, add these options to your command: `--device /dev/axcl_host --device /dev/ax_mmb_dev --device /dev/msg_userdev`.
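Before starting the container, you can optionally confirm that the AXCL driver created the device nodes listed above; a small sketch:

```python
# Optional host-side check that the AXCL device nodes exist (not part of Frigate).
import os

for dev in ("/dev/axcl_host", "/dev/ax_mmb_dev", "/dev/msg_userdev"):
    status = "found" if os.path.exists(dev) else "missing - re-check the AXCL driver install"
    print(dev, status)
```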
#### Configuration
Finally, configure [hardware object detection](/configuration/object_detectors#axera) to complete the setup.
</details>
## Docker
Running through Docker with Docker Compose is the recommended install method.


@@ -0,0 +1,86 @@
import logging
import os.path
import re
import urllib.request
from typing import Literal
import axengine as axe
from frigate.const import MODEL_CACHE_DIR
from frigate.detectors.detection_api import DetectionApi
from frigate.detectors.detector_config import BaseDetectorConfig, ModelTypeEnum
from frigate.util.model import post_process_yolo
logger = logging.getLogger(__name__)
DETECTOR_KEY = "axengine"
supported_models = {
ModelTypeEnum.yologeneric: "frigate-yolov9-.*$",
}
model_cache_dir = os.path.join(MODEL_CACHE_DIR, "axengine_cache/")
class AxengineDetectorConfig(BaseDetectorConfig):
type: Literal[DETECTOR_KEY]
class Axengine(DetectionApi):
type_key = DETECTOR_KEY
def __init__(self, config: AxengineDetectorConfig):
logger.info("__init__ axengine")
super().__init__(config)
self.height = config.model.height
self.width = config.model.width
model_path = config.model.path or "frigate-yolov9-tiny"
model_props = self.parse_model_input(model_path)
self.session = axe.InferenceSession(model_props["path"])
def __del__(self):
pass
def parse_model_input(self, model_path):
model_props = {}
model_props["preset"] = True
model_matched = False
for model_type, pattern in supported_models.items():
if re.match(pattern, model_path):
model_matched = True
model_props["model_type"] = model_type
if model_matched:
model_props["filename"] = model_path + ".axmodel"
model_props["path"] = model_cache_dir + model_props["filename"]
if not os.path.isfile(model_props["path"]):
self.download_model(model_props["filename"])
else:
supported_models_str = ", ".join(model[1:-1] for model in supported_models)
raise Exception(
f"Model {model_path} is unsupported. Provide your own model or choose one of the following: {supported_models_str}"
)
return model_props
def download_model(self, filename):
if not os.path.isdir(model_cache_dir):
os.mkdir(model_cache_dir)
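# GITHUB_ENDPOINT lets the download be pointed at a mirror of github.com if needed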
GITHUB_ENDPOINT = os.environ.get("GITHUB_ENDPOINT", "https://github.com")
urllib.request.urlretrieve(
f"{GITHUB_ENDPOINT}/ivanshi1108/assets/releases/download/v0.16.2/{filename}",
model_cache_dir + filename,
)
def detect_raw(self, tensor_input):
results = None
results = self.session.run(None, {"images": tensor_input})
if self.detector_config.model.model_type == ModelTypeEnum.yologeneric:
return post_process_yolo(results, self.width, self.height)
else:
raise ValueError(
f'Model type "{self.detector_config.model.model_type}" is currently not supported.'
)


@@ -1,19 +1,20 @@
import logging
import math
import os
import cv2
import numpy as np
from pydantic import Field
from typing_extensions import Literal
from frigate.detectors.detection_api import DetectionApi
from frigate.detectors.detector_config import BaseDetectorConfig
from frigate.detectors.detector_config import BaseDetectorConfig, ModelTypeEnum
try:
from tflite_runtime.interpreter import Interpreter, load_delegate
except ModuleNotFoundError:
from tensorflow.lite.python.interpreter import Interpreter, load_delegate
logger = logging.getLogger(__name__)
DETECTOR_KEY = "edgetpu"
@@ -26,6 +27,10 @@ class EdgeTpuDetectorConfig(BaseDetectorConfig):
class EdgeTpuTfl(DetectionApi):
type_key = DETECTOR_KEY
supported_models = [
ModelTypeEnum.ssd,
ModelTypeEnum.yologeneric,
]
def __init__(self, detector_config: EdgeTpuDetectorConfig):
device_config = {}
@ -63,31 +68,294 @@ class EdgeTpuTfl(DetectionApi):
self.tensor_input_details = self.interpreter.get_input_details()
self.tensor_output_details = self.interpreter.get_output_details()
self.model_width = detector_config.model.width
self.model_height = detector_config.model.height
self.min_score = 0.4
self.max_detections = 20
self.model_type = detector_config.model.model_type
self.model_requires_int8 = self.tensor_input_details[0]["dtype"] == np.int8
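# (Coral-compiled models may expect uint8 or int8 input; pre_process() converts when needed)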
if self.model_type == ModelTypeEnum.yologeneric:
logger.debug("Using YOLO preprocessing/postprocessing")
if len(self.tensor_output_details) not in [2, 3]:
logger.error(
f"Invalid count of output tensors in YOLO model. Found {len(self.tensor_output_details)}, expecting 2 or 3."
)
raise
self.reg_max = 16 # = 64 dfl_channels // 4 # YOLO standard
self.min_logit_value = np.log(
self.min_score / (1 - self.min_score)
) # for filtering
self._generate_anchors_and_strides() # decode bounding box DFL
self.project = np.arange(
self.reg_max, dtype=np.float32
) # for decoding bounding box DFL information
# Determine YOLO tensor indices and quantization scales for
# boxes and class_scores the tensor ordering and names are
# not reliable, so use tensor shape to detect which tensor
# holds boxes or class scores.
# The tensors have shapes (B, N, C)
# where N is the number of candidates (=2100 for 320x320)
# this may guess wrong if the number of classes is exactly 64
output_boxes_index = None
output_classes_index = None
for i, x in enumerate(self.tensor_output_details):
# the nominal index seems to start at 1 instead of 0
if len(x["shape"]) == 3 and x["shape"][2] == 64:
output_boxes_index = i
elif len(x["shape"]) == 3 and x["shape"][2] > 1:
# require the number of classes to be more than 1
# to differentiate from (not used) max score tensor
output_classes_index = i
if output_boxes_index is None or output_classes_index is None:
logger.warning("Unrecognized model output, unexpected tensor shapes.")
output_classes_index = (
0
if (output_boxes_index is None or output_classes_index == 1)
else 1
) # 0 is default guess
output_boxes_index = 1 if (output_boxes_index == 0) else 0
scores_details = self.tensor_output_details[output_classes_index]
self.scores_tensor_index = scores_details["index"]
self.scores_scale, self.scores_zero_point = scores_details["quantization"]
# calculate the quantized version of the min_score
self.min_score_quantized = int(
(self.min_logit_value / self.scores_scale) + self.scores_zero_point
)
self.logit_shift_to_positive_values = (
max(0, math.ceil((128 + self.scores_zero_point) * self.scores_scale))
+ 1
) # round up
boxes_details = self.tensor_output_details[output_boxes_index]
self.boxes_tensor_index = boxes_details["index"]
self.boxes_scale, self.boxes_zero_point = boxes_details["quantization"]
elif self.model_type == ModelTypeEnum.ssd:
logger.debug("Using SSD preprocessing/postprocessing")
# SSD model indices (4 outputs: boxes, class_ids, scores, count)
for x in self.tensor_output_details:
if len(x["shape"]) == 3:
self.output_boxes_index = x["index"]
elif len(x["shape"]) == 1:
self.output_count_index = x["index"]
self.output_class_ids_index = None
self.output_class_scores_index = None
else:
raise Exception(
f"{self.model_type} is currently not supported for edgetpu. See the docs for more info on supported models."
)
def _generate_anchors_and_strides(self):
# for decoding the bounding box DFL information into xy coordinates
all_anchors = []
all_strides = []
strides = (8, 16, 32) # YOLO's small, medium, large detection heads
for stride in strides:
feat_h, feat_w = self.model_height // stride, self.model_width // stride
grid_y, grid_x = np.meshgrid(
np.arange(feat_h, dtype=np.float32),
np.arange(feat_w, dtype=np.float32),
indexing="ij",
)
grid_coords = np.stack((grid_x.flatten(), grid_y.flatten()), axis=1)
anchor_points = grid_coords + 0.5
all_anchors.append(anchor_points)
all_strides.append(np.full((feat_h * feat_w, 1), stride, dtype=np.float32))
self.anchors = np.concatenate(all_anchors, axis=0)
self.anchor_strides = np.concatenate(all_strides, axis=0)
def determine_indexes_for_non_yolo_models(self):
"""Legacy method for SSD models."""
if (
self.output_class_ids_index is None
or self.output_class_scores_index is None
):
for i in range(4):
index = self.tensor_output_details[i]["index"]
if (
index != self.output_boxes_index
and index != self.output_count_index
):
if (
np.mod(np.float32(self.interpreter.tensor(index)()[0][0]), 1)
== 0.0
):
self.output_class_ids_index = index
else:
self.output_scores_index = index
def pre_process(self, tensor_input):
if self.model_requires_int8:
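# For uint8 input, XOR with 128 flips the sign bit; reinterpreting the result
# as int8 is equivalent to subtracting 128, mapping [0, 255] onto the
# [-128, 127] range an int8-quantized model expects.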
tensor_input = np.bitwise_xor(tensor_input, 128).view(
np.int8
) # shift by -128
return tensor_input
def detect_raw(self, tensor_input):
tensor_input = self.pre_process(tensor_input)
self.interpreter.set_tensor(self.tensor_input_details[0]["index"], tensor_input)
self.interpreter.invoke()
boxes = self.interpreter.tensor(self.tensor_output_details[0]["index"])()[0]
class_ids = self.interpreter.tensor(self.tensor_output_details[1]["index"])()[0]
scores = self.interpreter.tensor(self.tensor_output_details[2]["index"])()[0]
count = int(
self.interpreter.tensor(self.tensor_output_details[3]["index"])()[0]
)
detections = np.zeros((20, 6), np.float32)
for i in range(count):
if scores[i] < 0.4 or i == 20:
break
detections[i] = [
class_ids[i],
float(scores[i]),
boxes[i][0],
boxes[i][1],
boxes[i][2],
boxes[i][3],
]
return detections

if self.model_type == ModelTypeEnum.yologeneric:
# Multi-tensor YOLO model with (non-standard B(H*W)C output format).
# (the comments indicate the shape of tensors,
# using "2100" as the anchor count (for image size of 320x320),
# "NC" as number of classes,
# "N" as the count that survive after min-score filtering)
# TENSOR A) class scores (1, 2100, NC) with logit values
# TENSOR B) box coordinates (1, 2100, 64) encoded as dfl scores
# Recommend that the model clamp the logit values in tensor (A)
# to the range [-4,+4] to preserve precision from [2%,98%]
# and because NMS requires the min_score parameter to be >= 0
# don't dequantize scores data yet, wait until the low-confidence
# candidates are filtered out from the overall result set.
# This reduces the work and makes post-processing faster.
# this method works with raw quantized numbers when possible,
# which relies on the value of the scale factor to be >0.
# This speeds up max and argmax operations.
# Get max confidence for each detection and create the mask
detections = np.zeros(
(self.max_detections, 6), np.float32
) # initialize zero results
scores_output_quantized = self.interpreter.get_tensor(
self.scores_tensor_index
)[0] # (2100, NC)
max_scores_quantized = np.max(scores_output_quantized, axis=1) # (2100,)
mask = max_scores_quantized >= self.min_score_quantized # (2100,)
if not np.any(mask):
return detections # empty results
max_scores_filtered_shiftedpositive = (
(max_scores_quantized[mask] - self.scores_zero_point)
* self.scores_scale
) + self.logit_shift_to_positive_values # (N,1) shifted logit values
scores_output_quantized_filtered = scores_output_quantized[mask]
# dequantize boxes. NMS needs them to be in float format
# remove candidates with probabilities < threshold
boxes_output_quantized_filtered = (
self.interpreter.get_tensor(self.boxes_tensor_index)[0]
)[mask] # (N, 64)
boxes_output_filtered = (
boxes_output_quantized_filtered.astype(np.float32)
- self.boxes_zero_point
) * self.boxes_scale
# 2. Decode DFL to distances (ltrb)
dfl_distributions = boxes_output_filtered.reshape(
-1, 4, self.reg_max
) # (N, 4, 16)
# Softmax over the 16 bins
dfl_max = np.max(dfl_distributions, axis=2, keepdims=True)
dfl_exp = np.exp(dfl_distributions - dfl_max)
dfl_probs = dfl_exp / np.sum(dfl_exp, axis=2, keepdims=True) # (N, 4, 16)
# Weighted sum: (N, 4, 16) * (16,) -> (N, 4)
distances = np.einsum("pcr,r->pc", dfl_probs, self.project)
# Calculate box corners in pixel coordinates
anchors_filtered = self.anchors[mask]
anchor_strides_filtered = self.anchor_strides[mask]
x1y1 = (
anchors_filtered - distances[:, [0, 1]]
) * anchor_strides_filtered # (N, 2)
x2y2 = (
anchors_filtered + distances[:, [2, 3]]
) * anchor_strides_filtered # (N, 2)
boxes_filtered_decoded = np.concatenate((x1y1, x2y2), axis=-1) # (N, 4)
# 9. Apply NMS. Use logit scores here to defer sigmoid()
# until after filtering out redundant boxes
# Shift the logit scores to be non-negative (required by cv2)
indices = cv2.dnn.NMSBoxes(
bboxes=boxes_filtered_decoded,
scores=max_scores_filtered_shiftedpositive,
score_threshold=(
self.min_logit_value + self.logit_shift_to_positive_values
),
nms_threshold=0.4, # should this be a model config setting?
)
num_detections = len(indices)
if num_detections == 0:
return detections # empty results
nms_indices = np.array(indices, dtype=np.int32).ravel() # or .flatten()
if num_detections > self.max_detections:
nms_indices = nms_indices[: self.max_detections]
num_detections = self.max_detections
kept_logits_quantized = scores_output_quantized_filtered[nms_indices]
class_ids_post_nms = np.argmax(kept_logits_quantized, axis=1)
# Extract the final boxes and scores using fancy indexing
final_boxes = boxes_filtered_decoded[nms_indices]
final_scores_logits = (
max_scores_filtered_shiftedpositive[nms_indices]
- self.logit_shift_to_positive_values
) # Unshifted logits
# Detections array format: [class_id, score, ymin, xmin, ymax, xmax]
detections[:num_detections, 0] = class_ids_post_nms
detections[:num_detections, 1] = 1.0 / (
1.0 + np.exp(-final_scores_logits)
) # sigmoid
detections[:num_detections, 2] = final_boxes[:, 1] / self.model_height
detections[:num_detections, 3] = final_boxes[:, 0] / self.model_width
detections[:num_detections, 4] = final_boxes[:, 3] / self.model_height
detections[:num_detections, 5] = final_boxes[:, 2] / self.model_width
return detections
elif self.model_type == ModelTypeEnum.ssd:
self.determine_indexes_for_non_yolo_models()
boxes = self.interpreter.tensor(self.tensor_output_details[0]["index"])()[0]
class_ids = self.interpreter.tensor(
self.tensor_output_details[1]["index"]
)()[0]
scores = self.interpreter.tensor(self.tensor_output_details[2]["index"])()[
0
]
count = int(
self.interpreter.tensor(self.tensor_output_details[3]["index"])()[0]
)
detections = np.zeros((self.max_detections, 6), np.float32)
for i in range(count):
if scores[i] < self.min_score:
break
if i == self.max_detections:
logger.debug(f"Too many detections ({count})!")
break
detections[i] = [
class_ids[i],
float(scores[i]),
boxes[i][0],
boxes[i][1],
boxes[i][2],
boxes[i][3],
]
return detections
else:
raise Exception(
f"{self.model_type} is currently not supported for edgetpu. See the docs for more info on supported models."
)