This commit is contained in:
GuoQing Liu 2026-03-05 20:29:31 -05:00 committed by GitHub
commit f63e1771b5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
19 changed files with 896 additions and 21 deletions

143
.github/workflows/ax.yml vendored Normal file
View File

@ -0,0 +1,143 @@
# Manually-triggered CI workflow that builds the AXERA (AXCL) image variants
# for x86, Rockchip, and Raspberry Pi bases, then uploads them as artifacts.
name: AXERA

on:
  workflow_dispatch:

concurrency:
  group: ${{ github.ref }}
  cancel-in-progress: true

env:
  PYTHON_VERSION: "3.9"

jobs:
  x86_axcl_builds:
    runs-on: ubuntu-22.04
    name: x86_AXCL Build
    steps:
      - name: Check out code
        uses: actions/checkout@v4
        with:
          persist-credentials: false
      - name: Set x86_AXCL_TAG
        run: echo "x86_AXCL_TAG=x86-axcl-${GITHUB_SHA:0:7}" >> $GITHUB_ENV
      - name: Set Version
        run: make version
      - name: Build
        uses: docker/bake-action@v6
        with:
          source: .
          push: false
          targets: x86-axcl
          files: docker/axcl/x86-axcl.hcl
          no-cache: true
          set: |
            x86-axcl.tags=frigate:${{ env.x86_AXCL_TAG }}
      - name: Clean up disk space
        run: |
          docker system prune -f
      - name: Save Docker image as tar file
        run: |
          docker save frigate:${{ env.x86_AXCL_TAG }} -o frigate-${{ env.x86_AXCL_TAG }}.tar
          ls -lh frigate-${{ env.x86_AXCL_TAG }}.tar
      - name: Upload Docker image artifact
        uses: actions/upload-artifact@v4
        with:
          name: x86-axcl-docker-image
          path: frigate-${{ env.x86_AXCL_TAG }}.tar
          retention-days: 7
  rk_axcl_builds:
    runs-on: ubuntu-22.04-arm
    name: rk_AXCL Build
    steps:
      - name: Check out code
        uses: actions/checkout@v4
        with:
          persist-credentials: false
      - name: Set RK_AXCL_TAG
        run: echo "RK_AXCL_TAG=rk-axcl-${GITHUB_SHA:0:7}" >> $GITHUB_ENV
      - name: Set Version
        run: make version
      - name: Build
        uses: docker/bake-action@v6
        with:
          source: .
          push: false
          targets: rk-axcl
          files: |
            docker/rockchip/rk.hcl
            docker/axcl/rk-axcl.hcl
          no-cache: true
          set: |
            rk-axcl.tags=frigate:${{ env.RK_AXCL_TAG }}
      - name: Clean up disk space
        run: |
          docker system prune -f
      - name: Save Docker image as tar file
        run: |
          docker save frigate:${{ env.RK_AXCL_TAG }} -o frigate-${{ env.RK_AXCL_TAG }}.tar
          ls -lh frigate-${{ env.RK_AXCL_TAG }}.tar
      - name: Upload Docker image artifact
        uses: actions/upload-artifact@v4
        with:
          name: rk-axcl-docker-image
          path: frigate-${{ env.RK_AXCL_TAG }}.tar
          retention-days: 7
  rpi_axcl_builds:
    runs-on: ubuntu-22.04-arm
    name: RPi_AXCL Build
    steps:
      - name: Check out code
        uses: actions/checkout@v4
        with:
          persist-credentials: false
      - name: Set RPi_AXCL_TAG
        run: echo "RPi_AXCL_TAG=rpi-axcl-${GITHUB_SHA:0:7}" >> $GITHUB_ENV
      - name: Set Version
        run: make version
      - name: Build
        uses: docker/bake-action@v6
        with:
          source: .
          push: false
          targets: rpi-axcl
          files: |
            docker/rpi/rpi.hcl
            docker/axcl/rpi-axcl.hcl
          no-cache: true
          set: |
            rpi-axcl.tags=frigate:${{ env.RPi_AXCL_TAG }}
      - name: Clean up disk space
        run: |
          docker system prune -f
      - name: Save Docker image as tar file
        run: |
          docker save frigate:${{ env.RPi_AXCL_TAG }} -o frigate-${{ env.RPi_AXCL_TAG }}.tar
          ls -lh frigate-${{ env.RPi_AXCL_TAG }}.tar
      - name: Upload Docker image artifact
        uses: actions/upload-artifact@v4
        with:
          name: rpi-axcl-docker-image
          path: frigate-${{ env.RPi_AXCL_TAG }}.tar
          retention-days: 7

View File

@ -224,3 +224,29 @@ jobs:
sources: |
ghcr.io/${{ steps.lowercaseRepo.outputs.lowercase }}:${{ env.SHORT_SHA }}-amd64
ghcr.io/${{ steps.lowercaseRepo.outputs.lowercase }}:${{ env.SHORT_SHA }}-rpi
axera_build:
runs-on: ubuntu-22.04
name: AXERA Build
needs:
- amd64_build
- arm64_build
steps:
- name: Check out code
uses: actions/checkout@v5
with:
persist-credentials: false
- name: Set up QEMU and Buildx
id: setup
uses: ./.github/actions/setup
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Build and push Axera build
uses: docker/bake-action@v6
with:
source: .
push: true
targets: axcl
files: docker/axcl/axcl.hcl
set: |
axcl.tags=${{ steps.setup.outputs.image-name }}-axcl
*.cache-from=type=gha

25
docker/axcl/Dockerfile Normal file
View File

@ -0,0 +1,25 @@
# syntax=docker/dockerfile:1.6

# https://askubuntu.com/questions/972516/debian-frontend-environment-variable
ARG DEBIAN_FRONTEND=noninteractive

# Globally set pip break-system-packages option to avoid having to specify it every time
ARG PIP_BREAK_SYSTEM_PACKAGES=1

# Layer AXERA (AXCL) runtime support on top of the prebuilt frigate image.
# The "frigate" build context is supplied by the bake .hcl files
# (target:frigate, target:rk, or target:rpi depending on the variant).
FROM frigate AS frigate-axcl

ARG TARGETARCH
# Re-declare to pull the global ARG into this stage.
# NOTE(review): DEBIAN_FRONTEND is not re-declared after FROM, so it does not
# apply inside this stage — confirm whether that is intentional.
ARG PIP_BREAK_SYSTEM_PACKAGES

# Install axpyengine
RUN wget https://github.com/AXERA-TECH/pyaxengine/releases/download/0.1.3.rc1/axengine-0.1.3-py3-none-any.whl -O /axengine-0.1.3-py3-none-any.whl
RUN pip3 install -i https://mirrors.aliyun.com/pypi/simple/ /axengine-0.1.3-py3-none-any.whl \
    && rm /axengine-0.1.3-py3-none-any.whl

# Set ldconfig path
RUN echo "/usr/lib/axcl" > /etc/ld.so.conf.d/ax.conf

# Set env
ENV PATH="$PATH:/usr/bin/axcl"
ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/lib/axcl"

# Refresh the linker cache (picks up /usr/lib/axcl) before handing off to /init.
ENTRYPOINT ["sh", "-c", "ldconfig && exec /init"]

13
docker/axcl/axcl.hcl Normal file
View File

@ -0,0 +1,13 @@
# Base frigate image for both architectures; consumed as the build context of
# the axcl target below. Bake target labels must be quoted strings.
target "frigate" {
  dockerfile = "docker/main/Dockerfile"
  platforms  = ["linux/amd64", "linux/arm64"]
  target     = "frigate"
}

# AXERA (AXCL) variant layered on top of the frigate image.
target "axcl" {
  dockerfile = "docker/axcl/Dockerfile"
  contexts = {
    frigate = "target:frigate",
  }
  platforms = ["linux/amd64", "linux/arm64"]
}

15
docker/axcl/axcl.mk Normal file
View File

@ -0,0 +1,15 @@
BOARDS += axcl

# Build the AXCL image locally, tag it frigate:latest-axcl, and load it into
# the local docker daemon. Recipe lines must be tab-indented.
local-axcl: version
	docker buildx bake --file=docker/axcl/axcl.hcl axcl \
		--set axcl.tags=frigate:latest-axcl \
		--load

# CI build of the versioned AXCL image (no push).
build-axcl: version
	docker buildx bake --file=docker/axcl/axcl.hcl axcl \
		--set axcl.tags=$(IMAGE_REPO):${GITHUB_REF_NAME}-$(COMMIT_HASH)-axcl

# Build and push the versioned AXCL image.
push-axcl: build-axcl
	docker buildx bake --file=docker/axcl/axcl.hcl axcl \
		--set axcl.tags=$(IMAGE_REPO):${GITHUB_REF_NAME}-$(COMMIT_HASH)-axcl \
		--push

# These targets name actions, not files.
.PHONY: local-axcl build-axcl push-axcl

7
docker/axcl/rk-axcl.hcl Normal file
View File

@ -0,0 +1,7 @@
# AXCL variant built on top of the Rockchip (rk) base image.
# The "rk" target comes from docker/rockchip/rk.hcl, passed alongside this
# file in the workflow. Bake target labels must be quoted strings.
target "rk-axcl" {
  dockerfile = "docker/axcl/Dockerfile"
  contexts = {
    frigate = "target:rk",
  }
  platforms = ["linux/arm64"]
}

7
docker/axcl/rpi-axcl.hcl Normal file
View File

@ -0,0 +1,7 @@
# AXCL variant built on top of the Raspberry Pi (rpi) base image.
# The "rpi" target comes from docker/rpi/rpi.hcl, passed alongside this file
# in the workflow. Bake target labels must be quoted strings.
target "rpi-axcl" {
  dockerfile = "docker/axcl/Dockerfile"
  contexts = {
    frigate = "target:rpi",
  }
  platforms = ["linux/arm64"]
}

110
docker/axcl/user_installation.sh Executable file
View File

@ -0,0 +1,110 @@
#!/bin/bash
#
# AXCL host driver installation for AXERA M.2 AI accelerators.
# Installs build dependencies, downloads the architecture-specific AXCL .deb
# package, installs it (retrying once after a dependency fix), and verifies
# the accelerator with axcl-smi.
set -e

# Initialize before the traps are armed so cleanup never references an
# unset variable (the ERR trap can fire before the download section runs).
deb_file=""

# Function to clean up on error
cleanup() {
    echo "Cleaning up temporary files..."
    if [ -n "$deb_file" ]; then
        rm -f "$deb_file"
    fi
}
trap cleanup ERR
trap 'echo "Script interrupted by user (Ctrl+C)"; cleanup; exit 130' INT

# Update package list and install dependencies
echo "Updating package list and installing dependencies..."
sudo apt-get update
sudo apt-get install -y build-essential cmake git wget pciutils kmod udev

# Check if gcc-12 is needed
echo "Checking GCC version..."
current_gcc_version=$(gcc --version | head -n1 | awk '{print $NF}')
if ! dpkg --compare-versions "$current_gcc_version" ge "12" 2>/dev/null; then
    echo "Current GCC version ($current_gcc_version) is lower than 12, installing gcc-12..."
    sudo apt-get install -y gcc-12
    sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 12
    echo "GCC-12 installed and set as default"
else
    echo "Current GCC version ($current_gcc_version) is sufficient, skipping GCC installation"
fi

# Determine architecture
echo "Determining system architecture..."
arch=$(uname -m)
download_url=""
if [[ $arch == "x86_64" ]]; then
    download_url="https://github.com/ivanshi1108/assets/releases/download/v0.17/axcl_host_x86_64_V3.10.2_20251111020143_NO5046.deb"
    deb_file="axcl.deb"
elif [[ $arch == "aarch64" ]]; then
    download_url="https://github.com/ivanshi1108/assets/releases/download/v0.17/axcl_host_aarch64_V3.10.2_20251111020143_NO5046.deb"
    deb_file="axcl.deb"
else
    echo "Unsupported architecture: $arch"
    exit 1
fi

# Check for required Linux headers before downloading
echo "Checking for required Linux headers..."
kernel_version=$(uname -r)
if dpkg -l | grep -q "linux-headers-${kernel_version}" || [ -d "/lib/modules/${kernel_version}/build" ]; then
    echo "Linux headers or kernel modules directory found for kernel ${kernel_version}/build."
else
    echo "Linux headers for kernel ${kernel_version} not found. Please install them first: sudo apt-get install linux-headers-${kernel_version}"
    exit 1
fi

# Download AXCL driver.
# BUGFIX: with `set -e`, a plain `wget ...; if [ $? -ne 0 ]` check is dead
# code — the script would exit before the check runs. Test the command
# directly instead (same applies to the dpkg and axcl-smi checks below).
echo "Downloading AXCL driver for $arch..."
if ! wget --timeout=30 --tries=3 "$download_url" -O "$deb_file"; then
    echo "Failed to download AXCL driver after retries"
    exit 1
fi

# Install AXCL driver, retrying once after pulling missing dependencies.
echo "Installing AXCL driver..."
if ! sudo dpkg -i "$deb_file"; then
    echo "Failed to install AXCL driver, attempting to fix dependencies..."
    sudo apt-get install -f -y
    if ! sudo dpkg -i "$deb_file"; then
        echo "AXCL driver installation failed after dependency fix"
        exit 1
    fi
fi

# Update environment. Guarded so a non-zero exit from a profile snippet
# does not abort the script under `set -e`.
echo "Updating environment..."
source /etc/profile || true

# Verify installation
echo "Verifying AXCL installation..."
if command -v axcl-smi &> /dev/null; then
    echo "AXCL driver detected, checking AI accelerator status..."
    if axcl_output=$(axcl-smi 2>&1); then
        echo "$axcl_output"
        echo "AXCL driver installation completed successfully!"
    else
        echo "$axcl_output"
        echo "AXCL driver installed but no AI accelerator detected or communication failed."
        echo "Please check if the AI accelerator is properly connected and powered on."
        exit 1
    fi
else
    echo "axcl-smi command not found. AXCL driver installation may have failed."
    exit 1
fi

# Clean up
echo "Cleaning up temporary files..."
rm -f "$deb_file"

echo "Installation script completed."

13
docker/axcl/x86-axcl.hcl Normal file
View File

@ -0,0 +1,13 @@
# amd64-only frigate base image; consumed as the build context of the
# x86-axcl target below. Bake target labels must be quoted strings.
target "frigate" {
  dockerfile = "docker/main/Dockerfile"
  platforms  = ["linux/amd64"]
  target     = "frigate"
}

# AXCL variant for x86 hosts with an AXERA M.2 accelerator.
target "x86-axcl" {
  dockerfile = "docker/axcl/Dockerfile"
  contexts = {
    frigate = "target:frigate",
  }
  platforms = ["linux/amd64"]
}

View File

@ -49,6 +49,11 @@ Frigate supports multiple different detectors that work on different types of ha
- [Synaptics](#synaptics): synap models can run on Synaptics devices (e.g. astra machina) with included NPUs.
**AXERA** <CommunityBadge />
- [AXEngine](#axera): axmodels can run on AXERA AI acceleration.
**For Testing**
- [CPU Detector (not recommended for actual use)](#cpu-detector-not-recommended): Use a CPU to run a tflite model; this is not recommended, and in most cases OpenVINO can be used in CPU mode with better results.
@ -1478,6 +1483,41 @@ model:
input_pixel_format: rgb/bgr # look at the model.json to figure out which to put here
```
## AXERA
Hardware accelerated object detection is supported on the following SoCs:
- AX650N
- AX8850N
This implementation uses the [AXera Pulsar2 Toolchain](https://huggingface.co/AXERA-TECH/Pulsar2).
See the [installation docs](../frigate/installation.md#axera) for information on configuring the AXEngine hardware.
### Configuration
When configuring the AXEngine detector, you have to specify the model name.
#### yolov9
A yolov9 model is provided in the container at /axmodels and is used by this detector type by default.
Use the model configuration shown below when using the axengine detector with the default axmodel:
```yaml
detectors:
axengine:
type: axengine
model:
path: frigate-yolov9-tiny
model_type: yolo-generic
width: 320
height: 320
tensor_format: bgr
labelmap_path: /labelmap/coco-80.txt
```
# Models
Some model types are not included in Frigate by default.
@ -1571,12 +1611,12 @@ YOLOv9 model can be exported as ONNX using the command below. You can copy and p
```sh
docker build . --build-arg MODEL_SIZE=t --build-arg IMG_SIZE=320 --output . -f- <<'EOF'
FROM python:3.11 AS build
RUN apt-get update && apt-get install --no-install-recommends -y libgl1 && rm -rf /var/lib/apt/lists/*
COPY --from=ghcr.io/astral-sh/uv:0.8.0 /uv /bin/
RUN apt-get update && apt-get install --no-install-recommends -y cmake libgl1 && rm -rf /var/lib/apt/lists/*
COPY --from=ghcr.io/astral-sh/uv:0.10.4 /uv /bin/
WORKDIR /yolov9
ADD https://github.com/WongKinYiu/yolov9.git .
RUN uv pip install --system -r requirements.txt
RUN uv pip install --system onnx==1.18.0 onnxruntime onnx-simplifier>=0.4.1 onnxscript
RUN uv pip install --system onnx==1.18.0 onnxruntime onnx-simplifier==0.4.* onnxscript
ARG MODEL_SIZE
ARG IMG_SIZE
ADD https://github.com/WongKinYiu/yolov9/releases/download/v0.1/yolov9-${MODEL_SIZE}-converted.pt yolov9-${MODEL_SIZE}.pt

View File

@ -103,6 +103,10 @@ Frigate supports multiple different detectors that work on different types of ha
- [Synaptics](#synaptics): synap models can run on Synaptics devices (e.g. astra machina) with included NPUs to provide efficient object detection.
**AXERA** <CommunityBadge />
- [AXEngine](#axera): axera models can run on AXERA NPUs via AXEngine, delivering highly efficient object detection.
:::
### Hailo-8
@ -288,6 +292,14 @@ The inference time of a rk3588 with all 3 cores enabled is typically 25-30 ms fo
| ssd mobilenet | ~ 25 ms |
| yolov5m | ~ 118 ms |
### AXERA
- **AXEngine** Default model is **yolov9**
| Name | AXERA AX650N/AX8850N Inference Time |
| ---------------- | ----------------------------------- |
| yolov9-tiny | ~ 4 ms |
## What does Frigate use the CPU for and what does it use a detector for? (ELI5 Version)
This is taken from a [user question on reddit](https://www.reddit.com/r/homeassistant/comments/q8mgau/comment/hgqbxh5/?utm_source=share&utm_medium=web2x&context=3). Modified slightly for clarity.
@ -308,4 +320,4 @@ Basically - When you increase the resolution and/or the frame rate of the stream
YES! The Coral does not help with decoding video streams.
Decompressing video streams takes a significant amount of CPU power. Video compression uses key frames (also known as I-frames) to send a full frame in the video stream. The following frames only include the difference from the key frame, and the CPU has to compile each frame by merging the differences with the key frame. [More detailed explanation](https://support.video.ibm.com/hc/en-us/articles/18106203580316-Keyframes-InterFrame-Video-Compression). Higher resolutions and frame rates mean more processing power is needed to decode the video stream, so try and set them on the camera to avoid unnecessary decoding work.
Decompressing video streams takes a significant amount of CPU power. Video compression uses key frames (also known as I-frames) to send a full frame in the video stream. The following frames only include the difference from the key frame, and the CPU has to compile each frame by merging the differences with the key frame. [More detailed explanation](https://support.video.ibm.com/hc/en-us/articles/18106203580316-Keyframes-InterFrame-Video-Compression). Higher resolutions and frame rates mean more processing power is needed to decode the video stream, so try and set them on the camera to avoid unnecessary decoding work.

View File

@ -439,6 +439,42 @@ or add these options to your `docker run` command:
Next, you should configure [hardware object detection](/configuration/object_detectors#synaptics) and [hardware video processing](/configuration/hardware_acceleration_video#synaptics).
### AXERA
<details>
<summary>AXERA accelerators</summary>
AXERA accelerators are available in an M.2 form factor, compatible with both Raspberry Pi and Orange Pi. This form factor has also been successfully tested on x86 platforms, making it a versatile choice for various computing environments.
#### Installation
Using AXERA accelerators requires the installation of the AXCL driver. We provide a convenient Linux script to complete this installation.
Follow these steps for installation:
1. Copy or download [this script](https://github.com/ivanshi1108/assets/releases/download/v0.16.2/user_installation.sh).
2. Ensure it has execution permissions with `sudo chmod +x user_installation.sh`
3. Run the script with `./user_installation.sh`
#### Setup
To set up Frigate, follow the default installation instructions, for example: `ghcr.io/blakeblackshear/frigate:stable`
Next, grant Docker permissions to access your hardware by adding the following lines to your `docker-compose.yml` file:
```yaml
devices:
- /dev/axcl_host
- /dev/ax_mmb_dev
- /dev/msg_userdev
```
If you are using `docker run`, add this option to your command `--device /dev/axcl_host --device /dev/ax_mmb_dev --device /dev/msg_userdev`
#### Configuration
Finally, configure [hardware object detection](/configuration/object_detectors#axera) to complete the setup.
</details>
## Docker
Running through Docker with Docker Compose is the recommended install method.

View File

@ -37,18 +37,18 @@ The following diagram adds a lot more detail than the simple view explained befo
%%{init: {"themeVariables": {"edgeLabelBackground": "transparent"}}}%%
flowchart TD
RecStore[(Recording\nstore)]
SnapStore[(Snapshot\nstore)]
RecStore[(Recording<br>store)]
SnapStore[(Snapshot<br>store)]
subgraph Acquisition
Cam["Camera"] -->|FFmpeg supported| Stream
Cam -->|"Other streaming\nprotocols"| go2rtc
Cam -->|"Other streaming<br>protocols"| go2rtc
go2rtc("go2rtc") --> Stream
Stream[Capture main and\nsub streams] --> |detect stream|Decode(Decode and\ndownscale)
Stream[Capture main and<br>sub streams] --> |detect stream|Decode(Decode and<br>downscale)
end
subgraph Motion
Decode --> MotionM(Apply\nmotion masks)
MotionM --> MotionD(Motion\ndetection)
Decode --> MotionM(Apply<br>motion masks)
MotionM --> MotionD(Motion<br>detection)
end
subgraph Detection
MotionD --> |motion regions| ObjectD(Object detection)
@ -60,8 +60,8 @@ flowchart TD
MotionD --> |motion event|Birdseye
ObjectZ --> |object event|Birdseye
MotionD --> |"video segments\n(retain motion)"|RecStore
MotionD --> |"video segments<br>(retain motion)"|RecStore
ObjectZ --> |detection clip|RecStore
Stream -->|"video segments\n(retain all)"| RecStore
Stream -->|"video segments<br>(retain all)"| RecStore
ObjectZ --> |detection snapshot|SnapStore
```

View File

@ -19,6 +19,7 @@ __all__ = [
class SemanticSearchModelEnum(str, Enum):
    """Identifiers for the selectable semantic-search embedding models."""

    jinav1 = "jinav1"
    jinav2 = "jinav2"
    # Jina CLIP v2 compiled for AXERA NPUs (runs via the axengine runtime).
    ax_jinav2 = "ax_jinav2"
class EnrichmentsDeviceEnum(str, Enum):

View File

@ -0,0 +1,86 @@
import logging
import os.path
import re
import urllib.request
from typing import Literal
import axengine as axe
from frigate.const import MODEL_CACHE_DIR
from frigate.detectors.detection_api import DetectionApi
from frigate.detectors.detector_config import BaseDetectorConfig, ModelTypeEnum
from frigate.util.model import post_process_yolo
logger = logging.getLogger(__name__)
# Detector type string used in frigate config (`type: axengine`).
DETECTOR_KEY = "axengine"

# Maps each supported frigate model type to a regex matched against the
# configured model path/name.
supported_models = {
    ModelTypeEnum.yologeneric: "frigate-yolov9-.*$",
}

# On-disk cache directory for downloaded .axmodel files (note trailing slash:
# paths are built by string concatenation below).
model_cache_dir = os.path.join(MODEL_CACHE_DIR, "axengine_cache/")
class AxengineDetectorConfig(BaseDetectorConfig):
    """Detector configuration selected with ``type: axengine``."""

    # Discriminator field used by frigate's detector config to pick this detector.
    type: Literal[DETECTOR_KEY]
class Axengine(DetectionApi):
    """Object detection on AXERA NPUs via the axengine runtime.

    Resolves the configured model name against the supported-model patterns,
    downloads the matching .axmodel into the local cache when missing, and
    runs inference through an axengine InferenceSession.
    """

    type_key = DETECTOR_KEY

    def __init__(self, config: AxengineDetectorConfig):
        logger.info("__init__ axengine")
        super().__init__(config)
        self.height = config.model.height
        self.width = config.model.width

        # Fall back to the bundled default model when no path is configured.
        model_path = config.model.path or "frigate-yolov9-tiny"
        model_props = self.parse_model_input(model_path)

        self.session = axe.InferenceSession(model_props["path"])

    def __del__(self):
        # No explicit teardown required; the session cleans up on GC.
        pass

    def parse_model_input(self, model_path):
        """Resolve a model name to its cached .axmodel file.

        Returns a dict with "preset", "model_type", "filename" and "path"
        keys; raises Exception when the name matches no supported pattern.
        """
        model_props = {}
        model_props["preset"] = True

        model_matched = False
        for model_type, pattern in supported_models.items():
            if re.match(pattern, model_path):
                model_matched = True
                model_props["model_type"] = model_type

        if model_matched:
            model_props["filename"] = model_path + ".axmodel"
            model_props["path"] = model_cache_dir + model_props["filename"]

            if not os.path.isfile(model_props["path"]):
                self.download_model(model_props["filename"])
        else:
            # BUGFIX: iterating the dict directly yields ModelTypeEnum keys,
            # which are not subscriptable (`model[1:-1]` raised TypeError on
            # this error path). List the name patterns instead, dropping the
            # trailing regex anchor for readability.
            supported_models_str = ", ".join(
                pattern.rstrip("$") for pattern in supported_models.values()
            )
            raise Exception(
                f"Model {model_path} is unsupported. Provide your own model or choose one of the following: {supported_models_str}"
            )

        return model_props

    def download_model(self, filename):
        """Fetch the named .axmodel from the release assets into the cache."""
        # makedirs(exist_ok=True) also creates missing parents and avoids a
        # race when another process creates the directory first.
        os.makedirs(model_cache_dir, exist_ok=True)

        # GITHUB_ENDPOINT allows mirroring github.com behind a proxy.
        GITHUB_ENDPOINT = os.environ.get("GITHUB_ENDPOINT", "https://github.com")
        urllib.request.urlretrieve(
            f"{GITHUB_ENDPOINT}/ivanshi1108/assets/releases/download/v0.16.2/{filename}",
            model_cache_dir + filename,
        )

    def detect_raw(self, tensor_input):
        """Run inference on one input tensor and post-process detections.

        Only yolo-generic models are supported; any other configured model
        type raises ValueError.
        """
        results = self.session.run(None, {"images": tensor_input})

        if self.detector_config.model.model_type == ModelTypeEnum.yologeneric:
            return post_process_yolo(results, self.width, self.height)
        else:
            raise ValueError(
                f'Model type "{self.detector_config.model.model_type}" is currently not supported.'
            )

View File

@ -30,6 +30,7 @@ from frigate.util.file import get_event_thumbnail_bytes
from .onnx.jina_v1_embedding import JinaV1ImageEmbedding, JinaV1TextEmbedding
from .onnx.jina_v2_embedding import JinaV2Embedding
from .onnx.jina_v2_embedding_ax import AXJinaV2Embedding
logger = logging.getLogger(__name__)
@ -118,6 +119,18 @@ class Embeddings:
self.vision_embedding = lambda input_data: self.embedding(
input_data, embedding_type="vision"
)
elif self.config.semantic_search.model == SemanticSearchModelEnum.ax_jinav2:
# AXJinaV2Embedding instance for both text and vision
self.embedding = AXJinaV2Embedding(
model_size=self.config.semantic_search.model_size,
requestor=self.requestor,
)
self.text_embedding = lambda input_data: self.embedding(
input_data, embedding_type="text"
)
self.vision_embedding = lambda input_data: self.embedding(
input_data, embedding_type="vision"
)
else: # Default to jinav1
self.text_embedding = JinaV1TextEmbedding(
model_size=config.semantic_search.model_size,

View File

@ -0,0 +1,281 @@
"""AX JinaV2 Embeddings."""
import io
import logging
import os
import threading
from typing import Any
import numpy as np
from PIL import Image
from transformers import AutoTokenizer
from transformers.utils.logging import disable_progress_bar, set_verbosity_error
from frigate.const import MODEL_CACHE_DIR
from frigate.embeddings.onnx.base_embedding import BaseEmbedding
from frigate.comms.inter_process import InterProcessRequestor
from frigate.util.downloader import ModelDownloader
from frigate.types import ModelStatusTypesEnum
from frigate.const import MODEL_CACHE_DIR, UPDATE_MODEL_STATE
import axengine as axe
# disables the progress bar and download logging for downloading tokenizers and image processors
disable_progress_bar()
set_verbosity_error()
logger = logging.getLogger(__name__)
class AXClipRunner:
    """Thin wrapper around two axengine InferenceSessions (image + text CLIP encoders)."""

    def __init__(self, image_encoder_path: str, text_encoder_path: str):
        self.image_encoder_path = image_encoder_path
        self.text_encoder_path = text_encoder_path
        self.image_encoder_runner = axe.InferenceSession(image_encoder_path)
        self.text_encoder_runner = axe.InferenceSession(text_encoder_path)
        # Log the I/O tensor signatures of both encoders for debugging.
        for input in self.image_encoder_runner.get_inputs():
            logger.info(f"{input.name} {input.shape} {input.dtype}")
        for output in self.image_encoder_runner.get_outputs():
            logger.info(f"{output.name} {output.shape} {output.dtype}")
        for input in self.text_encoder_runner.get_inputs():
            logger.info(f"{input.name} {input.shape} {input.dtype}")
        for output in self.text_encoder_runner.get_outputs():
            logger.info(f"{output.name} {output.shape} {output.dtype}")

    def run(self, onnx_inputs):
        """Run whichever encoder(s) the supplied inputs select.

        onnx_inputs may contain "input_ids" (batched token ids) and/or
        "pixel_values" (batched images); each batch element is run through
        its encoder one at a time. Returns (text_embeddings, image_embeddings)
        as numpy arrays; an unused side is an empty array.
        """
        text_embeddings = []
        image_embeddings = []
        if "input_ids" in onnx_inputs:
            for input_ids in onnx_inputs["input_ids"]:
                # Each sample is run individually with a batch dim of 1.
                input_ids = input_ids.reshape(1, -1)
                text_embeddings.append(
                    # NOTE(review): the input tensor name here is "inputs_id",
                    # not "input_ids" — presumably matching the compiled
                    # axmodel's actual input name; confirm against the model.
                    self.text_encoder_runner.run(None, {"inputs_id": input_ids})[0][0]
                )
        if "pixel_values" in onnx_inputs:
            for pixel_values in onnx_inputs["pixel_values"]:
                # Add the batch dim if the sample comes in as (C, H, W).
                if len(pixel_values.shape) == 3:
                    pixel_values = pixel_values[None, ...]
                image_embeddings.append(
                    self.image_encoder_runner.run(None, {"pixel_values": pixel_values})[
                        0
                    ][0]
                )
        return np.array(text_embeddings), np.array(image_embeddings)
class AXJinaV2Embedding(BaseEmbedding):
    """Jina CLIP v2 embeddings running on AXERA NPUs via axengine.

    One instance serves both text and vision embedding requests; the
    embedding_type passed to __call__ selects which encoder is used, and a
    lock serializes calls since text and vision share this instance.
    """

    def __init__(
        self,
        model_size: str,
        requestor: InterProcessRequestor,
        device: str = "AUTO",
        embedding_type: str = None,
    ):
        # HF_ENDPOINT allows mirroring huggingface.co behind a proxy.
        HF_ENDPOINT = os.environ.get("HF_ENDPOINT", "https://huggingface.co")
        super().__init__(
            model_name="AXERA-TECH/jina-clip-v2",
            model_file=None,
            download_urls={
                "image_encoder.axmodel": f"{HF_ENDPOINT}/AXERA-TECH/jina-clip-v2/resolve/main/image_encoder.axmodel",
                "text_encoder.axmodel": f"{HF_ENDPOINT}/AXERA-TECH/jina-clip-v2/resolve/main/text_encoder.axmodel",
            },
        )
        # Tokenizer comes from the upstream jinaai repo, not the AXERA one.
        self.tokenizer_source = "jinaai/jina-clip-v2"
        self.tokenizer_file = "tokenizer"
        self.embedding_type = embedding_type
        self.requestor = requestor
        self.model_size = model_size
        self.device = device
        self.download_path = os.path.join(MODEL_CACHE_DIR, self.model_name)
        self.tokenizer = None
        self.image_processor = None
        self.runner = None
        # CLIP normalization constants applied in _preprocess_image.
        self.mean = np.array([0.48145466, 0.4578275, 0.40821073], dtype=np.float32)
        self.std = np.array([0.26862954, 0.26130258, 0.27577711], dtype=np.float32)
        # Lock to prevent concurrent calls (text and vision share this instance)
        self._call_lock = threading.Lock()

        # download the model and tokenizer
        files_names = list(self.download_urls.keys()) + [self.tokenizer_file]
        if not all(
            os.path.exists(os.path.join(self.download_path, n)) for n in files_names
        ):
            logger.debug(f"starting model download for {self.model_name}")
            self.downloader = ModelDownloader(
                model_name=self.model_name,
                download_path=self.download_path,
                file_names=files_names,
                download_func=self._download_model,
            )
            self.downloader.ensure_model_files()
            # Avoid lazy loading in worker threads: block until downloads complete
            # and load the model on the main thread during initialization.
            self._load_model_and_utils()
        else:
            self.downloader = None
            # Files already on disk: report them as downloaded to the UI.
            ModelDownloader.mark_files_state(
                self.requestor,
                self.model_name,
                files_names,
                ModelStatusTypesEnum.downloaded,
            )
            self._load_model_and_utils()
            logger.debug(f"models are already downloaded for {self.model_name}")

    def _download_model(self, path: str):
        """Download one model file (or the tokenizer) and report its state."""
        try:
            file_name = os.path.basename(path)

            if file_name in self.download_urls:
                ModelDownloader.download_from_url(self.download_urls[file_name], path)
            elif file_name == self.tokenizer_file:
                tokenizer = AutoTokenizer.from_pretrained(
                    self.tokenizer_source,
                    trust_remote_code=True,
                    cache_dir=os.path.join(
                        MODEL_CACHE_DIR, self.model_name, "tokenizer"
                    ),
                    clean_up_tokenization_spaces=True,
                )
                tokenizer.save_pretrained(path)

            self.requestor.send_data(
                UPDATE_MODEL_STATE,
                {
                    "model": f"{self.model_name}-{file_name}",
                    "state": ModelStatusTypesEnum.downloaded,
                },
            )
        except Exception:
            self.requestor.send_data(
                UPDATE_MODEL_STATE,
                {
                    "model": f"{self.model_name}-{file_name}",
                    "state": ModelStatusTypesEnum.error,
                },
            )

    def _load_model_and_utils(self):
        """Lazily load the tokenizer and the axengine runner (idempotent)."""
        if self.runner is None:
            if self.downloader:
                # Block until all model files have finished downloading.
                self.downloader.wait_for_download()
            self.tokenizer = AutoTokenizer.from_pretrained(
                self.tokenizer_source,
                cache_dir=os.path.join(MODEL_CACHE_DIR, self.model_name, "tokenizer"),
                trust_remote_code=True,
                clean_up_tokenization_spaces=True,
            )
            self.runner = AXClipRunner(
                os.path.join(self.download_path, "image_encoder.axmodel"),
                os.path.join(self.download_path, "text_encoder.axmodel"),
            )

    def _preprocess_image(self, image_data: bytes | Image.Image):
        """
        Manually preprocess a single image from bytes or PIL.Image to (3, 512, 512).
        """
        if isinstance(image_data, bytes):
            image = Image.open(io.BytesIO(image_data))
        else:
            image = image_data

        if image.mode != "RGB":
            image = image.convert("RGB")

        image = image.resize((512, 512), Image.Resampling.LANCZOS)

        # Convert to numpy array, normalize to [0, 1], and transpose to (channels, height, width)
        image_array = np.array(image, dtype=np.float32) / 255.0
        # Normalize using mean and std
        image_array = (image_array - self.mean) / self.std
        image_array = np.transpose(image_array, (2, 0, 1))  # (H, W, C) -> (C, H, W)

        return image_array

    def _preprocess_inputs(self, raw_inputs):
        """
        Preprocess inputs into a list of real input tensors (no dummies).
        - For text: Returns list of input_ids.
        - For vision: Returns list of pixel_values.
        """
        if not isinstance(raw_inputs, list):
            raw_inputs = [raw_inputs]

        processed = []
        if self.embedding_type == "text":
            for text in raw_inputs:
                # Pad/truncate to a fixed length of 50 tokens per sample.
                input_ids = self.tokenizer(
                    [text], return_tensors="np", padding="max_length", max_length=50
                )["input_ids"]
                input_ids = input_ids.astype(np.int32)
                processed.append(input_ids)
        elif self.embedding_type == "vision":
            for img in raw_inputs:
                pixel_values = self._preprocess_image(img)
                processed.append(
                    pixel_values[np.newaxis, ...]
                )  # Add batch dim: (1, 3, 512, 512)
        else:
            raise ValueError(
                f"Invalid embedding_type: {self.embedding_type}. Must be 'text' or 'vision'."
            )
        return processed

    def _postprocess_outputs(self, outputs):
        """
        Process ONNX model outputs, truncating each embedding in the array to truncate_dim.
        - outputs: NumPy array of embeddings.
        - Returns: List of truncated embeddings.
        """
        # size of vector in database
        truncate_dim = 768

        # jina v2 defaults to 1024 and uses Matryoshka representation, so
        # truncating only causes an extremely minor decrease in retrieval accuracy
        if outputs.shape[-1] > truncate_dim:
            outputs = outputs[..., :truncate_dim]

        return outputs

    def __call__(
        self, inputs: list[str] | list[Image.Image], embedding_type=None
    ):
        """Embed a batch of texts or images; returns a list of vectors."""
        # Lock the entire call to prevent race conditions when text and vision
        # embeddings are called concurrently from different threads
        with self._call_lock:
            self.embedding_type = embedding_type
            if not self.embedding_type:
                raise ValueError(
                    "embedding_type must be specified either in __init__ or __call__"
                )

            self._load_model_and_utils()
            processed = self._preprocess_inputs(inputs)

            # Prepare ONNX inputs with matching batch sizes
            onnx_inputs = {}
            if self.embedding_type == "text":
                onnx_inputs["input_ids"] = np.stack([x[0] for x in processed])
            elif self.embedding_type == "vision":
                onnx_inputs["pixel_values"] = np.stack([x[0] for x in processed])
            else:
                raise ValueError("Invalid embedding type")

            # Run inference
            text_embeddings, image_embeddings = self.runner.run(onnx_inputs)

            if self.embedding_type == "text":
                embeddings = text_embeddings  # text embeddings
            elif self.embedding_type == "vision":
                embeddings = image_embeddings  # image embeddings
            else:
                raise ValueError("Invalid embedding type")

            embeddings = self._postprocess_outputs(embeddings)
            return [embedding for embedding in embeddings]

View File

@ -292,10 +292,13 @@ export default function Explore() {
const modelVersion = config?.semantic_search.model || "jinav1";
const modelSize = config?.semantic_search.model_size || "small";
const isAxJinaV2 = modelVersion === "ax_jinav2";
// Text model state
const { payload: textModelState } = useModelState(
modelVersion === "jinav1"
isAxJinaV2
? "AXERA-TECH/jina-clip-v2-text_encoder.axmodel"
: modelVersion === "jinav1"
? "jinaai/jina-clip-v1-text_model_fp16.onnx"
: modelSize === "large"
? "jinaai/jina-clip-v2-model_fp16.onnx"
@ -304,14 +307,18 @@ export default function Explore() {
// Tokenizer state
const { payload: textTokenizerState } = useModelState(
modelVersion === "jinav1"
isAxJinaV2
? "AXERA-TECH/jina-clip-v2-tokenizer"
: modelVersion === "jinav1"
? "jinaai/jina-clip-v1-tokenizer"
: "jinaai/jina-clip-v2-tokenizer",
);
// Vision model state (same as text model for jinav2)
const visionModelFile =
modelVersion === "jinav1"
isAxJinaV2
? "AXERA-TECH/jina-clip-v2-image_encoder.axmodel"
: modelVersion === "jinav1"
? modelSize === "large"
? "jinaai/jina-clip-v1-vision_model_fp16.onnx"
: "jinaai/jina-clip-v1-vision_model_quantized.onnx"
@ -321,13 +328,49 @@ export default function Explore() {
const { payload: visionModelState } = useModelState(visionModelFile);
// Preprocessor/feature extractor state
const { payload: visionFeatureExtractorState } = useModelState(
const { payload: visionFeatureExtractorStateRaw } = useModelState(
modelVersion === "jinav1"
? "jinaai/jina-clip-v1-preprocessor_config.json"
: "jinaai/jina-clip-v2-preprocessor_config.json",
);
const visionFeatureExtractorState = useMemo(() => {
if (isAxJinaV2) {
return visionModelState ?? "downloading";
}
return visionFeatureExtractorStateRaw;
}, [isAxJinaV2, visionModelState, visionFeatureExtractorStateRaw]);
const effectiveTextModelState = useMemo<ModelState | undefined>(() => {
if (isAxJinaV2) {
return textModelState ?? "downloading";
}
return textModelState;
}, [isAxJinaV2, textModelState]);
const effectiveTextTokenizerState = useMemo<ModelState | undefined>(() => {
if (isAxJinaV2) {
return textTokenizerState ?? "downloading";
}
return textTokenizerState;
}, [isAxJinaV2, textTokenizerState]);
const effectiveVisionModelState = useMemo<ModelState | undefined>(() => {
if (isAxJinaV2) {
return visionModelState ?? "downloading";
}
return visionModelState;
}, [isAxJinaV2, visionModelState]);
const allModelsLoaded = useMemo(() => {
if (isAxJinaV2) {
return (
effectiveTextModelState === "downloaded" &&
effectiveTextTokenizerState === "downloaded" &&
effectiveVisionModelState === "downloaded"
);
}
return (
textModelState === "downloaded" &&
textTokenizerState === "downloaded" &&
@ -335,6 +378,10 @@ export default function Explore() {
visionFeatureExtractorState === "downloaded"
);
}, [
isAxJinaV2,
effectiveTextModelState,
effectiveTextTokenizerState,
effectiveVisionModelState,
textModelState,
textTokenizerState,
visionModelState,
@ -358,10 +405,10 @@ export default function Explore() {
!defaultViewLoaded ||
(config?.semantic_search.enabled &&
(!reindexState ||
!textModelState ||
!textTokenizerState ||
!visionModelState ||
!visionFeatureExtractorState))
!(isAxJinaV2 ? effectiveTextModelState : textModelState) ||
!(isAxJinaV2 ? effectiveTextTokenizerState : textTokenizerState) ||
!(isAxJinaV2 ? effectiveVisionModelState : visionModelState) ||
(!isAxJinaV2 && !visionFeatureExtractorState)))
) {
return (
<ActivityIndicator className="absolute left-1/2 top-1/2 -translate-x-1/2 -translate-y-1/2" />

View File

@ -28,7 +28,7 @@ export interface FaceRecognitionConfig {
recognition_threshold: number;
}
export type SearchModel = "jinav1" | "jinav2";
export type SearchModel = "jinav1" | "jinav2" | "ax_jinav2";
export type SearchModelSize = "small" | "large";
export interface CameraConfig {