diff --git a/docker/tensorrt/Dockerfile.arm64 b/docker/tensorrt/Dockerfile.arm64
new file mode 100644
index 000000000..e0fcd8a8e
--- /dev/null
+++ b/docker/tensorrt/Dockerfile.arm64
@@ -0,0 +1,50 @@
+# syntax=docker/dockerfile:1.4
+
+# https://askubuntu.com/questions/972516/debian-frontend-environment-variable
+ARG DEBIAN_FRONTEND=noninteractive
+
+FROM wheels AS trt-wheels
+ARG DEBIAN_FRONTEND
+ARG TARGETARCH
+
+# python-tensorrt build deps are 3.4 GB!
+RUN apt-get update \
+    && apt-get install -y ccache cuda-cudart-dev-* cuda-nvcc-* libnvonnxparsers-dev libnvparsers-dev libnvinfer-plugin-dev \
+    && ([ -e /usr/local/cuda ] || ln -s /usr/local/cuda-* /usr/local/cuda) \
+    && rm -rf /var/lib/apt/lists/*;
+
+# Determine version of tensorrt already installed in base image, e.g. "Version: 8.4.1-1+cuda11.4"
+RUN NVINFER_VER=$(dpkg -s libnvinfer8 | grep -Po "Version: \K.*") \
+    && echo $NVINFER_VER | grep -Po "^\d+\.\d+\.\d+" > /etc/TENSORRT_VER
+
+RUN --mount=type=bind,source=docker/tensorrt/detector/build_python_tensorrt.sh,target=/deps/build_python_tensorrt.sh \
+    --mount=type=cache,target=/root/.ccache \
+    export PATH="/usr/lib/ccache:$PATH" CCACHE_DIR=/root/.ccache CCACHE_MAXSIZE=2G \
+    && TENSORRT_VER=$(cat /etc/TENSORRT_VER) /deps/build_python_tensorrt.sh
+
+COPY docker/tensorrt/requirements-arm64.txt /requirements-tensorrt.txt
+RUN pip3 wheel --wheel-dir=/trt-wheels -r /requirements-tensorrt.txt
+
+FROM wheels AS trt-model-wheels
+ARG DEBIAN_FRONTEND
+
+RUN apt-get update \
+    && apt-get install -y protobuf-compiler libprotobuf-dev \
+    && rm -rf /var/lib/apt/lists/*
+RUN --mount=type=bind,source=docker/tensorrt/requirements-models-arm64.txt,target=/requirements-tensorrt-models.txt \
+    pip3 wheel --wheel-dir=/trt-model-wheels -r /requirements-tensorrt-models.txt
+
+# Frigate w/ TensorRT for NVIDIA Jetson platforms
+FROM tensorrt-base AS frigate-tensorrt
+RUN apt-get update \
+    && apt-get install -y python-is-python3 libprotobuf17 \
+    && rm -rf /var/lib/apt/lists/*
+
+COPY --from=trt-wheels /etc/TENSORRT_VER /etc/TENSORRT_VER
+RUN --mount=type=bind,from=trt-wheels,source=/trt-wheels,target=/deps/trt-wheels \
+    --mount=type=bind,from=trt-model-wheels,source=/trt-model-wheels,target=/deps/trt-model-wheels \
+    pip3 install -U /deps/trt-wheels/*.whl /deps/trt-model-wheels/*.whl \
+    && ldconfig
+
+WORKDIR /opt/frigate/
+COPY --from=rootfs / /
diff --git a/docker/tensorrt/detector/build_python_tensorrt.sh b/docker/tensorrt/detector/build_python_tensorrt.sh
new file mode 100755
index 000000000..21b6ae268
--- /dev/null
+++ b/docker/tensorrt/detector/build_python_tensorrt.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+set -euxo pipefail
+
+mkdir -p /trt-wheels
+
+if [[ "${TARGETARCH}" == "arm64" ]]; then
+
+    # NVIDIA supplies python-tensorrt for python3.8, but frigate uses python3.9,
+    # so we must build python-tensorrt ourselves.
+
+    # Get python-tensorrt source
+    mkdir /workspace
+    cd /workspace
+    git clone -b ${TENSORRT_VER} https://github.com/NVIDIA/TensorRT.git --depth=1
+
+    # Collect dependencies
+    EXT_PATH=/workspace/external && mkdir -p $EXT_PATH
+    pip3 install pybind11 && ln -s /usr/local/lib/python3.9/dist-packages/pybind11 $EXT_PATH/pybind11
+    ln -s /usr/include/python3.9 $EXT_PATH/python3.9
+    ln -s /usr/include/aarch64-linux-gnu/NvOnnxParser.h /workspace/TensorRT/parsers/onnx/
+
+    # Build wheel
+    cd /workspace/TensorRT/python
+    EXT_PATH=$EXT_PATH PYTHON_MAJOR_VERSION=3 PYTHON_MINOR_VERSION=9 TARGET_ARCHITECTURE=aarch64 /bin/bash ./build.sh
+    mv build/dist/*.whl /trt-wheels/
+
+fi
diff --git a/docker/tensorrt/detector/rootfs/etc/s6-overlay/s6-rc.d/trt-model-prepare/run b/docker/tensorrt/detector/rootfs/etc/s6-overlay/s6-rc.d/trt-model-prepare/run
index 343636f1a..93b132a38 100755
--- a/docker/tensorrt/detector/rootfs/etc/s6-overlay/s6-rc.d/trt-model-prepare/run
+++ b/docker/tensorrt/detector/rootfs/etc/s6-overlay/s6-rc.d/trt-model-prepare/run
@@ -2,15 +2,21 @@
 # shellcheck shell=bash
 # Generate models for the TensorRT detector
 
+# One or more comma-separated models may be specified via the YOLO_MODELS env.
+# Append "-dla" to the model name to generate a DLA model with GPU fallback;
+# otherwise a GPU-only model will be generated.
+
 set -o errexit -o nounset -o pipefail
 
 MODEL_CACHE_DIR=${MODEL_CACHE_DIR:-"/config/model_cache/tensorrt"}
+TRT_VER=${TRT_VER:-$(cat /etc/TENSORRT_VER)}
 OUTPUT_FOLDER="${MODEL_CACHE_DIR}/${TRT_VER}"
 
 # Create output folder
 mkdir -p ${OUTPUT_FOLDER}
 
 FIRST_MODEL=true
+MODEL_DOWNLOAD=""
 MODEL_CONVERT=""
 
 for model in ${YOLO_MODELS//,/ }
@@ -20,9 +26,11 @@ do
 
     if [[ ! -f ${OUTPUT_FOLDER}/${model}.trt ]]; then
         if [[ ${FIRST_MODEL} = true ]]; then
+            MODEL_DOWNLOAD="${model%-dla}";
             MODEL_CONVERT="${model}"
             FIRST_MODEL=false;
         else
+            MODEL_DOWNLOAD+=",${model%-dla}";
            MODEL_CONVERT+=",${model}";
         fi
     else
@@ -35,6 +43,25 @@ if [[ -z ${MODEL_CONVERT} ]]; then
     exit 0
 fi
 
+# On Jetpack 4.6, the nvidia container runtime will mount several host nvidia libraries into the
+# container which should not be present in the image - if they are, TRT model generation will
+# fail or produce invalid models. Thus we must request the user to install them on the host in
+# order to run libyolo here.
+# On Jetpack 5.0, these libraries are not mounted by the runtime and are supplied by the image.
+if [[ "$(arch)" == "aarch64" ]]; then
+    if [[ ! -e /usr/lib/aarch64-linux-gnu/tegra ]]; then
+        echo "ERROR: Container must be launched with nvidia runtime"
+        exit 1
+    elif [[ ! -e /usr/lib/aarch64-linux-gnu/libnvinfer.so.8 ||
+            ! -e /usr/lib/aarch64-linux-gnu/libnvinfer_plugin.so.8 ||
+            ! -e /usr/lib/aarch64-linux-gnu/libnvparsers.so.8 ||
+            ! -e /usr/lib/aarch64-linux-gnu/libnvonnxparser.so.8 ]]; then
+        echo "ERROR: Please run the following on the HOST:"
+        echo "    sudo apt install libnvinfer8 libnvinfer-plugin8 libnvparsers8 libnvonnxparsers8 nvidia-container"
+        exit 1
+    fi
+fi
+
 echo "Generating the following TRT Models: ${MODEL_CONVERT}"
 
 # Build trt engine
@@ -45,10 +72,14 @@ echo "Downloading yolo weights"
 
 for model in ${MODEL_CONVERT//,/ }
 do
-    python3 yolo_to_onnx.py -m ${model} > /dev/null
+    python3 yolo_to_onnx.py -m ${model%-dla} > /dev/null
 
     echo -e "\nGenerating ${model}.trt. This may take a few minutes.\n"; start=$(date +%s)
-    cmd="python3 onnx_to_tensorrt.py -m ${model}"
+    if [[ $model == *-dla ]]; then
+        cmd="python3 onnx_to_tensorrt.py -m ${model%-dla} --dla_core 0"
+    else
+        cmd="python3 onnx_to_tensorrt.py -m ${model}"
+    fi
     $cmd > /tmp/onnx_to_tensorrt.log || { cat /tmp/onnx_to_tensorrt.log && continue; }
 
     mv ${model%-dla}.trt ${OUTPUT_FOLDER}/${model}.trt;
diff --git a/docker/tensorrt/requirements-arm64.txt b/docker/tensorrt/requirements-arm64.txt
new file mode 100644
index 000000000..9b12dac33
--- /dev/null
+++ b/docker/tensorrt/requirements-arm64.txt
@@ -0,0 +1 @@
+cuda-python == 11.7; platform_machine == 'aarch64'
diff --git a/docker/tensorrt/requirements-models-arm64.txt b/docker/tensorrt/requirements-models-arm64.txt
new file mode 100644
index 000000000..9d4ae7b44
--- /dev/null
+++ b/docker/tensorrt/requirements-models-arm64.txt
@@ -0,0 +1,3 @@
+onnx == 1.9.0; platform_machine == 'aarch64'
+protobuf == 3.20.3; platform_machine == 'aarch64'
+numpy == 1.23.*; platform_machine == 'aarch64'
diff --git a/docker/tensorrt/trt.hcl b/docker/tensorrt/trt.hcl
index c30856bb5..c305f42cf 100644
--- a/docker/tensorrt/trt.hcl
+++ b/docker/tensorrt/trt.hcl
@@ -1,8 +1,22 @@
 variable "ARCH" {
   default = "amd64"
 }
+variable "BASE_IMAGE" {
+  default = null
+}
+variable "SLIM_BASE" {
+  default = null
+}
+variable "TRT_BASE" {
+  default = null
+}
 
 target "_build_args" {
+  args = {
+    BASE_IMAGE = BASE_IMAGE,
+    SLIM_BASE = SLIM_BASE,
+    TRT_BASE = TRT_BASE
+  }
   platforms = ["linux/${ARCH}"]
 }
 
diff --git a/docker/tensorrt/trt.mk b/docker/tensorrt/trt.mk
index b60409abb..0e01c1402 100644
--- a/docker/tensorrt/trt.mk
+++ b/docker/tensorrt/trt.mk
@@ -1,12 +1,26 @@
 BOARDS += trt
 
+JETPACK4_BASE ?= timongentzsch/l4t-ubuntu20-opencv:latest  # L4T 32.7.1 JetPack 4.6.1
+JETPACK5_BASE ?= nvcr.io/nvidia/l4t-tensorrt:r8.5.2-runtime  # L4T 35.3.1 JetPack 5.1.1
 X86_DGPU_ARGS := ARCH=amd64
+JETPACK4_ARGS := ARCH=arm64 BASE_IMAGE=$(JETPACK4_BASE) SLIM_BASE=$(JETPACK4_BASE) TRT_BASE=$(JETPACK4_BASE)
+JETPACK5_ARGS := ARCH=arm64 BASE_IMAGE=$(JETPACK5_BASE) SLIM_BASE=$(JETPACK5_BASE) TRT_BASE=$(JETPACK5_BASE)
 
 local-trt: version
 	$(X86_DGPU_ARGS) docker buildx bake --load --file=docker/tensorrt/trt.hcl --set tensorrt.tags=frigate:latest-tensorrt tensorrt
 
+local-trt-jp4: version
+	$(JETPACK4_ARGS) docker buildx bake --load --file=docker/tensorrt/trt.hcl --set tensorrt.tags=frigate:latest-tensorrt-jp4 tensorrt
+
+local-trt-jp5: version
+	$(JETPACK5_ARGS) docker buildx bake --load --file=docker/tensorrt/trt.hcl --set tensorrt.tags=frigate:latest-tensorrt-jp5 tensorrt
+
 build-trt:
 	$(X86_DGPU_ARGS) docker buildx bake --file=docker/tensorrt/trt.hcl --set tensorrt.tags=$(IMAGE_REPO):${GITHUB_REF_NAME}-$(COMMIT_HASH)-tensorrt tensorrt
+	$(JETPACK4_ARGS) docker buildx bake --file=docker/tensorrt/trt.hcl --set tensorrt.tags=$(IMAGE_REPO):${GITHUB_REF_NAME}-$(COMMIT_HASH)-tensorrt-jp4 tensorrt
+	$(JETPACK5_ARGS) docker buildx bake --file=docker/tensorrt/trt.hcl --set tensorrt.tags=$(IMAGE_REPO):${GITHUB_REF_NAME}-$(COMMIT_HASH)-tensorrt-jp5 tensorrt
 
 push-trt: build-trt
 	$(X86_DGPU_ARGS) docker buildx bake --push --file=docker/tensorrt/trt.hcl --set tensorrt.tags=$(IMAGE_REPO):${GITHUB_REF_NAME}-$(COMMIT_HASH)-tensorrt tensorrt
+	$(JETPACK4_ARGS) docker buildx bake --push --file=docker/tensorrt/trt.hcl --set tensorrt.tags=$(IMAGE_REPO):${GITHUB_REF_NAME}-$(COMMIT_HASH)-tensorrt-jp4 tensorrt
+	$(JETPACK5_ARGS) docker buildx bake --push --file=docker/tensorrt/trt.hcl --set tensorrt.tags=$(IMAGE_REPO):${GITHUB_REF_NAME}-$(COMMIT_HASH)-tensorrt-jp5 tensorrt
diff --git a/docs/docs/configuration/object_detectors.md b/docs/docs/configuration/object_detectors.md
index 75522a33e..c6da4b9fe 100644
--- a/docs/docs/configuration/object_detectors.md
+++ b/docs/docs/configuration/object_detectors.md
@@ -196,6 +196,8 @@ The Frigate image will generate model files during startup if the specified mode
 
 By default, the `yolov7-tiny-416` model will be generated, but this can be overridden by specifying the `YOLO_MODELS` environment variable in Docker. One or more models may be listed in a comma-separated format, and each one will be generated. To select no model generation, set the variable to an empty string, `YOLO_MODELS=""`. Models will only be generated if the corresponding `{model}.trt` file is not present in the `model_cache` folder, so you can force a model to be regenerated by deleting it from your Frigate data folder.
 
+If you have a Jetson device with DLAs (Xavier or Orin), you can generate a model that will run on the DLA by appending `-dla` to your model name, e.g. specify `YOLO_MODELS=yolov7-tiny-416-dla`. The model will run on DLA0 (Frigate does not currently support DLA1). DLA-incompatible layers will fall back to running on the GPU.
+
 If your GPU does not support FP16 operations, you can pass the environment variable `USE_FP16=False` to disable it.
 
 Specific models can be selected by passing an environment variable to the `docker run` command or in your `docker-compose.yml` file. Use the form `-e YOLO_MODELS=yolov4-416,yolov4-tiny-416` to select one or more model names. The models available are shown below.
diff --git a/frigate/stats.py b/frigate/stats.py
index 5fdc671ee..da5eb358e 100644
--- a/frigate/stats.py
+++ b/frigate/stats.py
@@ -22,6 +22,7 @@ from frigate.util.services import (
     get_bandwidth_stats,
     get_cpu_stats,
     get_intel_gpu_stats,
+    get_jetson_stats,
     get_nvidia_gpu_stats,
 )
 from frigate.version import VERSION
@@ -180,6 +181,15 @@ async def set_gpu_stats(
             else:
                 stats["nvidia-gpu"] = {"gpu": -1, "mem": -1}
                 hwaccel_errors.append(args)
+        elif "nvmpi" in args or "jetson" in args:
+            # nvidia Jetson
+            jetson_usage = get_jetson_stats()
+
+            if jetson_usage:
+                stats["jetson-gpu"] = jetson_usage
+            else:
+                stats["jetson-gpu"] = {"gpu": -1, "mem": -1}
+                hwaccel_errors.append(args)
         elif "qsv" in args:
             if not config.telemetry.stats.intel_gpu_stats:
                 continue
diff --git a/frigate/util/services.py b/frigate/util/services.py
index 507ee76ea..3c591feb9 100644
--- a/frigate/util/services.py
+++ b/frigate/util/services.py
@@ -309,6 +309,21 @@ def get_nvidia_gpu_stats() -> dict[int, dict]:
     return results
 
 
+def get_jetson_stats() -> dict[int, dict]:
+    results = {}
+
+    try:
+        results["mem"] = "-"  # no discrete gpu memory
+
+        with open("/sys/devices/gpu.0/load", "r") as f:
+            gpuload = float(f.readline()) / 10
+            results["gpu"] = f"{gpuload}%"
+    except Exception:
+        return None
+
+    return results
+
+
 def ffprobe_stream(path: str) -> sp.CompletedProcess:
     """Run ffprobe on stream."""
     clean_path = escape_special_characters(path)
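Build usage sketch: the two new targets in trt.mk mirror the existing local-trt target, so assuming the board Makefiles are included by the top-level Makefile as they are for the other boards (and docker buildx with arm64 emulation or a native Jetson builder is available), the Jetson images could be built locally with:

    make local-trt-jp4    # JetPack 4.6 base (timongentzsch/l4t-ubuntu20-opencv)
    make local-trt-jp5    # JetPack 5 base (nvcr.io/nvidia/l4t-tensorrt:r8.5.2-runtime)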
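Run sketch tying the docs note to the checks in trt-model-prepare/run: the image tag follows the local-trt-jp5 target, the config path is illustrative, and the usual Frigate run options (ports, shm-size, media volume) are omitted; the apt line is the exact package list the script prints on JetPack 4.6 when the host libraries are missing.

    # On the HOST (JetPack 4.6 only), as requested by the model-prepare script:
    sudo apt install libnvinfer8 libnvinfer-plugin8 libnvparsers8 libnvonnxparsers8 nvidia-container

    # Container must use the nvidia runtime; "-dla" requests a DLA0 engine with GPU fallback.
    docker run -d --name frigate \
      --runtime nvidia \
      -e YOLO_MODELS=yolov7-tiny-416-dla \
      -v /path/to/config:/config \
      frigate:latest-tensorrt-jp5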
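For reference, get_jetson_stats() reads the integrated GPU load from sysfs and divides by 10, i.e. the node is assumed to report load in tenths of a percent. On a Jetson host the same value can be inspected directly (the output shown is illustrative):

    cat /sys/devices/gpu.0/load    # e.g. "153" -> reported as 15.3% GPU load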