Add Jetson ffmpeg hwaccel

2026-02-05 10:45:21 +03:00 · 2023-07-18 12:55:45 -04:00 · 2023-07-18 12:55:45 -04:00 · b0d94a0ce7
commit b0d94a0ce7
parent 288828750c
8 changed files with 170 additions and 15 deletions
--- a/docker/tensorrt/Dockerfile.arm64
+++ b/docker/tensorrt/Dockerfile.arm64
@ -34,12 +34,23 @@ RUN apt-get update \
 RUN --mount=type=bind,source=docker/tensorrt/requirements-models-arm64.txt,target=/requirements-tensorrt-models.txt \
    pip3 wheel --wheel-dir=/trt-model-wheels -r /requirements-tensorrt-models.txt

+# Build stage: runs build_jetson_ffmpeg.sh, which installs under /rootfs/usr/local.
+# ccache is backed by a BuildKit cache mount so rebuilds can reuse object files.
+FROM wget AS jetson-ffmpeg
+ARG DEBIAN_FRONTEND
+ENV CCACHE_DIR=/root/.ccache
+ENV CCACHE_MAXSIZE=2G
+RUN --mount=type=bind,source=docker/tensorrt/build_jetson_ffmpeg.sh,target=/deps/build_jetson_ffmpeg.sh \
+    --mount=type=cache,target=/root/.ccache \
+    /deps/build_jetson_ffmpeg.sh
+
 # Frigate w/ TensorRT for NVIDIA Jetson platforms
 FROM tensorrt-base AS frigate-tensorrt
 RUN apt-get update \
    && apt-get install -y python-is-python3 libprotobuf17  \
    && rm -rf /var/lib/apt/lists/*

+RUN rm -rf /usr/lib/btbn-ffmpeg/
+COPY --from=jetson-ffmpeg /rootfs /
+
 COPY --from=trt-wheels /etc/TENSORRT_VER /etc/TENSORRT_VER
 RUN --mount=type=bind,from=trt-wheels,source=/trt-wheels,target=/deps/trt-wheels \
    --mount=type=bind,from=trt-model-wheels,source=/trt-model-wheels,target=/deps/trt-model-wheels \
--- a/docker/tensorrt/build_jetson_ffmpeg.sh
+++ b/docker/tensorrt/build_jetson_ffmpeg.sh
@ -0,0 +1,59 @@
+#!/bin/bash
+
+# For jetson platforms, build ffmpeg with custom patches. NVIDIA supplies a deb
+# with accelerated decoding, but it doesn't have accelerated scaling or encoding.
+# Everything is installed under $INSTALL_PREFIX.
+
+set -euxo pipefail
+
+INSTALL_PREFIX=/rootfs/usr/local
+
+apt-get -qq update
+apt-get -qq install -y --no-install-recommends build-essential ccache clang cmake pkg-config
+apt-get -qq install -y --no-install-recommends libx264-dev libx265-dev
+
+pushd /tmp
+
+# Install libnvmpi to enable nvmpi decoders (h264_nvmpi, hevc_nvmpi)
+if [ -e /usr/local/cuda-10.2 ]; then
+    # assume Jetpack 4.X
+    wget -q https://developer.nvidia.com/embedded/L4T/r32_Release_v5.0/T186/Jetson_Multimedia_API_R32.5.0_aarch64.tbz2 -O jetson_multimedia_api.tbz2
+else
+    # assume Jetpack 5.X
+    wget -q https://developer.nvidia.com/downloads/embedded/l4t/r35_release_v3.1/release/jetson_multimedia_api_r35.3.1_aarch64.tbz2 -O jetson_multimedia_api.tbz2
+fi
+tar xaf jetson_multimedia_api.tbz2 -C / && rm jetson_multimedia_api.tbz2
+
+wget -q https://github.com/madsciencetist/jetson-ffmpeg/archive/refs/heads/master.zip
+unzip master.zip && rm master.zip && cd jetson-ffmpeg-master
+# tegra multimedia libs aren't available in image, so use stubs for ffmpeg build.
+# Exported so the child build processes (cmake, make, configure) actually see it;
+# the ${VAR:+...} form keeps `set -u` from aborting when LD_LIBRARY_PATH is unset
+# and avoids a trailing ':' (which would add the current directory to the path).
+export LD_LIBRARY_PATH="$(pwd)/stubs${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
+mkdir build
+cd build
+cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=$INSTALL_PREFIX
+make -j$(nproc)
+make install
+cd ../../
+
+# Install nv-codec-headers to enable ffnvcodec filters (scale_cuda)
+wget -q https://github.com/FFmpeg/nv-codec-headers/archive/refs/heads/master.zip
+unzip master.zip && rm master.zip && cd nv-codec-headers-master
+make PREFIX=$INSTALL_PREFIX install
+cd ../ && rm -rf nv-codec-headers-master
+
+# Build ffmpeg with nvmpi patch
+wget -q https://ffmpeg.org/releases/ffmpeg-6.0.tar.xz
+tar xaf ffmpeg-*.tar.xz && rm ffmpeg-*.tar.xz && cd ffmpeg-*
+patch -p1 < ../jetson-ffmpeg-master/ffmpeg_patches/ffmpeg6.0_nvmpi.patch
+# let ffmpeg's configure find the libraries installed above via pkg-config
+export PKG_CONFIG_PATH=$INSTALL_PREFIX/lib/pkgconfig
+# enable Jetson codecs but disable dGPU codecs
+# (on configure failure, dump config.log so the cause shows up in the build log)
+./configure --cc='ccache gcc' --cxx='ccache g++' \
+            --enable-shared --disable-static --prefix=$INSTALL_PREFIX \
+            --enable-gpl --enable-libx264  --enable-libx265 \
+            --enable-nvmpi --enable-ffnvcodec --enable-cuda-llvm \
+            --disable-cuvid --disable-nvenc --disable-nvdec \
+    || { cat ffbuild/config.log && false; }
+make -j$(nproc)
+make install
+cd ../
+
+rm -rf /var/lib/apt/lists/*
+popd
--- a/docker/tensorrt/trt.hcl
+++ b/docker/tensorrt/trt.hcl
@ -20,6 +20,12 @@ target "_build_args" {
  platforms = ["linux/${ARCH}"]
 }

+# Bake target for the "wget" stage of docker/main/Dockerfile
+target "wget" {
+  inherits = ["_build_args"]
+  dockerfile = "docker/main/Dockerfile"
+  target = "wget"
+}
+
 target deps {
  dockerfile = "docker/main/Dockerfile"
  target = "deps"
@ -57,6 +63,7 @@ target "tensorrt" {
  dockerfile = "docker/tensorrt/Dockerfile.${ARCH}"
  context = "."
  contexts = {
+    wget = "target:wget",
    tensorrt-base = "target:tensorrt-base",
    rootfs = "target:rootfs"
    wheels = "target:wheels"
--- a/docs/docs/configuration/ffmpeg_presets.md
+++ b/docs/docs/configuration/ffmpeg_presets.md
@ -11,16 +11,18 @@ It is highly recommended to use hwaccel presets in the config. These presets not

 See [the hwaccel docs](/configuration/hardware_acceleration.md) for more info on how to setup hwaccel for your GPU / iGPU.

-| Preset                | Usage                        | Other Notes                                           |
-| --------------------- | ---------------------------- | ----------------------------------------------------- |
-| preset-rpi-32-h264    | 32 bit Rpi with h264 stream  |                                                       |
-| preset-rpi-64-h264    | 64 bit Rpi with h264 stream  |                                                       |
-| preset-vaapi          | Intel & AMD VAAPI            | Check hwaccel docs to ensure correct driver is chosen |
-| preset-intel-qsv-h264 | Intel QSV with h264 stream   | If issues occur recommend using vaapi preset instead  |
-| preset-intel-qsv-h265 | Intel QSV with h265 stream   | If issues occur recommend using vaapi preset instead  |
-| preset-nvidia-h264    | Nvidia GPU with h264 stream  |                                                       |
-| preset-nvidia-h265    | Nvidia GPU with h265 stream  |                                                       |
-| preset-nvidia-mjpeg   | Nvidia GPU with mjpeg stream | Recommend restreaming mjpeg and using nvidia-h264     |
+| Preset                | Usage                          | Other Notes                                           |
+| --------------------- | ------------------------------ | ----------------------------------------------------- |
+| preset-rpi-32-h264    | 32 bit Rpi with h264 stream    |                                                       |
+| preset-rpi-64-h264    | 64 bit Rpi with h264 stream    |                                                       |
+| preset-vaapi          | Intel & AMD VAAPI              | Check hwaccel docs to ensure correct driver is chosen |
+| preset-intel-qsv-h264 | Intel QSV with h264 stream     | If issues occur recommend using vaapi preset instead  |
+| preset-intel-qsv-h265 | Intel QSV with h265 stream     | If issues occur recommend using vaapi preset instead  |
+| preset-nvidia-h264    | Nvidia GPU with h264 stream    |                                                       |
+| preset-nvidia-h265    | Nvidia GPU with h265 stream    |                                                       |
+| preset-nvidia-mjpeg   | Nvidia GPU with mjpeg stream   | Recommend restreaming mjpeg and using nvidia-h264     |
+| preset-jetson-h264    | Nvidia Jetson with h264 stream |                                                       |
+| preset-jetson-h265    | Nvidia Jetson with h265 stream |                                                       |

 ### Input Args Presets

--- a/docs/docs/configuration/hardware_acceleration.md
+++ b/docs/docs/configuration/hardware_acceleration.md
@ -242,3 +242,46 @@ processes:
 If you do not see these processes, check the `docker logs` for the container and look for decoding errors.

 These instructions were originally based on the [Jellyfin documentation](https://jellyfin.org/docs/general/administration/hardware-acceleration.html#nvidia-hardware-acceleration-on-docker-linux).
+
+### NVIDIA Jetson (Orin AGX, Orin NX, Orin Nano*, Xavier AGX, Xavier NX, TX2, TX1, Nano)
+
+A separate set of docker images is available that is based on Jetpack/L4T. They come with an `ffmpeg` build
+with codecs that use the Jetson's dedicated media engine. Use the `frigate-tensorrt-jp*` image with the nvidia container
+runtime:
+
+##### Docker Run CLI
+
+```bash
+docker run -d \
+  ...
+  --runtime nvidia \
+  ghcr.io/blakeblackshear/frigate-tensorrt-jp5
+```
+
+If your Jetson host is running Jetpack 4.6, use the `frigate-tensorrt-jp4` image, or if your Jetson host is running Jetpack 5.0+, use the `frigate-tensorrt-jp5` image. Note that the Orin Nano has no video encoder, so frigate will use software encoding on this platform, but the image will still allow hardware decoding and tensorrt object detection.
+
+#### Setup Decoder
+
+The decoder you need to pass in the `hwaccel_args` will depend on the input video.
+
+The following decoders are supported (run `ffmpeg -decoders | grep nvmpi` in the container to list the ones available on your Jetson):
+
+```
+ V..... h264_nvmpi           h264 (nvmpi) (codec h264)
+ V..... hevc_nvmpi           hevc (nvmpi) (codec hevc)
+ V..... mpeg2_nvmpi          mpeg2 (nvmpi) (codec mpeg2video)
+ V..... mpeg4_nvmpi          mpeg4 (nvmpi) (codec mpeg4)
+ V..... vp8_nvmpi            vp8 (nvmpi) (codec vp8)
+ V..... vp9_nvmpi            vp9 (nvmpi) (codec vp9)
+```
+
+For example, for H264 video, you'll select `preset-jetson-h264`.
+
+```yaml
+ffmpeg:
+  hwaccel_args: preset-jetson-h264
+```
+
+If everything is working correctly, you should see a significant reduction in ffmpeg CPU load and power consumption.
+Verify that hardware decoding is working by running `jtop` (`sudo pip3 install -U jetson-stats`), which should show
+that NVDEC/NVDEC1 are in use.
--- a/docs/docs/development/contributing.md
+++ b/docs/docs/development/contributing.md
@ -101,12 +101,18 @@ This should show <50% CPU in top, and ~80% CPU without `-c:v h264_v4l2m2m`.
 ffmpeg -c:v h264_v4l2m2m -re -stream_loop -1 -i https://streams.videolan.org/ffmpeg/incoming/720p60.mp4 -f rawvideo -pix_fmt yuv420p pipe: > /dev/null
 ```

-**NVIDIA**
+**NVIDIA GPU**

 ```shell
 ffmpeg -c:v h264_cuvid -re -stream_loop -1 -i https://streams.videolan.org/ffmpeg/incoming/720p60.mp4 -f rawvideo -pix_fmt yuv420p pipe: > /dev/null
 ```

+**NVIDIA Jetson**
+
+```shell
+ffmpeg -c:v h264_nvmpi -re -stream_loop -1 -i https://streams.videolan.org/ffmpeg/incoming/720p60.mp4 -f rawvideo -pix_fmt yuv420p pipe: > /dev/null
+```
+
 **VAAPI**

 ```shell
--- a/frigate/config.py
+++ b/frigate/config.py
@ -804,9 +804,19 @@ class CameraConfig(FrigateBaseModel):
            ffmpeg_input.global_args or self.ffmpeg.global_args
        )
        hwaccel_args = get_ffmpeg_arg_list(
-            parse_preset_hardware_acceleration_decode(ffmpeg_input.hwaccel_args)
+            parse_preset_hardware_acceleration_decode(
+                ffmpeg_input.hwaccel_args,
+                self.detect.fps,
+                self.detect.width,
+                self.detect.height,
+            )
            or ffmpeg_input.hwaccel_args
-            or parse_preset_hardware_acceleration_decode(self.ffmpeg.hwaccel_args)
+            or parse_preset_hardware_acceleration_decode(
+                self.ffmpeg.hwaccel_args,
+                self.detect.fps,
+                self.detect.width,
+                self.detect.height,
+            )
            or self.ffmpeg.hwaccel_args
        )
        input_args = get_ffmpeg_arg_list(
--- a/frigate/ffmpeg_presets.py
+++ b/frigate/ffmpeg_presets.py
@ -63,6 +63,8 @@ PRESETS_HW_ACCEL_DECODE = {
    "preset-nvidia-h264": "-hwaccel cuda -hwaccel_output_format cuda",
    "preset-nvidia-h265": "-hwaccel cuda -hwaccel_output_format cuda",
    "preset-nvidia-mjpeg": "-hwaccel cuda -hwaccel_output_format cuda",
+    "preset-jetson-h264": "-c:v h264_nvmpi -resize {1}x{2}",
+    "preset-jetson-h265": "-c:v hevc_nvmpi -resize {1}x{2}",
 }

 PRESETS_HW_ACCEL_SCALE = {
@ -73,6 +75,8 @@ PRESETS_HW_ACCEL_SCALE = {
    "preset-intel-qsv-h265": "-r {0} -vf vpp_qsv=framerate={0}:w={1}:h={2}:format=nv12,hwdownload,format=nv12,format=yuv420p",
    "preset-nvidia-h264": "-r {0} -vf fps={0},scale_cuda=w={1}:h={2}:format=nv12,hwdownload,format=nv12,format=yuv420p",
    "preset-nvidia-h265": "-r {0} -vf fps={0},scale_cuda=w={1}:h={2}:format=nv12,hwdownload,format=nv12,format=yuv420p",
+    "preset-jetson-h264": "-r {0}",  # scaled in decoder
+    "preset-jetson-h265": "-r {0}",  # scaled in decoder
    "default": "-r {0} -vf fps={0},scale={1}:{2}",
 }

@ -84,6 +88,8 @@ PRESETS_HW_ACCEL_ENCODE_BIRDSEYE = {
    "preset-intel-qsv-h265": "ffmpeg -hide_banner {0} -c:v h264_qsv -g 50 -bf 0 -profile:v high -level:v 4.1 -async_depth:v 1 {1}",
    "preset-nvidia-h264": "ffmpeg -hide_banner {0} -c:v h264_nvenc -g 50 -profile:v high -level:v auto -preset:v p2 -tune:v ll {1}",
    "preset-nvidia-h265": "ffmpeg -hide_banner {0} -c:v h264_nvenc -g 50 -profile:v high -level:v auto -preset:v p2 -tune:v ll {1}",
+    "preset-jetson-h264": "ffmpeg -hide_banner {0} -c:v h264_nvmpi -profile high {1}",
+    "preset-jetson-h265": "ffmpeg -hide_banner {0} -c:v h264_nvmpi -profile high {1}",
    "default": "ffmpeg -hide_banner {0} -c:v libx264 -g 50 -profile:v high -level:v 4.1 -preset:v superfast -tune:v zerolatency {1}",
 }

@ -95,11 +101,18 @@ PRESETS_HW_ACCEL_ENCODE_TIMELAPSE = {
    "preset-intel-qsv-h265": "ffmpeg -hide_banner {0} -c:v hevc_qsv -g 50 -bf 0 -profile:v high -level:v 4.1 -async_depth:v 1 {1}",
    "preset-nvidia-h264": "ffmpeg -hide_banner -hwaccel cuda -hwaccel_output_format cuda -extra_hw_frames 8 {0} -c:v h264_nvenc {1}",
    "preset-nvidia-h265": "ffmpeg -hide_banner -hwaccel cuda -hwaccel_output_format cuda -extra_hw_frames 8 {0} -c:v hevc_nvenc {1}",
+    "preset-jetson-h264": "ffmpeg -hide_banner {0} -c:v h264_nvmpi -profile high {1}",
+    "preset-jetson-h265": "ffmpeg -hide_banner {0} -c:v hevc_nvmpi -profile high {1}",
    "default": "ffmpeg -hide_banner {0} -c:v libx264 -preset:v ultrafast -tune:v zerolatency {1}",
 }


-def parse_preset_hardware_acceleration_decode(arg: Any) -> list[str]:
+def parse_preset_hardware_acceleration_decode(
+    arg: Any,
+    fps: int,
+    width: int,
+    height: int,
+) -> list[str]:
    """Return the correct preset if in preset format otherwise return None."""
    if not isinstance(arg, str):
        return None
@ -109,7 +122,7 @@ def parse_preset_hardware_acceleration_decode(arg: Any) -> list[str]:
    if not decode:
        return None

-    return decode.split(" ")
+    return decode.format(fps, width, height).split(" ")


 def parse_preset_hardware_acceleration_scale(
@ -147,6 +160,10 @@ def parse_preset_hardware_acceleration_encode(
    if not isinstance(arg, str):
        return arg_map["default"].format(input, output)

+    # Not all jetsons have HW encoders, so fall back to default SW encoder if not
+    if arg.startswith("preset-jetson-") and not os.path.exists("/dev/nvhost-msenc"):
+        arg = "default"
+
    return arg_map.get(arg, arg_map["default"]).format(
        input,
        output,