Add Jetson ffmpeg hwaccel

Andrew Reiter 2023-07-18 12:55:45 -04:00
parent 288828750c
commit b0d94a0ce7
8 changed files with 170 additions and 15 deletions


@@ -34,12 +34,23 @@ RUN apt-get update \
RUN --mount=type=bind,source=docker/tensorrt/requirements-models-arm64.txt,target=/requirements-tensorrt-models.txt \
pip3 wheel --wheel-dir=/trt-model-wheels -r /requirements-tensorrt-models.txt
FROM wget AS jetson-ffmpeg
ARG DEBIAN_FRONTEND
ENV CCACHE_DIR /root/.ccache
ENV CCACHE_MAXSIZE 2G
RUN --mount=type=bind,source=docker/tensorrt/build_jetson_ffmpeg.sh,target=/deps/build_jetson_ffmpeg.sh \
--mount=type=cache,target=/root/.ccache \
/deps/build_jetson_ffmpeg.sh
# Frigate w/ TensorRT for NVIDIA Jetson platforms
FROM tensorrt-base AS frigate-tensorrt
RUN apt-get update \
&& apt-get install -y python-is-python3 libprotobuf17 \
&& rm -rf /var/lib/apt/lists/*
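# Replace the bundled btbn-ffmpeg build with the Jetson ffmpeg built in the jetson-ffmpeg stage above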
RUN rm -rf /usr/lib/btbn-ffmpeg/
COPY --from=jetson-ffmpeg /rootfs /
COPY --from=trt-wheels /etc/TENSORRT_VER /etc/TENSORRT_VER
RUN --mount=type=bind,from=trt-wheels,source=/trt-wheels,target=/deps/trt-wheels \
--mount=type=bind,from=trt-model-wheels,source=/trt-model-wheels,target=/deps/trt-model-wheels \


@@ -0,0 +1,59 @@
#!/bin/bash
# For jetson platforms, build ffmpeg with custom patches. NVIDIA supplies a deb
# with accelerated decoding, but it doesn't have accelerated scaling or encoding
set -euxo pipefail
INSTALL_PREFIX=/rootfs/usr/local
apt-get -qq update
apt-get -qq install -y --no-install-recommends build-essential ccache clang cmake pkg-config
apt-get -qq install -y --no-install-recommends libx264-dev libx265-dev
pushd /tmp
# Install libnvmpi to enable nvmpi decoders (h264_nvmpi, hevc_nvmpi)
if [ -e /usr/local/cuda-10.2 ]; then
# assume Jetpack 4.X
wget -q https://developer.nvidia.com/embedded/L4T/r32_Release_v5.0/T186/Jetson_Multimedia_API_R32.5.0_aarch64.tbz2 -O jetson_multimedia_api.tbz2
else
# assume Jetpack 5.X
wget -q https://developer.nvidia.com/downloads/embedded/l4t/r35_release_v3.1/release/jetson_multimedia_api_r35.3.1_aarch64.tbz2 -O jetson_multimedia_api.tbz2
fi
tar xaf jetson_multimedia_api.tbz2 -C / && rm jetson_multimedia_api.tbz2
wget -q https://github.com/madsciencetist/jetson-ffmpeg/archive/refs/heads/master.zip
unzip master.zip && rm master.zip && cd jetson-ffmpeg-master
export LD_LIBRARY_PATH=$(pwd)/stubs:${LD_LIBRARY_PATH:-} # tegra multimedia libs aren't available in the image, so use stubs for the ffmpeg build
mkdir build
cd build
cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=$INSTALL_PREFIX
make -j$(nproc)
make install
cd ../../
# Install nv-codec-headers to enable ffnvcodec filters (scale_cuda)
wget -q https://github.com/FFmpeg/nv-codec-headers/archive/refs/heads/master.zip
unzip master.zip && rm master.zip && cd nv-codec-headers-master
make PREFIX=$INSTALL_PREFIX install
cd ../ && rm -rf nv-codec-headers-master
# Build ffmpeg with nvmpi patch
wget -q https://ffmpeg.org/releases/ffmpeg-6.0.tar.xz
tar xaf ffmpeg-*.tar.xz && rm ffmpeg-*.tar.xz && cd ffmpeg-*
patch -p1 < ../jetson-ffmpeg-master/ffmpeg_patches/ffmpeg6.0_nvmpi.patch
export PKG_CONFIG_PATH=$INSTALL_PREFIX/lib/pkgconfig
# enable Jetson codecs but disable dGPU codecs
./configure --cc='ccache gcc' --cxx='ccache g++' \
--enable-shared --disable-static --prefix=$INSTALL_PREFIX \
--enable-gpl --enable-libx264 --enable-libx265 \
--enable-nvmpi --enable-ffnvcodec --enable-cuda-llvm \
--disable-cuvid --disable-nvenc --disable-nvdec \
|| { cat ffbuild/config.log && false; }
make -j$(nproc)
make install
cd ../
rm -rf /var/lib/apt/lists/*
popd


@@ -20,6 +20,12 @@ target "_build_args" {
platforms = ["linux/${ARCH}"]
}
target wget {
dockerfile = "docker/main/Dockerfile"
target = "wget"
inherits = ["_build_args"]
}
target deps {
dockerfile = "docker/main/Dockerfile"
target = "deps"
@@ -57,6 +63,7 @@ target "tensorrt" {
dockerfile = "docker/tensorrt/Dockerfile.${ARCH}"
context = "."
contexts = {
wget = "target:wget",
tensorrt-base = "target:tensorrt-base", tensorrt-base = "target:tensorrt-base",
rootfs = "target:rootfs" rootfs = "target:rootfs"
wheels = "target:wheels" wheels = "target:wheels"


@@ -11,16 +11,18 @@ It is highly recommended to use hwaccel presets in the config. These presets not
See [the hwaccel docs](/configuration/hardware_acceleration.md) for more info on how to setup hwaccel for your GPU / iGPU.
| Preset | Usage | Other Notes |
| --------------------- | ------------------------------ | ----------------------------------------------------- |
| preset-rpi-32-h264 | 32 bit Rpi with h264 stream | |
| preset-rpi-64-h264 | 64 bit Rpi with h264 stream | |
| preset-vaapi | Intel & AMD VAAPI | Check hwaccel docs to ensure correct driver is chosen |
| preset-intel-qsv-h264 | Intel QSV with h264 stream | If issues occur recommend using vaapi preset instead |
| preset-intel-qsv-h265 | Intel QSV with h265 stream | If issues occur recommend using vaapi preset instead |
| preset-nvidia-h264 | Nvidia GPU with h264 stream | |
| preset-nvidia-h265 | Nvidia GPU with h265 stream | |
| preset-nvidia-mjpeg | Nvidia GPU with mjpeg stream | Recommend restreaming mjpeg and using nvidia-h264 |
| preset-jetson-h264 | Nvidia Jetson with h264 stream | |
| preset-jetson-h265 | Nvidia Jetson with h265 stream | |
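For example, a minimal camera-level sketch applying one of the Jetson presets above (the camera name and RTSP URL are placeholders, not part of this commit):

```yaml
cameras:
  jetson_cam: # placeholder camera name
    ffmpeg:
      hwaccel_args: preset-jetson-h264
      inputs:
        - path: rtsp://192.168.1.10:554/stream1 # placeholder RTSP URL
          roles:
            - detect
```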
### Input Args Presets


@@ -242,3 +242,46 @@ processes:
If you do not see these processes, check the `docker logs` for the container and look for decoding errors.
These instructions were originally based on the [Jellyfin documentation](https://jellyfin.org/docs/general/administration/hardware-acceleration.html#nvidia-hardware-acceleration-on-docker-linux).
### NVIDIA Jetson (Orin AGX, Orin NX, Orin Nano*, Xavier AGX, Xavier NX, TX2, TX1, Nano)
A separate set of docker images based on Jetpack/L4T is available. These come with an `ffmpeg` build
with codecs that use the Jetson's dedicated media engine. Use the `frigate-tensorrt-jp*` image with the nvidia container
runtime:
##### Docker Run CLI
```bash
docker run -d \
  ... \
  --runtime nvidia \
  ghcr.io/blakeblackshear/frigate-tensorrt-jp5
```
If your Jetson host is running Jetpack 4.6, use the `frigate-tensorrt-jp4` image; if it is running Jetpack 5.0+, use the `frigate-tensorrt-jp5` image. Note that the Orin Nano has no video encoder, so Frigate will fall back to software encoding on that platform, but hardware decoding and TensorRT object detection still work.
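Equivalently, a minimal Compose sketch (the volume paths and port mapping below are illustrative, not part of this commit):

```yaml
services:
  frigate:
    image: ghcr.io/blakeblackshear/frigate-tensorrt-jp5
    runtime: nvidia # requires the nvidia container runtime on the Jetson host
    restart: unless-stopped
    volumes:
      - ./config:/config # placeholder host paths
      - ./storage:/media/frigate
    ports:
      - "5000:5000" # Frigate web UI
```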
#### Setup Decoder
The decoder you need to pass in `hwaccel_args` depends on the input video.
A list of supported decoders (run `ffmpeg -decoders | grep nvmpi` inside the container to see which ones your Jetson supports):
```
V..... h264_nvmpi h264 (nvmpi) (codec h264)
V..... hevc_nvmpi hevc (nvmpi) (codec hevc)
V..... mpeg2_nvmpi mpeg2 (nvmpi) (codec mpeg2video)
V..... mpeg4_nvmpi mpeg4 (nvmpi) (codec mpeg4)
V..... vp8_nvmpi vp8 (nvmpi) (codec vp8)
V..... vp9_nvmpi vp9 (nvmpi) (codec vp9)
```
For example, for H264 video, you'll select `preset-jetson-h264`.
```yaml
ffmpeg:
hwaccel_args: preset-jetson-h264
```
If everything is working correctly, you should see a significant reduction in ffmpeg CPU load and power consumption.
Verify that hardware decoding is working by running `jtop` (`sudo pip3 install -U jetson-stats`), which should show
that NVDEC/NVDEC1 are in use.
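For reference, the commands from the note above (a reboot or restart of the `jtop` service may be needed after the first install):

```bash
sudo pip3 install -U jetson-stats
jtop
```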


@@ -101,12 +101,18 @@ This should show <50% CPU in top, and ~80% CPU without `-c:v h264_v4l2m2m`.
ffmpeg -c:v h264_v4l2m2m -re -stream_loop -1 -i https://streams.videolan.org/ffmpeg/incoming/720p60.mp4 -f rawvideo -pix_fmt yuv420p pipe: > /dev/null
```
**NVIDIA GPU**
```shell
ffmpeg -c:v h264_cuvid -re -stream_loop -1 -i https://streams.videolan.org/ffmpeg/incoming/720p60.mp4 -f rawvideo -pix_fmt yuv420p pipe: > /dev/null
```
**NVIDIA Jetson**
```shell
ffmpeg -c:v h264_nvmpi -re -stream_loop -1 -i https://streams.videolan.org/ffmpeg/incoming/720p60.mp4 -f rawvideo -pix_fmt yuv420p pipe: > /dev/null
```
**VAAPI**
```shell


@@ -804,9 +804,19 @@ class CameraConfig(FrigateBaseModel):
ffmpeg_input.global_args or self.ffmpeg.global_args
)
hwaccel_args = get_ffmpeg_arg_list(
parse_preset_hardware_acceleration_decode(
ffmpeg_input.hwaccel_args,
self.detect.fps,
self.detect.width,
self.detect.height,
)
or ffmpeg_input.hwaccel_args
or parse_preset_hardware_acceleration_decode(
self.ffmpeg.hwaccel_args,
self.detect.fps,
self.detect.width,
self.detect.height,
)
or self.ffmpeg.hwaccel_args
)
input_args = get_ffmpeg_arg_list(


@@ -63,6 +63,8 @@ PRESETS_HW_ACCEL_DECODE = {
"preset-nvidia-h264": "-hwaccel cuda -hwaccel_output_format cuda",
"preset-nvidia-h265": "-hwaccel cuda -hwaccel_output_format cuda",
"preset-nvidia-mjpeg": "-hwaccel cuda -hwaccel_output_format cuda",
"preset-jetson-h264": "-c:v h264_nvmpi -resize {1}x{2}",
"preset-jetson-h265": "-c:v hevc_nvmpi -resize {1}x{2}",
}
PRESETS_HW_ACCEL_SCALE = {
@@ -73,6 +75,8 @@ PRESETS_HW_ACCEL_SCALE = {
"preset-intel-qsv-h265": "-r {0} -vf vpp_qsv=framerate={0}:w={1}:h={2}:format=nv12,hwdownload,format=nv12,format=yuv420p",
"preset-nvidia-h264": "-r {0} -vf fps={0},scale_cuda=w={1}:h={2}:format=nv12,hwdownload,format=nv12,format=yuv420p",
"preset-nvidia-h265": "-r {0} -vf fps={0},scale_cuda=w={1}:h={2}:format=nv12,hwdownload,format=nv12,format=yuv420p",
"preset-jetson-h264": "-r {0}", # scaled in decoder
"preset-jetson-h265": "-r {0}", # scaled in decoder
"default": "-r {0} -vf fps={0},scale={1}:{2}", "default": "-r {0} -vf fps={0},scale={1}:{2}",
} }
@@ -84,6 +88,8 @@ PRESETS_HW_ACCEL_ENCODE_BIRDSEYE = {
"preset-intel-qsv-h265": "ffmpeg -hide_banner {0} -c:v h264_qsv -g 50 -bf 0 -profile:v high -level:v 4.1 -async_depth:v 1 {1}",
"preset-nvidia-h264": "ffmpeg -hide_banner {0} -c:v h264_nvenc -g 50 -profile:v high -level:v auto -preset:v p2 -tune:v ll {1}",
"preset-nvidia-h265": "ffmpeg -hide_banner {0} -c:v h264_nvenc -g 50 -profile:v high -level:v auto -preset:v p2 -tune:v ll {1}",
"preset-jetson-h264": "ffmpeg -hide_banner {0} -c:v h264_nvmpi -profile high {1}",
"preset-jetson-h265": "ffmpeg -hide_banner {0} -c:v h264_nvmpi -profile high {1}",
"default": "ffmpeg -hide_banner {0} -c:v libx264 -g 50 -profile:v high -level:v 4.1 -preset:v superfast -tune:v zerolatency {1}", "default": "ffmpeg -hide_banner {0} -c:v libx264 -g 50 -profile:v high -level:v 4.1 -preset:v superfast -tune:v zerolatency {1}",
} }
@@ -95,11 +101,18 @@ PRESETS_HW_ACCEL_ENCODE_TIMELAPSE = {
"preset-intel-qsv-h265": "ffmpeg -hide_banner {0} -c:v hevc_qsv -g 50 -bf 0 -profile:v high -level:v 4.1 -async_depth:v 1 {1}",
"preset-nvidia-h264": "ffmpeg -hide_banner -hwaccel cuda -hwaccel_output_format cuda -extra_hw_frames 8 {0} -c:v h264_nvenc {1}",
"preset-nvidia-h265": "ffmpeg -hide_banner -hwaccel cuda -hwaccel_output_format cuda -extra_hw_frames 8 {0} -c:v hevc_nvenc {1}",
"preset-jetson-h264": "ffmpeg -hide_banner {0} -c:v h264_nvmpi -profile high {1}",
"preset-jetson-h265": "ffmpeg -hide_banner {0} -c:v hevc_nvmpi -profile high {1}",
"default": "ffmpeg -hide_banner {0} -c:v libx264 -preset:v ultrafast -tune:v zerolatency {1}", "default": "ffmpeg -hide_banner {0} -c:v libx264 -preset:v ultrafast -tune:v zerolatency {1}",
} }
def parse_preset_hardware_acceleration_decode(arg: Any) -> list[str]: def parse_preset_hardware_acceleration_decode(
arg: Any,
fps: int,
width: int,
height: int,
) -> list[str]:
"""Return the correct preset if in preset format otherwise return None.""" """Return the correct preset if in preset format otherwise return None."""
if not isinstance(arg, str): if not isinstance(arg, str):
return None return None
@ -109,7 +122,7 @@ def parse_preset_hardware_acceleration_decode(arg: Any) -> list[str]:
if not decode: if not decode:
return None return None
return decode.split(" ") return decode.format(fps, width, height).split(" ")
def parse_preset_hardware_acceleration_scale(
@@ -147,6 +160,10 @@ def parse_preset_hardware_acceleration_encode(
if not isinstance(arg, str):
return arg_map["default"].format(input, output)
# Not all Jetsons have a HW encoder, so fall back to the default SW encoder if one is not present
if arg.startswith("preset-jetson-") and not os.path.exists("/dev/nvhost-msenc"):
arg = "default"
return arg_map.get(arg, arg_map["default"]).format(
input,
output,
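A quick sketch of what the new Jetson presets produce at runtime (not part of the commit; the detect fps/width/height values below are illustrative):

```python
import os

# Decode: the preset string from PRESETS_HW_ACCEL_DECODE above is formatted with the
# camera's detect fps/width/height, so scaling happens inside the nvmpi decoder itself.
fps, width, height = 5, 1280, 720
decode = "-c:v h264_nvmpi -resize {1}x{2}"  # PRESETS_HW_ACCEL_DECODE["preset-jetson-h264"]
print(decode.format(fps, width, height).split(" "))
# ['-c:v', 'h264_nvmpi', '-resize', '1280x720']

# Encode: Jetsons without a hardware encoder (e.g. Orin Nano) have no /dev/nvhost-msenc,
# so parse_preset_hardware_acceleration_encode() falls back to the software "default" entry.
preset = "preset-jetson-h264"
if preset.startswith("preset-jetson-") and not os.path.exists("/dev/nvhost-msenc"):
    preset = "default"
```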