diff --git a/docker/tensorrt/Dockerfile.arm64 b/docker/tensorrt/Dockerfile.arm64 index 8341e6638..70184bf9b 100644 --- a/docker/tensorrt/Dockerfile.arm64 +++ b/docker/tensorrt/Dockerfile.arm64 @@ -52,12 +52,23 @@ RUN apt-get update \ RUN --mount=type=bind,source=docker/tensorrt/requirements-models-arm64.txt,target=/requirements-tensorrt-models.txt \ pip3 wheel --wheel-dir=/trt-model-wheels -r /requirements-tensorrt-models.txt +FROM wget AS jetson-ffmpeg +ARG DEBIAN_FRONTEND +ENV CCACHE_DIR /root/.ccache +ENV CCACHE_MAXSIZE 2G +RUN --mount=type=bind,source=docker/tensorrt/build_jetson_ffmpeg.sh,target=/deps/build_jetson_ffmpeg.sh \ + --mount=type=cache,target=/root/.ccache \ + /deps/build_jetson_ffmpeg.sh + # Frigate w/ TensorRT for NVIDIA Jetson platforms FROM tensorrt-base AS frigate-tensorrt RUN apt-get update \ && apt-get install -y python-is-python3 libprotobuf17 \ && rm -rf /var/lib/apt/lists/* +RUN rm -rf /usr/lib/btbn-ffmpeg/ +COPY --from=jetson-ffmpeg /rootfs / + COPY --from=trt-wheels /etc/TENSORRT_VER /etc/TENSORRT_VER RUN --mount=type=bind,from=trt-wheels,source=/trt-wheels,target=/deps/trt-wheels \ --mount=type=bind,from=trt-model-wheels,source=/trt-model-wheels,target=/deps/trt-model-wheels \ diff --git a/docker/tensorrt/build_jetson_ffmpeg.sh b/docker/tensorrt/build_jetson_ffmpeg.sh new file mode 100755 index 000000000..8c532ebc3 --- /dev/null +++ b/docker/tensorrt/build_jetson_ffmpeg.sh @@ -0,0 +1,59 @@ +#!/bin/bash + +# For jetson platforms, build ffmpeg with custom patches. 
NVIDIA supplies a deb +# with accelerated decoding, but it doesn't have accelerated scaling or encoding + +set -euxo pipefail + +INSTALL_PREFIX=/rootfs/usr/local + +apt-get -qq update +apt-get -qq install -y --no-install-recommends build-essential ccache clang cmake pkg-config +apt-get -qq install -y --no-install-recommends libx264-dev libx265-dev + +pushd /tmp + +# Install libnvmpi to enable nvmpi decoders (h264_nvmpi, hevc_nvmpi) +if [ -e /usr/local/cuda-10.2 ]; then + # assume Jetpack 4.X + wget -q https://developer.nvidia.com/embedded/L4T/r32_Release_v5.0/T186/Jetson_Multimedia_API_R32.5.0_aarch64.tbz2 -O jetson_multimedia_api.tbz2 +else + # assume Jetpack 5.X + wget -q https://developer.nvidia.com/downloads/embedded/l4t/r35_release_v3.1/release/jetson_multimedia_api_r35.3.1_aarch64.tbz2 -O jetson_multimedia_api.tbz2 +fi +tar xaf jetson_multimedia_api.tbz2 -C / && rm jetson_multimedia_api.tbz2 + +wget -q https://github.com/madsciencetist/jetson-ffmpeg/archive/refs/heads/master.zip +unzip master.zip && rm master.zip && cd jetson-ffmpeg-master +LD_LIBRARY_PATH=$(pwd)/stubs:$LD_LIBRARY_PATH # tegra multimedia libs aren't available in image, so use stubs for ffmpeg build +mkdir build +cd build +cmake .. 
-DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=$INSTALL_PREFIX +make -j$(nproc) +make install +cd ../../ + +# Install nv-codec-headers to enable ffnvcodec filters (scale_cuda) +wget -q https://github.com/FFmpeg/nv-codec-headers/archive/refs/heads/master.zip +unzip master.zip && rm master.zip && cd nv-codec-headers-master +make PREFIX=$INSTALL_PREFIX install +cd ../ && rm -rf nv-codec-headers-master + +# Build ffmpeg with nvmpi patch +wget -q https://ffmpeg.org/releases/ffmpeg-6.0.tar.xz +tar xaf ffmpeg-*.tar.xz && rm ffmpeg-*.tar.xz && cd ffmpeg-* +patch -p1 < ../jetson-ffmpeg-master/ffmpeg_patches/ffmpeg6.0_nvmpi.patch +export PKG_CONFIG_PATH=$INSTALL_PREFIX/lib/pkgconfig +# enable Jetson codecs but disable dGPU codecs +./configure --cc='ccache gcc' --cxx='ccache g++' \ + --enable-shared --disable-static --prefix=$INSTALL_PREFIX \ + --enable-gpl --enable-libx264 --enable-libx265 \ + --enable-nvmpi --enable-ffnvcodec --enable-cuda-llvm \ + --disable-cuvid --disable-nvenc --disable-nvdec \ + || { cat ffbuild/config.log && false; } +make -j$(nproc) +make install +cd ../ + +rm -rf /var/lib/apt/lists/* +popd diff --git a/docker/tensorrt/trt.hcl b/docker/tensorrt/trt.hcl index 895936eb4..56e294100 100644 --- a/docker/tensorrt/trt.hcl +++ b/docker/tensorrt/trt.hcl @@ -20,6 +20,12 @@ target "_build_args" { platforms = ["linux/${ARCH}"] } +target wget { + dockerfile = "docker/main/Dockerfile" + target = "wget" + inherits = ["_build_args"] +} + target deps { dockerfile = "docker/main/Dockerfile" target = "deps" @@ -66,6 +72,7 @@ target "tensorrt" { dockerfile = "docker/tensorrt/Dockerfile.${ARCH}" context = "." 
contexts = { + wget = "target:wget", tensorrt-base = "target:tensorrt-base", rootfs = "target:rootfs" wheels = "target:wheels" diff --git a/docs/docs/configuration/ffmpeg_presets.md b/docs/docs/configuration/ffmpeg_presets.md index 66747350e..e36419c13 100644 --- a/docs/docs/configuration/ffmpeg_presets.md +++ b/docs/docs/configuration/ffmpeg_presets.md @@ -11,16 +11,18 @@ It is highly recommended to use hwaccel presets in the config. These presets not See [the hwaccel docs](/configuration/hardware_acceleration.md) for more info on how to setup hwaccel for your GPU / iGPU. -| Preset | Usage | Other Notes | -| --------------------- | ---------------------------- | ----------------------------------------------------- | -| preset-rpi-32-h264 | 32 bit Rpi with h264 stream | | -| preset-rpi-64-h264 | 64 bit Rpi with h264 stream | | -| preset-vaapi | Intel & AMD VAAPI | Check hwaccel docs to ensure correct driver is chosen | -| preset-intel-qsv-h264 | Intel QSV with h264 stream | If issues occur recommend using vaapi preset instead | -| preset-intel-qsv-h265 | Intel QSV with h265 stream | If issues occur recommend using vaapi preset instead | -| preset-nvidia-h264 | Nvidia GPU with h264 stream | | -| preset-nvidia-h265 | Nvidia GPU with h265 stream | | -| preset-nvidia-mjpeg | Nvidia GPU with mjpeg stream | Recommend restreaming mjpeg and using nvidia-h264 | +| Preset | Usage | Other Notes | +| --------------------- | ------------------------------ | ----------------------------------------------------- | +| preset-rpi-32-h264 | 32 bit Rpi with h264 stream | | +| preset-rpi-64-h264 | 64 bit Rpi with h264 stream | | +| preset-vaapi | Intel & AMD VAAPI | Check hwaccel docs to ensure correct driver is chosen | +| preset-intel-qsv-h264 | Intel QSV with h264 stream | If issues occur recommend using vaapi preset instead | +| preset-intel-qsv-h265 | Intel QSV with h265 stream | If issues occur recommend using vaapi preset instead | +| preset-nvidia-h264 | Nvidia GPU with h264 
stream | | +| preset-nvidia-h265 | Nvidia GPU with h265 stream | | +| preset-nvidia-mjpeg | Nvidia GPU with mjpeg stream | Recommend restreaming mjpeg and using nvidia-h264 | +| preset-jetson-h264 | Nvidia Jetson with h264 stream | | +| preset-jetson-h265 | Nvidia Jetson with h265 stream | | ### Input Args Presets diff --git a/docs/docs/configuration/hardware_acceleration.md b/docs/docs/configuration/hardware_acceleration.md index cb042c860..a1d952982 100644 --- a/docs/docs/configuration/hardware_acceleration.md +++ b/docs/docs/configuration/hardware_acceleration.md @@ -246,3 +246,46 @@ If you do not see these processes, check the `docker logs` for the container and These instructions were originally based on the [Jellyfin documentation](https://jellyfin.org/docs/general/administration/hardware-acceleration.html#nvidia-hardware-acceleration-on-docker-linux). # Community Supported + +## NVIDIA Jetson (Orin AGX, Orin NX, Orin Nano*, Xavier AGX, Xavier NX, TX2, TX1, Nano) + +A separate set of docker images is available that is based on Jetpack/L4T. They come with an `ffmpeg` build +with codecs that use the Jetson's dedicated media engine. Use the `frigate-tensorrt-jp*` image with the nvidia container +runtime: + +#### Docker Run CLI + +```bash +docker run -d \ + ... + --runtime nvidia \ + ghcr.io/blakeblackshear/frigate-tensorrt-jp5 +``` + +If your Jetson host is running Jetpack 4.6, use the `frigate-tensorrt-jp4` image, or if your Jetson host is running Jetpack 5.0+, use the `frigate-tensorrt-jp5` image. Note that the Orin Nano has no video encoder, so frigate will use software encoding on this platform, but the image will still allow hardware decoding and tensorrt object detection. + +### Setup Decoder + +The decoder you need to pass in the `hwaccel_args` will depend on the input video. + +A list of supported codecs (you can use `ffmpeg -decoders | grep nvmpi` in the container to get the ones your card supports) + +``` + V..... 
h264_nvmpi h264 (nvmpi) (codec h264) + V..... hevc_nvmpi hevc (nvmpi) (codec hevc) + V..... mpeg2_nvmpi mpeg2 (nvmpi) (codec mpeg2video) + V..... mpeg4_nvmpi mpeg4 (nvmpi) (codec mpeg4) + V..... vp8_nvmpi vp8 (nvmpi) (codec vp8) + V..... vp9_nvmpi vp9 (nvmpi) (codec vp9) +``` + +For example, for H264 video, you'll select `preset-jetson-h264`. + +```yaml +ffmpeg: + hwaccel_args: preset-jetson-h264 +``` + +If everything is working correctly, you should see a significant reduction in ffmpeg CPU load and power consumption. +Verify that hardware decoding is working by running `jtop` (`sudo pip3 install -U jetson-stats`), which should show +that NVDEC/NVDEC1 are in use. diff --git a/docs/docs/development/contributing.md b/docs/docs/development/contributing.md index 3045546c6..b98ceb035 100644 --- a/docs/docs/development/contributing.md +++ b/docs/docs/development/contributing.md @@ -101,12 +101,18 @@ This should show <50% CPU in top, and ~80% CPU without `-c:v h264_v4l2m2m`. ffmpeg -c:v h264_v4l2m2m -re -stream_loop -1 -i https://streams.videolan.org/ffmpeg/incoming/720p60.mp4 -f rawvideo -pix_fmt yuv420p pipe: > /dev/null ``` -**NVIDIA** +**NVIDIA GPU** ```shell ffmpeg -c:v h264_cuvid -re -stream_loop -1 -i https://streams.videolan.org/ffmpeg/incoming/720p60.mp4 -f rawvideo -pix_fmt yuv420p pipe: > /dev/null ``` +**NVIDIA Jetson** + +```shell +ffmpeg -c:v h264_nvmpi -re -stream_loop -1 -i https://streams.videolan.org/ffmpeg/incoming/720p60.mp4 -f rawvideo -pix_fmt yuv420p pipe: > /dev/null +``` + **VAAPI** ```shell diff --git a/frigate/config.py b/frigate/config.py index 59e086989..101775f6d 100644 --- a/frigate/config.py +++ b/frigate/config.py @@ -804,9 +804,19 @@ class CameraConfig(FrigateBaseModel): ffmpeg_input.global_args or self.ffmpeg.global_args ) hwaccel_args = get_ffmpeg_arg_list( - parse_preset_hardware_acceleration_decode(ffmpeg_input.hwaccel_args) + parse_preset_hardware_acceleration_decode( + ffmpeg_input.hwaccel_args, + self.detect.fps, + 
self.detect.width, + self.detect.height, + ) or ffmpeg_input.hwaccel_args - or parse_preset_hardware_acceleration_decode(self.ffmpeg.hwaccel_args) + or parse_preset_hardware_acceleration_decode( + self.ffmpeg.hwaccel_args, + self.detect.fps, + self.detect.width, + self.detect.height, + ) or self.ffmpeg.hwaccel_args ) input_args = get_ffmpeg_arg_list( diff --git a/frigate/ffmpeg_presets.py b/frigate/ffmpeg_presets.py index a48b4a209..571781551 100644 --- a/frigate/ffmpeg_presets.py +++ b/frigate/ffmpeg_presets.py @@ -63,6 +63,8 @@ PRESETS_HW_ACCEL_DECODE = { "preset-nvidia-h264": "-hwaccel cuda -hwaccel_output_format cuda", "preset-nvidia-h265": "-hwaccel cuda -hwaccel_output_format cuda", "preset-nvidia-mjpeg": "-hwaccel cuda -hwaccel_output_format cuda", + "preset-jetson-h264": "-c:v h264_nvmpi -resize {1}x{2}", + "preset-jetson-h265": "-c:v hevc_nvmpi -resize {1}x{2}", } PRESETS_HW_ACCEL_SCALE = { @@ -73,6 +75,8 @@ PRESETS_HW_ACCEL_SCALE = { "preset-intel-qsv-h265": "-r {0} -vf vpp_qsv=framerate={0}:w={1}:h={2}:format=nv12,hwdownload,format=nv12,format=yuv420p", "preset-nvidia-h264": "-r {0} -vf fps={0},scale_cuda=w={1}:h={2}:format=nv12,hwdownload,format=nv12,format=yuv420p", "preset-nvidia-h265": "-r {0} -vf fps={0},scale_cuda=w={1}:h={2}:format=nv12,hwdownload,format=nv12,format=yuv420p", + "preset-jetson-h264": "-r {0}", # scaled in decoder + "preset-jetson-h265": "-r {0}", # scaled in decoder "default": "-r {0} -vf fps={0},scale={1}:{2}", } @@ -84,6 +88,8 @@ PRESETS_HW_ACCEL_ENCODE_BIRDSEYE = { "preset-intel-qsv-h265": "ffmpeg -hide_banner {0} -c:v h264_qsv -g 50 -bf 0 -profile:v high -level:v 4.1 -async_depth:v 1 {1}", "preset-nvidia-h264": "ffmpeg -hide_banner {0} -c:v h264_nvenc -g 50 -profile:v high -level:v auto -preset:v p2 -tune:v ll {1}", "preset-nvidia-h265": "ffmpeg -hide_banner {0} -c:v h264_nvenc -g 50 -profile:v high -level:v auto -preset:v p2 -tune:v ll {1}", + "preset-jetson-h264": "ffmpeg -hide_banner {0} -c:v h264_nvmpi -profile high {1}", + 
"preset-jetson-h265": "ffmpeg -hide_banner {0} -c:v h264_nvmpi -profile high {1}", "default": "ffmpeg -hide_banner {0} -c:v libx264 -g 50 -profile:v high -level:v 4.1 -preset:v superfast -tune:v zerolatency {1}", } @@ -95,11 +101,18 @@ PRESETS_HW_ACCEL_ENCODE_TIMELAPSE = { "preset-intel-qsv-h265": "ffmpeg -hide_banner {0} -c:v hevc_qsv -g 50 -bf 0 -profile:v high -level:v 4.1 -async_depth:v 1 {1}", "preset-nvidia-h264": "ffmpeg -hide_banner -hwaccel cuda -hwaccel_output_format cuda -extra_hw_frames 8 {0} -c:v h264_nvenc {1}", "preset-nvidia-h265": "ffmpeg -hide_banner -hwaccel cuda -hwaccel_output_format cuda -extra_hw_frames 8 {0} -c:v hevc_nvenc {1}", + "preset-jetson-h264": "ffmpeg -hide_banner {0} -c:v h264_nvmpi -profile high {1}", + "preset-jetson-h265": "ffmpeg -hide_banner {0} -c:v hevc_nvmpi -profile high {1}", "default": "ffmpeg -hide_banner {0} -c:v libx264 -preset:v ultrafast -tune:v zerolatency {1}", } -def parse_preset_hardware_acceleration_decode(arg: Any) -> list[str]: +def parse_preset_hardware_acceleration_decode( + arg: Any, + fps: int, + width: int, + height: int, +) -> list[str]: """Return the correct preset if in preset format otherwise return None.""" if not isinstance(arg, str): return None @@ -109,7 +122,7 @@ def parse_preset_hardware_acceleration_decode(arg: Any) -> list[str]: if not decode: return None - return decode.split(" ") + return decode.format(fps, width, height).split(" ") def parse_preset_hardware_acceleration_scale( @@ -147,6 +160,10 @@ def parse_preset_hardware_acceleration_encode( if not isinstance(arg, str): return arg_map["default"].format(input, output) + # Not all jetsons have HW encoders, so fall back to default SW encoder if not + if arg.startswith("preset-jetson-") and not os.path.exists("/dev/nvhost-msenc"): + arg = "default" + return arg_map.get(arg, arg_map["default"]).format( input, output,