Add Jetson ffmpeg hwaccel

Andrew Reiter 2023-07-18 12:55:45 -04:00
parent 288828750c
commit b0d94a0ce7
8 changed files with 170 additions and 15 deletions


@@ -34,12 +34,23 @@ RUN apt-get update \
RUN --mount=type=bind,source=docker/tensorrt/requirements-models-arm64.txt,target=/requirements-tensorrt-models.txt \
pip3 wheel --wheel-dir=/trt-model-wheels -r /requirements-tensorrt-models.txt
FROM wget AS jetson-ffmpeg
ARG DEBIAN_FRONTEND
ENV CCACHE_DIR /root/.ccache
ENV CCACHE_MAXSIZE 2G
RUN --mount=type=bind,source=docker/tensorrt/build_jetson_ffmpeg.sh,target=/deps/build_jetson_ffmpeg.sh \
--mount=type=cache,target=/root/.ccache \
/deps/build_jetson_ffmpeg.sh
# Frigate w/ TensorRT for NVIDIA Jetson platforms
FROM tensorrt-base AS frigate-tensorrt
RUN apt-get update \
&& apt-get install -y python-is-python3 libprotobuf17 \
&& rm -rf /var/lib/apt/lists/*
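# Replace the bundled btbn-ffmpeg build with the Jetson ffmpeg built in the jetson-ffmpeg stage above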
RUN rm -rf /usr/lib/btbn-ffmpeg/
COPY --from=jetson-ffmpeg /rootfs /
COPY --from=trt-wheels /etc/TENSORRT_VER /etc/TENSORRT_VER
RUN --mount=type=bind,from=trt-wheels,source=/trt-wheels,target=/deps/trt-wheels \
--mount=type=bind,from=trt-model-wheels,source=/trt-model-wheels,target=/deps/trt-model-wheels \


@@ -0,0 +1,59 @@
#!/bin/bash
# For jetson platforms, build ffmpeg with custom patches. NVIDIA supplies a deb
# with accelerated decoding, but it doesn't have accelerated scaling or encoding
set -euxo pipefail
INSTALL_PREFIX=/rootfs/usr/local
apt-get -qq update
apt-get -qq install -y --no-install-recommends build-essential ccache clang cmake pkg-config
apt-get -qq install -y --no-install-recommends libx264-dev libx265-dev
pushd /tmp
# Install libnvmpi to enable nvmpi decoders (h264_nvmpi, hevc_nvmpi)
if [ -e /usr/local/cuda-10.2 ]; then
# assume Jetpack 4.X
wget -q https://developer.nvidia.com/embedded/L4T/r32_Release_v5.0/T186/Jetson_Multimedia_API_R32.5.0_aarch64.tbz2 -O jetson_multimedia_api.tbz2
else
# assume Jetpack 5.X
wget -q https://developer.nvidia.com/downloads/embedded/l4t/r35_release_v3.1/release/jetson_multimedia_api_r35.3.1_aarch64.tbz2 -O jetson_multimedia_api.tbz2
fi
tar xaf jetson_multimedia_api.tbz2 -C / && rm jetson_multimedia_api.tbz2
wget -q https://github.com/madsciencetist/jetson-ffmpeg/archive/refs/heads/master.zip
unzip master.zip && rm master.zip && cd jetson-ffmpeg-master
export LD_LIBRARY_PATH=$(pwd)/stubs:${LD_LIBRARY_PATH:-} # tegra multimedia libs aren't available in the image, so use stubs for the ffmpeg build
mkdir build
cd build
cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=$INSTALL_PREFIX
make -j$(nproc)
make install
cd ../../
# Install nv-codec-headers to enable ffnvcodec filters (scale_cuda)
wget -q https://github.com/FFmpeg/nv-codec-headers/archive/refs/heads/master.zip
unzip master.zip && rm master.zip && cd nv-codec-headers-master
make PREFIX=$INSTALL_PREFIX install
cd ../ && rm -rf nv-codec-headers-master
# Build ffmpeg with nvmpi patch
wget -q https://ffmpeg.org/releases/ffmpeg-6.0.tar.xz
tar xaf ffmpeg-*.tar.xz && rm ffmpeg-*.tar.xz && cd ffmpeg-*
patch -p1 < ../jetson-ffmpeg-master/ffmpeg_patches/ffmpeg6.0_nvmpi.patch
export PKG_CONFIG_PATH=$INSTALL_PREFIX/lib/pkgconfig
# enable Jetson codecs but disable dGPU codecs
./configure --cc='ccache gcc' --cxx='ccache g++' \
--enable-shared --disable-static --prefix=$INSTALL_PREFIX \
--enable-gpl --enable-libx264 --enable-libx265 \
--enable-nvmpi --enable-ffnvcodec --enable-cuda-llvm \
--disable-cuvid --disable-nvenc --disable-nvdec \
|| { cat ffbuild/config.log && false; }
make -j$(nproc)
make install
cd ../
rm -rf /var/lib/apt/lists/*
popd


@@ -20,6 +20,12 @@ target "_build_args" {
platforms = ["linux/${ARCH}"]
}
target wget {
dockerfile = "docker/main/Dockerfile"
target = "wget"
inherits = ["_build_args"]
}
target deps {
dockerfile = "docker/main/Dockerfile"
target = "deps"
@@ -57,6 +63,7 @@ target "tensorrt" {
dockerfile = "docker/tensorrt/Dockerfile.${ARCH}"
context = "."
contexts = {
wget = "target:wget",
tensorrt-base = "target:tensorrt-base", tensorrt-base = "target:tensorrt-base",
rootfs = "target:rootfs" rootfs = "target:rootfs"
wheels = "target:wheels" wheels = "target:wheels"


@@ -11,16 +11,18 @@ It is highly recommended to use hwaccel presets in the config. These presets not
See [the hwaccel docs](/configuration/hardware_acceleration.md) for more info on how to setup hwaccel for your GPU / iGPU.
| Preset | Usage | Other Notes |
| --------------------- | ------------------------------ | ----------------------------------------------------- |
| preset-rpi-32-h264 | 32 bit Rpi with h264 stream | |
| preset-rpi-64-h264 | 64 bit Rpi with h264 stream | |
| preset-vaapi | Intel & AMD VAAPI | Check hwaccel docs to ensure correct driver is chosen |
| preset-intel-qsv-h264 | Intel QSV with h264 stream | If issues occur recommend using vaapi preset instead |
| preset-intel-qsv-h265 | Intel QSV with h265 stream | If issues occur recommend using vaapi preset instead |
| preset-nvidia-h264 | Nvidia GPU with h264 stream | |
| preset-nvidia-h265 | Nvidia GPU with h265 stream | |
| preset-nvidia-mjpeg | Nvidia GPU with mjpeg stream | Recommend restreaming mjpeg and using nvidia-h264 |
| preset-jetson-h264 | Nvidia Jetson with h264 stream | |
| preset-jetson-h265 | Nvidia Jetson with h265 stream | |
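For example, a minimal camera-level sketch applying one of the Jetson presets above (the camera name and RTSP URL are placeholders, not part of this commit):

```yaml
cameras:
  jetson_cam: # placeholder camera name
    ffmpeg:
      hwaccel_args: preset-jetson-h264
      inputs:
        - path: rtsp://192.168.1.10:554/stream1 # placeholder RTSP URL
          roles:
            - detect
```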
### Input Args Presets


@@ -242,3 +242,46 @@ processes:
If you do not see these processes, check the `docker logs` for the container and look for decoding errors.
These instructions were originally based on the [Jellyfin documentation](https://jellyfin.org/docs/general/administration/hardware-acceleration.html#nvidia-hardware-acceleration-on-docker-linux).
### NVIDIA Jetson (Orin AGX, Orin NX, Orin Nano*, Xavier AGX, Xavier NX, TX2, TX1, Nano)
A separate set of docker images based on Jetpack/L4T is available. These come with an `ffmpeg` build
with codecs that use the Jetson's dedicated media engine. Use the `frigate-tensorrt-jp*` image with the nvidia container
runtime:
##### Docker Run CLI
```bash
docker run -d \
  ... \
  --runtime nvidia \
  ghcr.io/blakeblackshear/frigate-tensorrt-jp5
```
If your Jetson host is running Jetpack 4.6, use the `frigate-tensorrt-jp4` image; if it is running Jetpack 5.0+, use the `frigate-tensorrt-jp5` image. Note that the Orin Nano has no video encoder, so Frigate will fall back to software encoding on that platform, but hardware decoding and TensorRT object detection still work.
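Equivalently, a minimal Compose sketch (the volume paths and port mapping below are illustrative, not part of this commit):

```yaml
services:
  frigate:
    image: ghcr.io/blakeblackshear/frigate-tensorrt-jp5
    runtime: nvidia # requires the nvidia container runtime on the Jetson host
    restart: unless-stopped
    volumes:
      - ./config:/config # placeholder host paths
      - ./storage:/media/frigate
    ports:
      - "5000:5000" # Frigate web UI
```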
#### Setup Decoder
The decoder you need to pass in `hwaccel_args` depends on the input video.
A list of supported decoders (run `ffmpeg -decoders | grep nvmpi` inside the container to see which ones your Jetson supports):
```
V..... h264_nvmpi h264 (nvmpi) (codec h264)
V..... hevc_nvmpi hevc (nvmpi) (codec hevc)
V..... mpeg2_nvmpi mpeg2 (nvmpi) (codec mpeg2video)
V..... mpeg4_nvmpi mpeg4 (nvmpi) (codec mpeg4)
V..... vp8_nvmpi vp8 (nvmpi) (codec vp8)
V..... vp9_nvmpi vp9 (nvmpi) (codec vp9)
```
For example, for H264 video, you'll select `preset-jetson-h264`.
```yaml
ffmpeg:
hwaccel_args: preset-jetson-h264
```
If everything is working correctly, you should see a significant reduction in ffmpeg CPU load and power consumption.
Verify that hardware decoding is working by running `jtop` (`sudo pip3 install -U jetson-stats`), which should show
that NVDEC/NVDEC1 are in use.
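For reference, the commands from the note above (a reboot or restart of the `jtop` service may be needed after the first install):

```bash
sudo pip3 install -U jetson-stats
jtop
```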


@@ -101,12 +101,18 @@ This should show <50% CPU in top, and ~80% CPU without `-c:v h264_v4l2m2m`.
ffmpeg -c:v h264_v4l2m2m -re -stream_loop -1 -i https://streams.videolan.org/ffmpeg/incoming/720p60.mp4 -f rawvideo -pix_fmt yuv420p pipe: > /dev/null
```
**NVIDIA GPU**
```shell
ffmpeg -c:v h264_cuvid -re -stream_loop -1 -i https://streams.videolan.org/ffmpeg/incoming/720p60.mp4 -f rawvideo -pix_fmt yuv420p pipe: > /dev/null
```
**NVIDIA Jetson**
```shell
ffmpeg -c:v h264_nvmpi -re -stream_loop -1 -i https://streams.videolan.org/ffmpeg/incoming/720p60.mp4 -f rawvideo -pix_fmt yuv420p pipe: > /dev/null
```
**VAAPI**
```shell


@@ -804,9 +804,19 @@ class CameraConfig(FrigateBaseModel):
ffmpeg_input.global_args or self.ffmpeg.global_args
)
hwaccel_args = get_ffmpeg_arg_list(
parse_preset_hardware_acceleration_decode(
ffmpeg_input.hwaccel_args,
self.detect.fps,
self.detect.width,
self.detect.height,
)
or ffmpeg_input.hwaccel_args
or parse_preset_hardware_acceleration_decode(
self.ffmpeg.hwaccel_args,
self.detect.fps,
self.detect.width,
self.detect.height,
)
or self.ffmpeg.hwaccel_args
)
input_args = get_ffmpeg_arg_list(


@@ -63,6 +63,8 @@ PRESETS_HW_ACCEL_DECODE = {
"preset-nvidia-h264": "-hwaccel cuda -hwaccel_output_format cuda",
"preset-nvidia-h265": "-hwaccel cuda -hwaccel_output_format cuda",
"preset-nvidia-mjpeg": "-hwaccel cuda -hwaccel_output_format cuda",
"preset-jetson-h264": "-c:v h264_nvmpi -resize {1}x{2}",
"preset-jetson-h265": "-c:v hevc_nvmpi -resize {1}x{2}",
}
PRESETS_HW_ACCEL_SCALE = {
@@ -73,6 +75,8 @@ PRESETS_HW_ACCEL_SCALE = {
"preset-intel-qsv-h265": "-r {0} -vf vpp_qsv=framerate={0}:w={1}:h={2}:format=nv12,hwdownload,format=nv12,format=yuv420p",
"preset-nvidia-h264": "-r {0} -vf fps={0},scale_cuda=w={1}:h={2}:format=nv12,hwdownload,format=nv12,format=yuv420p",
"preset-nvidia-h265": "-r {0} -vf fps={0},scale_cuda=w={1}:h={2}:format=nv12,hwdownload,format=nv12,format=yuv420p",
"preset-jetson-h264": "-r {0}", # scaled in decoder
"preset-jetson-h265": "-r {0}", # scaled in decoder
"default": "-r {0} -vf fps={0},scale={1}:{2}", "default": "-r {0} -vf fps={0},scale={1}:{2}",
} }
@@ -84,6 +88,8 @@ PRESETS_HW_ACCEL_ENCODE_BIRDSEYE = {
"preset-intel-qsv-h265": "ffmpeg -hide_banner {0} -c:v h264_qsv -g 50 -bf 0 -profile:v high -level:v 4.1 -async_depth:v 1 {1}",
"preset-nvidia-h264": "ffmpeg -hide_banner {0} -c:v h264_nvenc -g 50 -profile:v high -level:v auto -preset:v p2 -tune:v ll {1}",
"preset-nvidia-h265": "ffmpeg -hide_banner {0} -c:v h264_nvenc -g 50 -profile:v high -level:v auto -preset:v p2 -tune:v ll {1}",
"preset-jetson-h264": "ffmpeg -hide_banner {0} -c:v h264_nvmpi -profile high {1}",
"preset-jetson-h265": "ffmpeg -hide_banner {0} -c:v h264_nvmpi -profile high {1}",
"default": "ffmpeg -hide_banner {0} -c:v libx264 -g 50 -profile:v high -level:v 4.1 -preset:v superfast -tune:v zerolatency {1}", "default": "ffmpeg -hide_banner {0} -c:v libx264 -g 50 -profile:v high -level:v 4.1 -preset:v superfast -tune:v zerolatency {1}",
} }
@@ -95,11 +101,18 @@ PRESETS_HW_ACCEL_ENCODE_TIMELAPSE = {
"preset-intel-qsv-h265": "ffmpeg -hide_banner {0} -c:v hevc_qsv -g 50 -bf 0 -profile:v high -level:v 4.1 -async_depth:v 1 {1}",
"preset-nvidia-h264": "ffmpeg -hide_banner -hwaccel cuda -hwaccel_output_format cuda -extra_hw_frames 8 {0} -c:v h264_nvenc {1}",
"preset-nvidia-h265": "ffmpeg -hide_banner -hwaccel cuda -hwaccel_output_format cuda -extra_hw_frames 8 {0} -c:v hevc_nvenc {1}",
"preset-jetson-h264": "ffmpeg -hide_banner {0} -c:v h264_nvmpi -profile high {1}",
"preset-jetson-h265": "ffmpeg -hide_banner {0} -c:v hevc_nvmpi -profile high {1}",
"default": "ffmpeg -hide_banner {0} -c:v libx264 -preset:v ultrafast -tune:v zerolatency {1}", "default": "ffmpeg -hide_banner {0} -c:v libx264 -preset:v ultrafast -tune:v zerolatency {1}",
} }
def parse_preset_hardware_acceleration_decode(arg: Any) -> list[str]: def parse_preset_hardware_acceleration_decode(
arg: Any,
fps: int,
width: int,
height: int,
) -> list[str]:
"""Return the correct preset if in preset format otherwise return None.""" """Return the correct preset if in preset format otherwise return None."""
if not isinstance(arg, str): if not isinstance(arg, str):
return None return None
@ -109,7 +122,7 @@ def parse_preset_hardware_acceleration_decode(arg: Any) -> list[str]:
if not decode: if not decode:
return None return None
return decode.split(" ") return decode.format(fps, width, height).split(" ")
def parse_preset_hardware_acceleration_scale(
@@ -147,6 +160,10 @@ def parse_preset_hardware_acceleration_encode(
if not isinstance(arg, str):
return arg_map["default"].format(input, output)
# Not all Jetsons have a HW encoder, so fall back to the default SW encoder if one is not present
if arg.startswith("preset-jetson-") and not os.path.exists("/dev/nvhost-msenc"):
arg = "default"
return arg_map.get(arg, arg_map["default"]).format(
input,
output,
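A quick sketch of what the new Jetson presets produce at runtime (not part of the commit; the detect fps/width/height values below are illustrative):

```python
import os

# Decode: the preset string from PRESETS_HW_ACCEL_DECODE above is formatted with the
# camera's detect fps/width/height, so scaling happens inside the nvmpi decoder itself.
fps, width, height = 5, 1280, 720
decode = "-c:v h264_nvmpi -resize {1}x{2}"  # PRESETS_HW_ACCEL_DECODE["preset-jetson-h264"]
print(decode.format(fps, width, height).split(" "))
# ['-c:v', 'h264_nvmpi', '-resize', '1280x720']

# Encode: Jetsons without a hardware encoder (e.g. Orin Nano) have no /dev/nvhost-msenc,
# so parse_preset_hardware_acceleration_encode() falls back to the software "default" entry.
preset = "preset-jetson-h264"
if preset.startswith("preset-jetson-") and not os.path.exists("/dev/nvhost-msenc"):
    preset = "default"
```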