From 76a1230885cc49e866eb54765b855fdb19748099 Mon Sep 17 00:00:00 2001 From: Nicolas Mowen Date: Tue, 5 May 2026 15:33:43 -0600 Subject: [PATCH] ROCm Optimizations (#23118) * Update to ROCm 7.2.3 * Add inference time for 9060XT * Update times * Update hardware info for latest ROCm * Add env vars to save kernels and miopen database * re-enable face recognition for ROCm * Update * Save LLVM cache --- docker/rocm/Dockerfile | 6 +++++- docker/rocm/requirements-wheels-rocm.txt | 2 +- docker/rocm/rocm.hcl | 2 +- docs/docs/configuration/object_detectors.md | 12 ++++++------ docs/docs/frigate/hardware.md | 9 +++++---- frigate/detectors/detection_runners.py | 1 - 6 files changed, 18 insertions(+), 14 deletions(-) diff --git a/docker/rocm/Dockerfile b/docker/rocm/Dockerfile index bbe84c1b1..653ed1e4e 100644 --- a/docker/rocm/Dockerfile +++ b/docker/rocm/Dockerfile @@ -13,7 +13,7 @@ ARG ROCM RUN apt update -qq && \ apt install -y wget gpg && \ - wget -O rocm.deb https://repo.radeon.com/amdgpu-install/7.2/ubuntu/jammy/amdgpu-install_7.2.70200-1_all.deb && \ + wget -O rocm.deb https://repo.radeon.com/amdgpu-install/7.2.3/ubuntu/jammy/amdgpu-install_7.2.3.70203-1_all.deb && \ apt install -y ./rocm.deb && \ apt update && \ apt install -qq -y rocm @@ -78,6 +78,10 @@ ENV MIGRAPHX_DISABLE_MIOPEN_FUSION=1 ENV MIGRAPHX_DISABLE_SCHEDULE_PASS=1 ENV MIGRAPHX_DISABLE_REDUCE_FUSION=1 ENV MIGRAPHX_ENABLE_HIPRTC_WORKAROUNDS=1 +ENV MIOPEN_CUSTOM_CACHE_DIR=/config/model_cache/migraphx +ENV MIOPEN_USER_DB_PATH=/config/model_cache/migraphx +ENV AMD_COMGR_CACHE=1 +ENV AMD_COMGR_CACHE_DIR=/config/model_cache/migraphx COPY --from=rocm-dist / / diff --git a/docker/rocm/requirements-wheels-rocm.txt b/docker/rocm/requirements-wheels-rocm.txt index da22f2ff6..f60b550c3 100644 --- a/docker/rocm/requirements-wheels-rocm.txt +++ b/docker/rocm/requirements-wheels-rocm.txt @@ -1 +1 @@ -onnxruntime-migraphx @ https://github.com/NickM-27/frigate-onnxruntime-rocm/releases/download/v7.2.0/onnxruntime_migraphx-1.23.1-cp311-cp311-linux_x86_64.whl \ No newline at end of file +onnxruntime-migraphx @ https://github.com/NickM-27/frigate-onnxruntime-rocm/releases/download/v7.2.3-1/onnxruntime_migraphx-1.24.4-cp311-cp311-linux_x86_64.whl \ No newline at end of file diff --git a/docker/rocm/rocm.hcl b/docker/rocm/rocm.hcl index 710bfe995..224118818 100644 --- a/docker/rocm/rocm.hcl +++ b/docker/rocm/rocm.hcl @@ -1,5 +1,5 @@ variable "ROCM" { - default = "7.2.0" + default = "7.2.3" } variable "HSA_OVERRIDE_GFX_VERSION" { default = "" diff --git a/docs/docs/configuration/object_detectors.md b/docs/docs/configuration/object_detectors.md index 7519f8e8f..99b9e1f35 100644 --- a/docs/docs/configuration/object_detectors.md +++ b/docs/docs/configuration/object_detectors.md @@ -1022,12 +1022,12 @@ detectors: ### ONNX Supported Models -| Model | Nvidia GPU | AMD GPU | Notes | -| ----------------------------- | ---------- | ------- | --------------------------------------------------- | -| [YOLOv9](#yolo-v3-v4-v7-v9-2) | ✅ | ✅ | Supports CUDA Graphs for optimal Nvidia performance | -| [RF-DETR](#rf-detr) | ✅ | ❌ | Supports CUDA Graphs for optimal Nvidia performance | -| [YOLO-NAS](#yolo-nas-1) | ⚠️ | ⚠️ | Not supported by CUDA Graphs | -| [YOLOX](#yolox-1) | ✅ | ✅ | Supports CUDA Graphs for optimal Nvidia performance | +| Model | Nvidia GPU | AMD GPU | Notes | +| ------------------------------------ | ---------- | ------- | --------------------------------------------------- | +| [YOLOv9](#yolo-v3-v4-v7-v9-2) | ✅ | ✅ | Supports CUDA Graphs for optimal Nvidia performance | +| [RF-DETR](#rf-detr) | ✅ | ⚠️ | Supports CUDA Graphs for optimal Nvidia performance | +| [YOLO-NAS](#yolo-nas-1) | ⚠️ | ⚠️ | Not supported by CUDA Graphs | +| [YOLOX](#yolox-1) | ✅ | ✅ | Supports CUDA Graphs for optimal Nvidia performance | | [D-FINE / DEIMv2](#d-fine--deimv2-1) | ⚠️ | ❌ | Not supported by CUDA Graphs | There is no default model provided, the following formats are supported: diff --git a/docs/docs/frigate/hardware.md b/docs/docs/frigate/hardware.md index 7df2ae0bb..6e98d1b7b 100644 --- a/docs/docs/frigate/hardware.md +++ b/docs/docs/frigate/hardware.md @@ -223,10 +223,11 @@ Apple Silicon can not run within a container, so a ZMQ proxy is utilized to comm With the [ROCm](../configuration/object_detectors.md#amdrocm-gpu-detector) detector Frigate can take advantage of many discrete AMD GPUs. -| Name | YOLOv9 Inference Time | YOLO-NAS Inference Time | -| --------- | --------------------------- | ------------------------- | -| AMD 780M | t-320: ~ 14 ms s-320: 20 ms | 320: ~ 25 ms 640: ~ 50 ms | -| AMD 8700G | | 320: ~ 20 ms 640: ~ 40 ms | +| Name | YOLOv9 Inference Time | YOLO-NAS Inference Time | RF-DETR Inference Time | +| -------------- | --------------------------- | ------------------------- | ---------------------- | +| AMD 780M | t-320: ~ 14 ms s-320: 20 ms | 320: ~ 25 ms 640: ~ 50 ms | | +| AMD 8700G | | 320: ~ 20 ms 640: ~ 40 ms | | +| AMD 9060XT 16G | t-320: ~ 4 ms s-320: 5 ms | 320: ~ 6 ms | Nano-320: ~ 90 ms | ## Community Supported Detectors diff --git a/frigate/detectors/detection_runners.py b/frigate/detectors/detection_runners.py index d12c8b733..8d7eb1e67 100644 --- a/frigate/detectors/detection_runners.py +++ b/frigate/detectors/detection_runners.py @@ -132,7 +132,6 @@ class ONNXModelRunner(BaseModelRunner): return model_type in [ EnrichmentModelTypeEnum.paddleocr.value, EnrichmentModelTypeEnum.jina_v2.value, - EnrichmentModelTypeEnum.arcface.value, ModelTypeEnum.rfdetr.value, ModelTypeEnum.dfine.value, ]