mirror of
https://github.com/blakeblackshear/frigate.git
synced 2026-05-07 05:55:27 +03:00
ROCm Optimizations (#23118)
* Update to ROCm 7.2.3
* Add inference time for 9060XT
* Update times
* Update hardware info for latest ROCm
* Add env vars to save kernels and MIOpen database
* Re-enable face recognition for ROCm
* Update
* Save LLVM cache
This commit is contained in:
parent
52a3301726
commit
76a1230885
@ -13,7 +13,7 @@ ARG ROCM
|
|||||||
|
|
||||||
RUN apt update -qq && \
|
RUN apt update -qq && \
|
||||||
apt install -y wget gpg && \
|
apt install -y wget gpg && \
|
||||||
wget -O rocm.deb https://repo.radeon.com/amdgpu-install/7.2/ubuntu/jammy/amdgpu-install_7.2.70200-1_all.deb && \
|
wget -O rocm.deb https://repo.radeon.com/amdgpu-install/7.2.3/ubuntu/jammy/amdgpu-install_7.2.3.70203-1_all.deb && \
|
||||||
apt install -y ./rocm.deb && \
|
apt install -y ./rocm.deb && \
|
||||||
apt update && \
|
apt update && \
|
||||||
apt install -qq -y rocm
|
apt install -qq -y rocm
|
||||||
@ -78,6 +78,10 @@ ENV MIGRAPHX_DISABLE_MIOPEN_FUSION=1
|
|||||||
ENV MIGRAPHX_DISABLE_SCHEDULE_PASS=1
|
ENV MIGRAPHX_DISABLE_SCHEDULE_PASS=1
|
||||||
ENV MIGRAPHX_DISABLE_REDUCE_FUSION=1
|
ENV MIGRAPHX_DISABLE_REDUCE_FUSION=1
|
||||||
ENV MIGRAPHX_ENABLE_HIPRTC_WORKAROUNDS=1
|
ENV MIGRAPHX_ENABLE_HIPRTC_WORKAROUNDS=1
|
||||||
|
ENV MIOPEN_CUSTOM_CACHE_DIR=/config/model_cache/migraphx
|
||||||
|
ENV MIOPEN_USER_DB_PATH=/config/model_cache/migraphx
|
||||||
|
ENV AMD_COMGR_CACHE=1
|
||||||
|
ENV AMD_COMGR_CACHE_DIR=/config/model_cache/migraphx
|
||||||
|
|
||||||
COPY --from=rocm-dist / /
|
COPY --from=rocm-dist / /
|
||||||
|
|
||||||
|
|||||||
@ -1 +1 @@
|
|||||||
onnxruntime-migraphx @ https://github.com/NickM-27/frigate-onnxruntime-rocm/releases/download/v7.2.0/onnxruntime_migraphx-1.23.1-cp311-cp311-linux_x86_64.whl
|
onnxruntime-migraphx @ https://github.com/NickM-27/frigate-onnxruntime-rocm/releases/download/v7.2.3-1/onnxruntime_migraphx-1.24.4-cp311-cp311-linux_x86_64.whl
|
||||||
@ -1,5 +1,5 @@
|
|||||||
variable "ROCM" {
|
variable "ROCM" {
|
||||||
default = "7.2.0"
|
default = "7.2.3"
|
||||||
}
|
}
|
||||||
variable "HSA_OVERRIDE_GFX_VERSION" {
|
variable "HSA_OVERRIDE_GFX_VERSION" {
|
||||||
default = ""
|
default = ""
|
||||||
|
|||||||
@ -1023,9 +1023,9 @@ detectors:
|
|||||||
### ONNX Supported Models
|
### ONNX Supported Models
|
||||||
|
|
||||||
| Model | Nvidia GPU | AMD GPU | Notes |
|
| Model | Nvidia GPU | AMD GPU | Notes |
|
||||||
| ----------------------------- | ---------- | ------- | --------------------------------------------------- |
|
| ------------------------------------ | ---------- | ------- | --------------------------------------------------- |
|
||||||
| [YOLOv9](#yolo-v3-v4-v7-v9-2) | ✅ | ✅ | Supports CUDA Graphs for optimal Nvidia performance |
|
| [YOLOv9](#yolo-v3-v4-v7-v9-2) | ✅ | ✅ | Supports CUDA Graphs for optimal Nvidia performance |
|
||||||
| [RF-DETR](#rf-detr) | ✅ | ❌ | Supports CUDA Graphs for optimal Nvidia performance |
|
| [RF-DETR](#rf-detr) | ✅ | ⚠️ | Supports CUDA Graphs for optimal Nvidia performance |
|
||||||
| [YOLO-NAS](#yolo-nas-1) | ⚠️ | ⚠️ | Not supported by CUDA Graphs |
|
| [YOLO-NAS](#yolo-nas-1) | ⚠️ | ⚠️ | Not supported by CUDA Graphs |
|
||||||
| [YOLOX](#yolox-1) | ✅ | ✅ | Supports CUDA Graphs for optimal Nvidia performance |
|
| [YOLOX](#yolox-1) | ✅ | ✅ | Supports CUDA Graphs for optimal Nvidia performance |
|
||||||
| [D-FINE / DEIMv2](#d-fine--deimv2-1) | ⚠️ | ❌ | Not supported by CUDA Graphs |
|
| [D-FINE / DEIMv2](#d-fine--deimv2-1) | ⚠️ | ❌ | Not supported by CUDA Graphs |
|
||||||
|
|||||||
@ -223,10 +223,11 @@ Apple Silicon can not run within a container, so a ZMQ proxy is utilized to comm
|
|||||||
|
|
||||||
With the [ROCm](../configuration/object_detectors.md#amdrocm-gpu-detector) detector Frigate can take advantage of many discrete AMD GPUs.
|
With the [ROCm](../configuration/object_detectors.md#amdrocm-gpu-detector) detector Frigate can take advantage of many discrete AMD GPUs.
|
||||||
|
|
||||||
| Name | YOLOv9 Inference Time | YOLO-NAS Inference Time |
|
| Name | YOLOv9 Inference Time | YOLO-NAS Inference Time | RF-DETR Inference Time |
|
||||||
| --------- | --------------------------- | ------------------------- |
|
| -------------- | --------------------------- | ------------------------- | ---------------------- |
|
||||||
| AMD 780M | t-320: ~ 14 ms s-320: 20 ms | 320: ~ 25 ms 640: ~ 50 ms |
|
| AMD 780M | t-320: ~ 14 ms s-320: 20 ms | 320: ~ 25 ms 640: ~ 50 ms | |
|
||||||
| AMD 8700G | | 320: ~ 20 ms 640: ~ 40 ms |
|
| AMD 8700G | | 320: ~ 20 ms 640: ~ 40 ms | |
|
||||||
|
| AMD 9060XT 16G | t-320: ~ 4 ms s-320: 5 ms | 320: ~ 6 ms | Nano-320: ~ 90 ms |
|
||||||
|
|
||||||
## Community Supported Detectors
|
## Community Supported Detectors
|
||||||
|
|
||||||
|
|||||||
@ -132,7 +132,6 @@ class ONNXModelRunner(BaseModelRunner):
|
|||||||
return model_type in [
|
return model_type in [
|
||||||
EnrichmentModelTypeEnum.paddleocr.value,
|
EnrichmentModelTypeEnum.paddleocr.value,
|
||||||
EnrichmentModelTypeEnum.jina_v2.value,
|
EnrichmentModelTypeEnum.jina_v2.value,
|
||||||
EnrichmentModelTypeEnum.arcface.value,
|
|
||||||
ModelTypeEnum.rfdetr.value,
|
ModelTypeEnum.rfdetr.value,
|
||||||
ModelTypeEnum.dfine.value,
|
ModelTypeEnum.dfine.value,
|
||||||
]
|
]
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user