diff --git a/docker/rocm/Dockerfile b/docker/rocm/Dockerfile
index 86bc1c229..c53505a50 100644
--- a/docker/rocm/Dockerfile
+++ b/docker/rocm/Dockerfile
@@ -15,14 +15,14 @@ ARG AMDGPU
 
 RUN apt update -qq && \
     apt install -y wget gpg && \
-    wget -O rocm.deb https://repo.radeon.com/amdgpu-install/6.4.1/ubuntu/jammy/amdgpu-install_6.4.60401-1_all.deb && \
+    wget -O rocm.deb https://repo.radeon.com/amdgpu-install/7.0.1/ubuntu/jammy/amdgpu-install_7.0.1.70001-1_all.deb && \
     apt install -y ./rocm.deb && \
     apt update && \
     apt install -qq -y rocm
 
 RUN mkdir -p /opt/rocm-dist/opt/rocm-$ROCM/lib
 RUN cd /opt/rocm-$ROCM/lib && \
-    cp -dpr libMIOpen*.so* libamd*.so* libhip*.so* libhsa*.so* libmigraphx*.so* librocm*.so* librocblas*.so* libroctracer*.so* librocsolver*.so* librocfft*.so* librocprofiler*.so* libroctx*.so* /opt/rocm-dist/opt/rocm-$ROCM/lib/ && \
+    cp -dpr libMIOpen*.so* libamd*.so* libhip*.so* libhsa*.so* libmigraphx*.so* librocm*.so* librocblas*.so* libroctracer*.so* librocsolver*.so* librocfft*.so* librocprofiler*.so* libroctx*.so* librocroller.so* /opt/rocm-dist/opt/rocm-$ROCM/lib/ && \
     mkdir -p /opt/rocm-dist/opt/rocm-$ROCM/lib/migraphx/lib && \
     cp -dpr migraphx/lib/* /opt/rocm-dist/opt/rocm-$ROCM/lib/migraphx/lib
 RUN cd /opt/rocm-dist/opt/ && ln -s rocm-$ROCM rocm
@@ -64,11 +64,10 @@ COPY --from=rocm /opt/rocm-dist/ /
 #######################################################################
 FROM deps-prelim AS rocm-prelim-hsa-override0
 
-ENV HSA_ENABLE_SDMA=0
-ENV TF_ROCM_USE_IMMEDIATE_MODE=1
-
-# avoid kernel crashes
-ENV HIP_FORCE_DEV_KERNARG=1
+ENV MIGRAPHX_DISABLE_MIOPEN_FUSION=1
+ENV MIGRAPHX_DISABLE_SCHEDULE_PASS=1
+ENV MIGRAPHX_DISABLE_REDUCE_FUSION=1
+ENV MIGRAPHX_ENABLE_HIPRTC_WORKAROUNDS=1
 
 COPY --from=rocm-dist / /
 
diff --git a/docker/rocm/requirements-wheels-rocm.txt b/docker/rocm/requirements-wheels-rocm.txt
index 21aebf4bd..611dc27cc 100644
--- a/docker/rocm/requirements-wheels-rocm.txt
+++ b/docker/rocm/requirements-wheels-rocm.txt
@@ -1 +1 @@
-onnxruntime-rocm @ https://github.com/NickM-27/frigate-onnxruntime-rocm/releases/download/v6.4.1/onnxruntime_rocm-1.21.1-cp311-cp311-linux_x86_64.whl
\ No newline at end of file
+onnxruntime-migraphx @ https://github.com/NickM-27/frigate-onnxruntime-rocm/releases/download/v7.0.1/onnxruntime_migraphx-1.23.0-cp311-cp311-linux_x86_64.whl
\ No newline at end of file
diff --git a/docker/rocm/rocm.hcl b/docker/rocm/rocm.hcl
index 0745a9f3d..4144316ea 100644
--- a/docker/rocm/rocm.hcl
+++ b/docker/rocm/rocm.hcl
@@ -2,7 +2,7 @@ variable "AMDGPU" {
   default = "gfx900"
 }
 variable "ROCM" {
-  default = "6.4.1"
+  default = "7.0.1"
 }
 variable "HSA_OVERRIDE_GFX_VERSION" {
   default = ""
diff --git a/docs/docs/configuration/object_detectors.md b/docs/docs/configuration/object_detectors.md
index 25c30d0f1..e352a6a9a 100644
--- a/docs/docs/configuration/object_detectors.md
+++ b/docs/docs/configuration/object_detectors.md
@@ -555,6 +555,27 @@ $ docker exec -it frigate /bin/bash -c '(unset HSA_OVERRIDE_GFX_VERSION && /opt/
 
 ### ROCm Supported Models
 
+:::tip
+
+The AMD GPU kernel driver is known to be problematic, especially when converting models to the mxr format. The recommended approach is:
+
+1. Disable object detection in the config.
+2. Start Frigate with the onnx detector configured; the main object detection model will be converted to the mxr format and cached in the config directory.
+3. Once the logs indicate that the conversion has finished, enable object detection in the UI and confirm that it is working correctly.
+4. Re-enable object detection in the config.
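+
+For steps 1 and 2, the relevant config looks roughly like the following (a minimal sketch; the detector name and the model section that would normally accompany it are illustrative):
+
+```yaml
+detect:
+  enabled: false
+detectors:
+  onnx:
+    type: onnx
+```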
+
+:::
+
 See [ONNX supported models](#supported-models) for supported models, there are some caveats:
 
 - D-FINE models are not supported
@@ -781,19 +792,19 @@ To verify that the integration is working correctly, start Frigate and observe t
 
 # Community Supported Detectors
 
-## MemryX MX3 
+## MemryX MX3
 
-This detector is available for use with the MemryX MX3 accelerator M.2 module. Frigate supports the MX3 on compatible hardware platforms, providing efficient and high-performance object detection. 
+This detector is available for use with the MemryX MX3 accelerator M.2 module. Frigate supports the MX3 on compatible hardware platforms, providing efficient and high-performance object detection.
 
 See the [installation docs](../frigate/installation.md#memryx-mx3) for information on configuring the MemryX hardware.
 
 To configure a MemryX detector, simply set the `type` attribute to `memryx` and follow the configuration guide below.
 
-### Configuration 
+### Configuration
 
-To configure the MemryX detector, use the following example configuration: 
+To configure the MemryX detector, use the following example configuration:
 
-#### Single PCIe MemryX MX3 
+#### Single PCIe MemryX MX3
 
 ```yaml
 detectors:
@@ -819,7 +830,7 @@ detectors:
     device: PCIe:2
 ```
 
-### Supported Models 
+### Supported Models
 
 MemryX `.dfp` models are automatically downloaded at runtime, if enabled, to the container at `/memryx_models/model_folder/`.
 
@@ -833,9 +844,9 @@ The input size for **YOLO-NAS** can be set to either **320x320** (default) or **640x640**.
 
 - The default size of **320x320** is optimized for lower CPU usage and faster inference times.
 
-##### Configuration 
+##### Configuration
 
-Below is the recommended configuration for using the **YOLO-NAS** (small) model with the MemryX detector: 
+Below is the recommended configuration for using the **YOLO-NAS** (small) model with the MemryX detector:
 
 ```yaml
 detectors:
@@ -857,13 +868,13 @@ model:
 # └── yolonas_post.onnx (optional; only if the model includes a cropped post-processing network)
 ```
 
-#### YOLOv9 
+#### YOLOv9
 
 The YOLOv9s model included in this detector is downloaded from [the original GitHub](https://github.com/WongKinYiu/yolov9) like in the [Models Section](#yolov9-1) and compiled to DFP with [mx_nc](https://developer.memryx.com/tools/neural_compiler.html#usage).
 
 ##### Configuration
 
-Below is the recommended configuration for using the **YOLOv9** (small) model with the MemryX detector: 
+Below is the recommended configuration for using the **YOLOv9** (small) model with the MemryX detector:
 
 ```yaml
 detectors:
@@ -872,7 +883,7 @@ detectors:
     device: PCIe:0
 
 model:
-  model_type: yolo-generic 
+  model_type: yolo-generic
   width: 320 # (Can be set to 640 for higher resolution)
   height: 320 # (Can be set to 640 for higher resolution)
   input_tensor: nchw
@@ -885,13 +896,13 @@ model:
 # └── yolov9_post.onnx (optional; only if the model includes a cropped post-processing network)
 ```
 
-#### YOLOX 
+#### YOLOX
 
 The model is sourced from the [OpenCV Model Zoo](https://github.com/opencv/opencv_zoo) and precompiled to DFP.
-##### Configuration 
+##### Configuration
 
-Below is the recommended configuration for using the **YOLOX** (small) model with the MemryX detector: 
+Below is the recommended configuration for using the **YOLOX** (small) model with the MemryX detector:
 
 ```yaml
 detectors:
@@ -912,13 +923,13 @@ model:
 # ├── yolox.dfp (a file ending with .dfp)
 ```
 
-#### SSDLite MobileNet v2 
+#### SSDLite MobileNet v2
 
 The model is sourced from the [OpenMMLab Model Zoo](https://mmdeploy-oss.openmmlab.com/model/mmdet-det/ssdlite-e8679f.onnx) and has been converted to DFP.
 
-##### Configuration 
+##### Configuration
 
-Below is the recommended configuration for using the **SSDLite MobileNet v2** model with the MemryX detector: 
+Below is the recommended configuration for using the **SSDLite MobileNet v2** model with the MemryX detector:
 
 ```yaml
 detectors:
diff --git a/frigate/detectors/detection_runners.py b/frigate/detectors/detection_runners.py
index 19c91e487..5d1cbf7c2 100644
--- a/frigate/detectors/detection_runners.py
+++ b/frigate/detectors/detection_runners.py
@@ -78,6 +78,21 @@ class BaseModelRunner(ABC):
 
 class ONNXModelRunner(BaseModelRunner):
     """Run ONNX models using ONNX Runtime."""
 
+    @staticmethod
+    def is_migraphx_complex_model(model_type: str) -> bool:
+        # Import here to avoid circular imports
+        from frigate.detectors.detector_config import ModelTypeEnum
+        from frigate.embeddings.types import EnrichmentModelTypeEnum
+
+        return model_type in [
+            EnrichmentModelTypeEnum.paddleocr.value,
+            EnrichmentModelTypeEnum.jina_v1.value,
+            EnrichmentModelTypeEnum.jina_v2.value,
+            EnrichmentModelTypeEnum.facenet.value,
+            ModelTypeEnum.rfdetr.value,
+            ModelTypeEnum.dfine.value,
+        ]
+
     def __init__(self, ort: ort.InferenceSession):
         self.ort = ort
@@ -441,6 +456,15 @@ def get_optimized_runner(
             options[0]["device_id"],
         )
 
+    if (
+        providers
+        and providers[0] == "MIGraphXExecutionProvider"
+        and ONNXModelRunner.is_migraphx_complex_model(model_type)
+    ):
+        # These model types are not supported by MIGraphX; fall back to the next provider
+        providers.pop(0)
+        options.pop(0)
+
     return ONNXModelRunner(
         ort.InferenceSession(
             model_path,
diff --git a/frigate/util/model.py b/frigate/util/model.py
index 45a3a6c8b..308a16689 100644
--- a/frigate/util/model.py
+++ b/frigate/util/model.py
@@ -284,7 +284,9 @@ def post_process_yolox(
 
 
 def get_ort_providers(
-    force_cpu: bool = False, device: str | None = "AUTO", requires_fp16: bool = False
+    force_cpu: bool = False,
+    device: str | None = "AUTO",
+    requires_fp16: bool = False,
 ) -> tuple[list[str], list[dict[str, Any]]]:
     if force_cpu:
         return (
@@ -351,12 +353,15 @@ def get_ort_providers(
                 }
             )
         elif provider == "MIGraphXExecutionProvider":
-            # MIGraphX uses more CPU than ROCM, while also being the same speed
-            if device == "MIGraphX":
-                providers.append(provider)
-                options.append({})
-            else:
-                continue
+            migraphx_cache_dir = os.path.join(MODEL_CACHE_DIR, "migraphx")
+            os.makedirs(migraphx_cache_dir, exist_ok=True)
+
+            providers.append(provider)
+            options.append(
+                {
+                    "migraphx_model_cache_dir": migraphx_cache_dir,
+                }
+            )
         elif provider == "CPUExecutionProvider":
             providers.append(provider)
             options.append(
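Reviewer note: taken together, `get_ort_providers()` now always offers MIGraphX (with on-disk model caching) and `get_optimized_runner()` strips it again for model types it cannot handle. A standalone sketch of that flow, assuming an onnxruntime build with the MIGraphX EP available and using illustrative string values in place of the real enum members:

```python
# A minimal sketch of the new provider-selection flow; not the actual
# Frigate code. Assumes onnxruntime is installed with the MIGraphX EP.
import onnxruntime as ort

# Mirrors is_migraphx_complex_model(); the literal strings stand in for
# ModelTypeEnum / EnrichmentModelTypeEnum values and are assumptions.
MIGRAPHX_COMPLEX_MODELS = {"paddleocr", "jina_v1", "jina_v2", "facenet", "rfdetr", "dfine"}


def build_session(model_path: str, model_type: str) -> ort.InferenceSession:
    # get_ort_providers() now always offers MIGraphX first, caching compiled
    # models on disk (illustrative path) so the mxr conversion runs only once.
    providers = ["MIGraphXExecutionProvider", "CPUExecutionProvider"]
    options = [{"migraphx_model_cache_dir": "/config/model_cache/migraphx"}, {}]

    # get_optimized_runner() drops MIGraphX for unsupported model types, so
    # the session falls through to the next provider in the list.
    if providers[0] == "MIGraphXExecutionProvider" and model_type in MIGRAPHX_COMPLEX_MODELS:
        providers.pop(0)
        options.pop(0)

    return ort.InferenceSession(
        model_path, providers=providers, provider_options=options
    )
```

The cache directory is also what makes the docs tip above workable: the expensive mxr compilation happens once on first startup, and subsequent runs load the cached artifact.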