Mirror of https://github.com/blakeblackshear/frigate.git, synced 2025-12-06 05:24:11 +03:00
Refactor AMD GPU support (#20239)
* Update ROCm to 7.0.1
* Update ONNXRuntime
* Add back in
* Get basic detection working
* Use env vars
* Handle complex migraphx models
* Enable model caching
* Remove unused
* Add tip to docs
Parent: e6cbc93703
Commit: c207009d8a
```diff
@@ -15,14 +15,14 @@ ARG AMDGPU

 RUN apt update -qq && \
     apt install -y wget gpg && \
-    wget -O rocm.deb https://repo.radeon.com/amdgpu-install/6.4.1/ubuntu/jammy/amdgpu-install_6.4.60401-1_all.deb && \
+    wget -O rocm.deb https://repo.radeon.com/amdgpu-install/7.0.1/ubuntu/jammy/amdgpu-install_7.0.1.70001-1_all.deb && \
     apt install -y ./rocm.deb && \
     apt update && \
     apt install -qq -y rocm

 RUN mkdir -p /opt/rocm-dist/opt/rocm-$ROCM/lib
 RUN cd /opt/rocm-$ROCM/lib && \
-    cp -dpr libMIOpen*.so* libamd*.so* libhip*.so* libhsa*.so* libmigraphx*.so* librocm*.so* librocblas*.so* libroctracer*.so* librocsolver*.so* librocfft*.so* librocprofiler*.so* libroctx*.so* /opt/rocm-dist/opt/rocm-$ROCM/lib/ && \
+    cp -dpr libMIOpen*.so* libamd*.so* libhip*.so* libhsa*.so* libmigraphx*.so* librocm*.so* librocblas*.so* libroctracer*.so* librocsolver*.so* librocfft*.so* librocprofiler*.so* libroctx*.so* librocroller.so* /opt/rocm-dist/opt/rocm-$ROCM/lib/ && \
     mkdir -p /opt/rocm-dist/opt/rocm-$ROCM/lib/migraphx/lib && \
     cp -dpr migraphx/lib/* /opt/rocm-dist/opt/rocm-$ROCM/lib/migraphx/lib
 RUN cd /opt/rocm-dist/opt/ && ln -s rocm-$ROCM rocm
@@ -64,11 +64,10 @@ COPY --from=rocm /opt/rocm-dist/ /

 #######################################################################
 FROM deps-prelim AS rocm-prelim-hsa-override0
-ENV HSA_ENABLE_SDMA=0
-ENV TF_ROCM_USE_IMMEDIATE_MODE=1
-# avoid kernel crashes
-ENV HIP_FORCE_DEV_KERNARG=1
+ENV MIGRAPHX_DISABLE_MIOPEN_FUSION=1
+ENV MIGRAPHX_DISABLE_SCHEDULE_PASS=1
+ENV MIGRAPHX_DISABLE_REDUCE_FUSION=1
+ENV MIGRAPHX_ENABLE_HIPRTC_WORKAROUNDS=1

 COPY --from=rocm-dist / /

```
```diff
@@ -1 +1 @@
-onnxruntime-rocm @ https://github.com/NickM-27/frigate-onnxruntime-rocm/releases/download/v6.4.1/onnxruntime_rocm-1.21.1-cp311-cp311-linux_x86_64.whl
+onnxruntime-migraphx @ https://github.com/NickM-27/frigate-onnxruntime-rocm/releases/download/v7.0.1/onnxruntime_migraphx-1.23.0-cp311-cp311-linux_x86_64.whl
```
```diff
@@ -2,7 +2,7 @@ variable "AMDGPU" {
   default = "gfx900"
 }
 variable "ROCM" {
-  default = "6.4.1"
+  default = "7.0.1"
 }
 variable "HSA_OVERRIDE_GFX_VERSION" {
   default = ""
```
```diff
@@ -555,6 +555,17 @@ $ docker exec -it frigate /bin/bash -c '(unset HSA_OVERRIDE_GFX_VERSION && /opt/

 ### ROCm Supported Models

+:::tip
+
+The AMD GPU kernel is known to be problematic, especially when converting models to mxr format. The recommended approach is:
+
+1. Disable object detection in the config.
+2. Start up Frigate with the onnx detector configured; the main object detection model will be converted to mxr format and cached in the config directory.
+3. Once this is finished, as indicated by the logs, enable object detection in the UI and confirm that it is working correctly.
+4. Re-enable object detection in the config.
+
+:::
+
 See [ONNX supported models](#supported-models) for supported models; there are some caveats:

 - D-FINE models are not supported
```
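For context on what step 2 above actually caches: with the MIGraphX execution provider, the first session built against an ONNX model compiles it to mxr, and the cache option added later in this commit writes the result to disk so subsequent startups can skip the conversion. A minimal sketch, assuming the onnxruntime-migraphx wheel pinned in this commit; the paths below are illustrative, not Frigate's real layout:

```python
# Minimal sketch, not Frigate's actual startup code. Assumes the
# onnxruntime-migraphx wheel pinned in this commit; paths are illustrative.
import os

import onnxruntime as ort

cache_dir = "/config/model_cache/migraphx"  # assumed cache location
os.makedirs(cache_dir, exist_ok=True)

# First start: MIGraphX compiles the model to mxr and stores it in cache_dir.
# Later starts: the cached mxr is loaded, skipping the slow conversion.
session = ort.InferenceSession(
    "/config/model_cache/yolov9.onnx",  # placeholder model path
    providers=["MIGraphXExecutionProvider", "CPUExecutionProvider"],
    provider_options=[
        {"migraphx_model_cache_dir": cache_dir},  # option used by this commit
        {},
    ],
)
```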
````diff
@@ -781,19 +792,19 @@ To verify that the integration is working correctly, start Frigate and observe t

 # Community Supported Detectors

 ## MemryX MX3

 This detector is available for use with the MemryX MX3 accelerator M.2 module. Frigate supports the MX3 on compatible hardware platforms, providing efficient and high-performance object detection.

 See the [installation docs](../frigate/installation.md#memryx-mx3) for information on configuring the MemryX hardware.

 To configure a MemryX detector, simply set the `type` attribute to `memryx` and follow the configuration guide below.

 ### Configuration

 To configure the MemryX detector, use the following example configuration:

 #### Single PCIe MemryX MX3

 ```yaml
 detectors:
@@ -819,7 +830,7 @@ detectors:
     device: PCIe:2
 ```

 ### Supported Models

 MemryX `.dfp` models are automatically downloaded at runtime, if enabled, to the container at `/memryx_models/model_folder/`.

@@ -833,9 +844,9 @@ The input size for **YOLO-NAS** can be set to either **320x320** (default) or **

 - The default size of **320x320** is optimized for lower CPU usage and faster inference times.

 ##### Configuration

 Below is the recommended configuration for using the **YOLO-NAS** (small) model with the MemryX detector:

 ```yaml
 detectors:
@@ -857,13 +868,13 @@ model:
 # └── yolonas_post.onnx (optional; only if the model includes a cropped post-processing network)
 ```

 #### YOLOv9

 The YOLOv9s model included in this detector is downloaded from [the original GitHub](https://github.com/WongKinYiu/yolov9) like in the [Models Section](#yolov9-1) and compiled to DFP with [mx_nc](https://developer.memryx.com/tools/neural_compiler.html#usage).

 ##### Configuration

 Below is the recommended configuration for using the **YOLOv9** (small) model with the MemryX detector:

 ```yaml
 detectors:
@@ -872,7 +883,7 @@ detectors:
     device: PCIe:0

 model:
   model_type: yolo-generic
   width: 320 # (Can be set to 640 for higher resolution)
   height: 320 # (Can be set to 640 for higher resolution)
   input_tensor: nchw
@@ -885,13 +896,13 @@ model:
 # └── yolov9_post.onnx (optional; only if the model includes a cropped post-processing network)
 ```

 #### YOLOX

 The model is sourced from the [OpenCV Model Zoo](https://github.com/opencv/opencv_zoo) and precompiled to DFP.

 ##### Configuration

 Below is the recommended configuration for using the **YOLOX** (small) model with the MemryX detector:

 ```yaml
 detectors:
@@ -912,13 +923,13 @@ model:
 # ├── yolox.dfp (a file ending with .dfp)
 ```

 #### SSDLite MobileNet v2

 The model is sourced from the [OpenMMLab Model Zoo](https://mmdeploy-oss.openmmlab.com/model/mmdet-det/ssdlite-e8679f.onnx) and has been converted to DFP.

 ##### Configuration

 Below is the recommended configuration for using the **SSDLite MobileNet v2** model with the MemryX detector:

 ```yaml
 detectors:
````
```diff
@@ -78,6 +78,21 @@ class BaseModelRunner(ABC):
 class ONNXModelRunner(BaseModelRunner):
     """Run ONNX models using ONNX Runtime."""

+    @staticmethod
+    def is_migraphx_complex_model(model_type: str) -> bool:
+        # Import here to avoid circular imports
+        from frigate.detectors.detector_config import ModelTypeEnum
+        from frigate.embeddings.types import EnrichmentModelTypeEnum
+
+        return model_type in [
+            EnrichmentModelTypeEnum.paddleocr.value,
+            EnrichmentModelTypeEnum.jina_v1.value,
+            EnrichmentModelTypeEnum.jina_v2.value,
+            EnrichmentModelTypeEnum.facenet.value,
+            ModelTypeEnum.rfdetr.value,
+            ModelTypeEnum.dfine.value,
+        ]
+
     def __init__(self, ort: ort.InferenceSession):
         self.ort = ort

```
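The new `is_migraphx_complex_model` helper gates MIGraphX by model type. A standalone sketch of the same membership check; the plain string values below are assumptions based on the enum member names in the diff:

```python
# Standalone sketch; mirrors is_migraphx_complex_model without importing
# Frigate. The string values are assumed from the enum member names above.
MIGRAPHX_COMPLEX_MODELS = {
    "paddleocr",
    "jina_v1",
    "jina_v2",
    "facenet",
    "rfdetr",
    "dfine",
}


def is_migraphx_complex_model(model_type: str) -> bool:
    return model_type in MIGRAPHX_COMPLEX_MODELS


assert is_migraphx_complex_model("dfine")
assert not is_migraphx_complex_model("yolo-generic")
```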
```diff
@@ -441,6 +456,15 @@ def get_optimized_runner(
             options[0]["device_id"],
         )

+    if (
+        providers
+        and providers[0] == "MIGraphXExecutionProvider"
+        and ONNXModelRunner.is_migraphx_complex_model(model_type)
+    ):
+        # Don't use MIGraphX for models that are not supported
+        providers.pop(0)
+        options.pop(0)
+
     return ONNXModelRunner(
         ort.InferenceSession(
             model_path,
```
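ONNX Runtime tries execution providers in list order, so popping index 0 from both lists is all it takes to make a complex model fall through to the next provider. A small sketch of that fallback, with hypothetical list contents:

```python
# Sketch of the fallback in get_optimized_runner; the list contents here are
# hypothetical. providers[i] must stay paired with options[i].
providers = ["MIGraphXExecutionProvider", "CPUExecutionProvider"]
options = [{"migraphx_model_cache_dir": "/config/model_cache/migraphx"}, {}]

model_type = "dfine"  # one of the model types MIGraphX cannot handle

if providers and providers[0] == "MIGraphXExecutionProvider" and model_type in {
    "paddleocr", "jina_v1", "jina_v2", "facenet", "rfdetr", "dfine"
}:
    providers.pop(0)
    options.pop(0)

print(providers)  # ['CPUExecutionProvider'] -- ORT now starts with the next EP
```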
```diff
@@ -284,7 +284,9 @@ def post_process_yolox(


 def get_ort_providers(
-    force_cpu: bool = False, device: str | None = "AUTO", requires_fp16: bool = False
+    force_cpu: bool = False,
+    device: str | None = "AUTO",
+    requires_fp16: bool = False,
 ) -> tuple[list[str], list[dict[str, Any]]]:
     if force_cpu:
         return (
@@ -351,12 +353,15 @@
                 }
             )
         elif provider == "MIGraphXExecutionProvider":
-            # MIGraphX uses more CPU than ROCM, while also being the same speed
-            if device == "MIGraphX":
-                providers.append(provider)
-                options.append({})
-            else:
-                continue
+            migraphx_cache_dir = os.path.join(MODEL_CACHE_DIR, "migraphx")
+            os.makedirs(migraphx_cache_dir, exist_ok=True)
+
+            providers.append(provider)
+            options.append(
+                {
+                    "migraphx_model_cache_dir": migraphx_cache_dir,
+                }
+            )
         elif provider == "CPUExecutionProvider":
             providers.append(provider)
             options.append(
```
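Net effect of the provider changes: `get_ort_providers` now always enables MIGraphX with on-disk model caching instead of requiring `device: MIGraphX`, and the returned pair feeds directly into a session. A hedged wiring sketch; the import path and model path are assumptions:

```python
# Hypothetical wiring; get_ort_providers is real per the diff above, but its
# module path and the model path here are assumptions.
import onnxruntime as ort

from frigate.util.model import get_ort_providers  # assumed module path

providers, options = get_ort_providers(device="AUTO")
session = ort.InferenceSession(
    "/config/model_cache/model.onnx",  # placeholder model path
    providers=providers,
    provider_options=options,
)
```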