From 33042592c703b1b1479d2fad026be2dcbec5343e Mon Sep 17 00:00:00 2001 From: Nicolas Mowen Date: Wed, 17 Sep 2025 17:33:26 -0600 Subject: [PATCH] Update ROCm to not hang when running on complex RNN models --- docker/rocm/Dockerfile | 3 +++ frigate/detectors/plugins/onnx.py | 7 +++++++ 2 files changed, 10 insertions(+) diff --git a/docker/rocm/Dockerfile b/docker/rocm/Dockerfile index cfd8a92e1..86bc1c229 100644 --- a/docker/rocm/Dockerfile +++ b/docker/rocm/Dockerfile @@ -67,6 +67,9 @@ FROM deps-prelim AS rocm-prelim-hsa-override0 ENV HSA_ENABLE_SDMA=0 ENV TF_ROCM_USE_IMMEDIATE_MODE=1 +# avoid kernel crashes +ENV HIP_FORCE_DEV_KERNARG=1 + COPY --from=rocm-dist / / RUN ldconfig diff --git a/frigate/detectors/plugins/onnx.py b/frigate/detectors/plugins/onnx.py index 955a58524..23c3aeb20 100644 --- a/frigate/detectors/plugins/onnx.py +++ b/frigate/detectors/plugins/onnx.py @@ -51,6 +51,13 @@ class ONNXDetector(DetectionApi): "enable_cuda_graph": True, } + sess_options = None + + if providers[0] == "ROCMExecutionProvider": + # avoid AMD GPU kernel crashes + sess_options = ort.SessionOptions() + sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_DISABLE_ALL + self.model = ort.InferenceSession( path, providers=providers, provider_options=options )