From 8a360eecf887ca4e38e813db87c3eae363db5b3f Mon Sep 17 00:00:00 2001 From: Nicolas Mowen Date: Tue, 2 Dec 2025 10:41:02 -0600 Subject: [PATCH] Refactor ROCm Support (#21132) * Remove gfx900 support and keep only the ROCm build with all variants by default * Include C++ standard library headers for HIPRTC JIT kernel compilation --- .github/workflows/ci.yml | 1 - docker/rocm/Dockerfile | 17 ++++++++++------- docker/rocm/rocm.hcl | 4 ---- docker/rocm/rocm.mk | 38 -------------------------------------- 4 files changed, 10 insertions(+), 50 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b211ed0e5..42a17adb6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -136,7 +136,6 @@ jobs: *.cache-to=type=registry,ref=${{ steps.setup.outputs.cache-name }}-tensorrt,mode=max - name: AMD/ROCm general build env: - AMDGPU: gfx HSA_OVERRIDE: 0 uses: docker/bake-action@v6 with: diff --git a/docker/rocm/Dockerfile b/docker/rocm/Dockerfile index b444bb8ec..9edcd6058 100644 --- a/docker/rocm/Dockerfile +++ b/docker/rocm/Dockerfile @@ -3,7 +3,6 @@ # https://askubuntu.com/questions/972516/debian-frontend-environment-variable ARG DEBIAN_FRONTEND=noninteractive ARG ROCM=1 -ARG AMDGPU=gfx900 ARG HSA_OVERRIDE_GFX_VERSION ARG HSA_OVERRIDE @@ -11,7 +10,6 @@ ARG HSA_OVERRIDE FROM wget AS rocm ARG ROCM -ARG AMDGPU RUN apt update -qq && \ apt install -y wget gpg && \ @@ -36,7 +34,10 @@ FROM deps AS deps-prelim COPY docker/rocm/debian-backports.sources /etc/apt/sources.list.d/debian-backports.sources RUN apt-get update && \ apt-get install -y libnuma1 && \ - apt-get install -qq -y -t bookworm-backports mesa-va-drivers mesa-vulkan-drivers + apt-get install -qq -y -t bookworm-backports mesa-va-drivers mesa-vulkan-drivers && \ + # Install C++ standard library headers for HIPRTC kernel compilation fallback + apt-get install -qq -y libstdc++-12-dev && \ + rm -rf /var/lib/apt/lists/* WORKDIR /opt/frigate COPY --from=rootfs / / @@ -54,12 +55,14 @@ RUN pip3 uninstall -y onnxruntime \ 
FROM scratch AS rocm-dist ARG ROCM -ARG AMDGPU COPY --from=rocm /opt/rocm-$ROCM/bin/rocminfo /opt/rocm-$ROCM/bin/migraphx-driver /opt/rocm-$ROCM/bin/ -COPY --from=rocm /opt/rocm-$ROCM/share/miopen/db/*$AMDGPU* /opt/rocm-$ROCM/share/miopen/db/ -COPY --from=rocm /opt/rocm-$ROCM/share/miopen/db/*gfx908* /opt/rocm-$ROCM/share/miopen/db/ -COPY --from=rocm /opt/rocm-$ROCM/lib/rocblas/library/*$AMDGPU* /opt/rocm-$ROCM/lib/rocblas/library/ +# Copy MIOpen database files for gfx10xx and gfx11xx only (RDNA2/RDNA3) +COPY --from=rocm /opt/rocm-$ROCM/share/miopen/db/*gfx10* /opt/rocm-$ROCM/share/miopen/db/ +COPY --from=rocm /opt/rocm-$ROCM/share/miopen/db/*gfx11* /opt/rocm-$ROCM/share/miopen/db/ +# Copy rocBLAS library files for gfx10xx and gfx11xx only +COPY --from=rocm /opt/rocm-$ROCM/lib/rocblas/library/*gfx10* /opt/rocm-$ROCM/lib/rocblas/library/ +COPY --from=rocm /opt/rocm-$ROCM/lib/rocblas/library/*gfx11* /opt/rocm-$ROCM/lib/rocblas/library/ COPY --from=rocm /opt/rocm-dist/ / ####################################################################### diff --git a/docker/rocm/rocm.hcl b/docker/rocm/rocm.hcl index debeee2f3..6595066c5 100644 --- a/docker/rocm/rocm.hcl +++ b/docker/rocm/rocm.hcl @@ -1,6 +1,3 @@ -variable "AMDGPU" { - default = "gfx900" -} variable "ROCM" { default = "7.1.1" } @@ -38,7 +35,6 @@ target rocm { } platforms = ["linux/amd64"] args = { - AMDGPU = AMDGPU, ROCM = ROCM, HSA_OVERRIDE_GFX_VERSION = HSA_OVERRIDE_GFX_VERSION, HSA_OVERRIDE = HSA_OVERRIDE diff --git a/docker/rocm/rocm.mk b/docker/rocm/rocm.mk index c92a458f5..f98d38772 100644 --- a/docker/rocm/rocm.mk +++ b/docker/rocm/rocm.mk @@ -1,53 +1,15 @@ BOARDS += rocm -# AMD/ROCm is chunky so we build couple of smaller images for specific chipsets -ROCM_CHIPSETS:=gfx900:9.0.0 gfx1030:10.3.0 gfx1100:11.0.0 - local-rocm: version - $(foreach chipset,$(ROCM_CHIPSETS), \ - AMDGPU=$(word 1,$(subst :, ,$(chipset))) \ - HSA_OVERRIDE_GFX_VERSION=$(word 2,$(subst :, ,$(chipset))) \ - HSA_OVERRIDE=1 \ - docker 
buildx bake --file=docker/rocm/rocm.hcl rocm \ - --set rocm.tags=frigate:latest-rocm-$(word 1,$(subst :, ,$(chipset))) \ - --load \ - &&) true - - unset HSA_OVERRIDE_GFX_VERSION && \ - HSA_OVERRIDE=0 \ - AMDGPU=gfx \ docker buildx bake --file=docker/rocm/rocm.hcl rocm \ --set rocm.tags=frigate:latest-rocm \ --load build-rocm: version - $(foreach chipset,$(ROCM_CHIPSETS), \ - AMDGPU=$(word 1,$(subst :, ,$(chipset))) \ - HSA_OVERRIDE_GFX_VERSION=$(word 2,$(subst :, ,$(chipset))) \ - HSA_OVERRIDE=1 \ - docker buildx bake --file=docker/rocm/rocm.hcl rocm \ - --set rocm.tags=$(IMAGE_REPO):${GITHUB_REF_NAME}-$(COMMIT_HASH)-rocm-$(chipset) \ - &&) true - - unset HSA_OVERRIDE_GFX_VERSION && \ - HSA_OVERRIDE=0 \ - AMDGPU=gfx \ docker buildx bake --file=docker/rocm/rocm.hcl rocm \ --set rocm.tags=$(IMAGE_REPO):${GITHUB_REF_NAME}-$(COMMIT_HASH)-rocm push-rocm: build-rocm - $(foreach chipset,$(ROCM_CHIPSETS), \ - AMDGPU=$(word 1,$(subst :, ,$(chipset))) \ - HSA_OVERRIDE_GFX_VERSION=$(word 2,$(subst :, ,$(chipset))) \ - HSA_OVERRIDE=1 \ - docker buildx bake --file=docker/rocm/rocm.hcl rocm \ - --set rocm.tags=$(IMAGE_REPO):${GITHUB_REF_NAME}-$(COMMIT_HASH)-rocm-$(chipset) \ - --push \ - &&) true - - unset HSA_OVERRIDE_GFX_VERSION && \ - HSA_OVERRIDE=0 \ - AMDGPU=gfx \ docker buildx bake --file=docker/rocm/rocm.hcl rocm \ --set rocm.tags=$(IMAGE_REPO):${GITHUB_REF_NAME}-$(COMMIT_HASH)-rocm \ --push