Improve Intel Stats (#23190)
Some checks are pending
CI / AMD64 Build (push) Waiting to run
CI / ARM Build (push) Waiting to run
CI / Jetson Jetpack 6 (push) Waiting to run
CI / AMD64 Extra Build (push) Blocked by required conditions
CI / ARM Extra Build (push) Blocked by required conditions
CI / Synaptics Build (push) Blocked by required conditions
CI / Assemble and push default build (push) Blocked by required conditions

* Implement per intel-gpu stats collection

* Improve device naming

* Improve GPU vendor handling

* Cleanup
This commit is contained in:
Nicolas Mowen 2026-05-13 15:12:48 -06:00 committed by GitHub
parent c8cfb9400a
commit 78fc472026
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 214 additions and 60 deletions

View File

@ -0,0 +1,109 @@
"""Resolve human-readable names for Intel GPUs via OpenVINO."""
import logging
import re
from typing import Optional
logger = logging.getLogger(__name__)
class IntelGpuNameResolver:
    """Build a pdev -> normalized device name map by enumerating OpenVINO GPUs.

    The lookup is performed once on first access and cached for the process
    lifetime. OpenVINO exposes DEVICE_PCI_INFO (domain/bus/device/function) and
    FULL_DEVICE_NAME for each GPU it can see, which is enough to associate the
    name with the pdev string used by DRM fdinfo.
    """

    # Cached pdev -> name map. None until the first get_names() call; the
    # first call stores the result (possibly empty) on the instance so the
    # enumeration runs at most once per process.
    _names: Optional[dict[str, str]] = None

    def get_names(self) -> dict[str, str]:
        """Return the pdev -> display-name map, building and caching it on first use.

        Returns an empty dict when OpenVINO is unavailable or its runtime fails
        to initialize; the empty result is cached too, so failures are not
        retried on every stats poll.
        """
        if self._names is not None:
            return self._names

        names: dict[str, str] = {}

        try:
            # Imported lazily: OpenVINO is optional and import is expensive.
            from openvino import Core
        except ImportError:
            logger.debug("OpenVINO unavailable; cannot resolve Intel GPU names")
            self._names = names
            return names

        try:
            core = Core()
            devices = core.available_devices
        except Exception as exc:
            # Core() can fail on hosts without a usable OpenVINO runtime.
            logger.debug("OpenVINO Core initialization failed: %s", exc)
            self._names = names
            return names

        # The CPU name serves as a fallback label for integrated GPUs, which
        # OpenVINO reports with only a generic FULL_DEVICE_NAME.
        cpu_name: Optional[str] = None
        if "CPU" in devices:
            try:
                cpu_name = self._strip_trademarks(
                    core.get_property("CPU", "FULL_DEVICE_NAME")
                )
            except Exception as exc:
                logger.debug("Failed to read CPU FULL_DEVICE_NAME: %s", exc)

        for device in devices:
            if not device.startswith("GPU"):
                continue

            try:
                pci = core.get_property(device, "DEVICE_PCI_INFO")
                raw_name = core.get_property(device, "FULL_DEVICE_NAME")
                device_type = core.get_property(device, "DEVICE_TYPE")
            except Exception as exc:
                # Skip devices whose properties can't be read; a partial map
                # is still useful for the GPUs that did resolve.
                logger.debug("Failed to read properties for %s: %s", device, exc)
                continue

            pdev = self._format_pdev(pci)

            if not pdev:
                continue

            names[pdev] = self._resolve_name(raw_name, device_type, cpu_name)

        self._names = names
        return names

    @staticmethod
    def _format_pdev(pci) -> Optional[str]:
        """Format OpenVINO PCI info as the DRM pdev string ("dddd:bb:dd.f").

        Returns None when the object lacks the expected
        domain/bus/device/function attributes.
        """
        try:
            return f"{pci.domain:04x}:{pci.bus:02x}:{pci.device:02x}.{pci.function:x}"
        except AttributeError:
            return None

    @classmethod
    def _resolve_name(cls, raw_name: str, device_type, cpu_name: Optional[str]) -> str:
        """Build a display name for a GPU.

        Modern integrated Intel GPUs are reported by OpenVINO with a generic
        FULL_DEVICE_NAME like "Intel(R) Graphics (iGPU)" that gives no model
        information. Since the iGPU is part of the CPU on these platforms, fall
        back to the CPU name (which OpenVINO does report specifically) and
        suffix it with "iGPU" so it's clear what the entry is.
        """
        is_integrated = "INTEGRATED" in str(device_type).upper()

        if is_integrated and cpu_name:
            # Drop the leading "Intel" brand to keep the label compact.
            short_cpu = re.sub(r"^Intel\s+", "", cpu_name)
            return f"{short_cpu} iGPU"

        return cls._normalize_name(raw_name)

    @classmethod
    def _normalize_name(cls, name: str) -> str:
        """Strip trademarks and a trailing "(iGPU)"/"(dGPU)" suffix from a name."""
        cleaned = cls._strip_trademarks(name)
        cleaned = re.sub(r"\s*\((?:i|d)GPU\)\s*$", "", cleaned, flags=re.IGNORECASE)
        return " ".join(cleaned.split())

    @staticmethod
    def _strip_trademarks(name: str) -> str:
        """Remove "(R)"/"(TM)" markers and collapse the resulting whitespace."""
        cleaned = re.sub(r"\(R\)|\(TM\)", "", name)
        return " ".join(cleaned.split())


# Process-wide singleton; the map is cached on this instance after first use.
intel_gpu_name_resolver = IntelGpuNameResolver()

View File

@ -230,6 +230,7 @@ async def set_gpu_stats(
hwaccel_args.append(args)
stats: dict[str, dict] = {}
intel_gpu_collected = False
for args in hwaccel_args:
if args in hwaccel_errors:
@ -242,6 +243,7 @@ async def set_gpu_stats(
if nvidia_usage:
for i in range(len(nvidia_usage)):
stats[nvidia_usage[i]["name"]] = {
"vendor": "nvidia",
"gpu": str(round(float(nvidia_usage[i]["gpu"]), 2)) + "%",
"mem": str(round(float(nvidia_usage[i]["mem"]), 2)) + "%",
"enc": str(round(float(nvidia_usage[i]["enc"]), 2)) + "%",
@ -250,31 +252,34 @@ async def set_gpu_stats(
}
else:
stats["nvidia-gpu"] = {"gpu": "", "mem": ""}
stats["nvidia-gpu"] = {"vendor": "nvidia", "gpu": "", "mem": ""}
hwaccel_errors.append(args)
elif "nvmpi" in args or "jetson" in args:
# nvidia Jetson
jetson_usage = get_jetson_stats()
if jetson_usage:
stats["jetson-gpu"] = jetson_usage
stats["jetson-gpu"] = {"vendor": "nvidia", **jetson_usage}
else:
stats["jetson-gpu"] = {"gpu": "", "mem": ""}
stats["jetson-gpu"] = {"vendor": "nvidia", "gpu": "", "mem": ""}
hwaccel_errors.append(args)
elif "qsv" in args or ("vaapi" in args and not is_vaapi_amd_driver()):
if not config.telemetry.stats.intel_gpu_stats:
continue
if "intel-gpu" not in stats:
if not intel_gpu_collected:
# intel GPU (QSV or VAAPI both use the same physical GPU)
intel_gpu_collected = True
intel_usage = get_intel_gpu_stats(
config.telemetry.stats.intel_gpu_device
)
if intel_usage is not None:
stats["intel-gpu"] = intel_usage or {"gpu": "", "mem": ""}
if intel_usage:
for entry in intel_usage.values():
name = entry.pop("name")
stats[name] = entry
else:
stats["intel-gpu"] = {"gpu": "", "mem": ""}
stats["intel-gpu"] = {"vendor": "intel", "gpu": "", "mem": ""}
hwaccel_errors.append(args)
elif "vaapi" in args:
if not config.telemetry.stats.amd_gpu_stats:
@ -284,18 +289,18 @@ async def set_gpu_stats(
amd_usage = get_amd_gpu_stats()
if amd_usage:
stats["amd-vaapi"] = amd_usage
stats["amd-vaapi"] = {"vendor": "amd", **amd_usage}
else:
stats["amd-vaapi"] = {"gpu": "", "mem": ""}
stats["amd-vaapi"] = {"vendor": "amd", "gpu": "", "mem": ""}
hwaccel_errors.append(args)
elif "preset-rk" in args:
rga_usage = get_rockchip_gpu_stats()
if rga_usage:
stats["rockchip"] = rga_usage
stats["rockchip"] = {"vendor": "rockchip", **rga_usage}
elif "v4l2m2m" in args or "rpi" in args:
# RPi v4l2m2m is currently not able to get usage stats
stats["rpi-v4l2m2m"] = {"gpu": "", "mem": ""}
stats["rpi-v4l2m2m"] = {"vendor": "rpi", "gpu": "", "mem": ""}
if stats:
all_stats["gpu_usages"] = stats

View File

@ -17,12 +17,14 @@ class TestGpuStats(unittest.TestCase):
amd_stats = get_amd_gpu_stats()
assert amd_stats == {"gpu": "4.17%", "mem": "60.37%"}
@patch("frigate.stats.intel_gpu_info.intel_gpu_name_resolver.get_names")
@patch("frigate.util.services.time.sleep")
@patch("frigate.util.services.time.monotonic")
@patch("frigate.util.services._read_intel_drm_fdinfo")
def test_intel_gpu_stats_fdinfo(self, read_fdinfo, monotonic, sleep):
def test_intel_gpu_stats_fdinfo(self, read_fdinfo, monotonic, sleep, get_names):
# 1 second of wall clock between snapshots
monotonic.side_effect = [0.0, 1.0]
get_names.return_value = {"0000:00:02.0": "Intel Graphics"}
# Two i915 clients on the same iGPU. Engine values are cumulative ns.
# Deltas over the 1s window:
@ -79,11 +81,15 @@ class TestGpuStats(unittest.TestCase):
sleep.assert_called_once()
assert intel_stats == {
"gpu": "90.0%",
"mem": "-%",
"compute": "30.0%",
"dec": "60.0%",
"clients": {"100": "80.0%", "200": "10.0%"},
"0000:00:02.0": {
"name": "Intel Graphics",
"vendor": "intel",
"gpu": "90.0%",
"mem": "-%",
"compute": "30.0%",
"dec": "60.0%",
"clients": {"100": "80.0%", "200": "10.0%"},
},
}
@patch("frigate.util.services._read_intel_drm_fdinfo")

View File

@ -393,8 +393,10 @@ def _read_intel_drm_fdinfo(target_pdev: Optional[str]) -> dict:
return snapshot
def get_intel_gpu_stats(intel_gpu_device: Optional[str]) -> Optional[dict[str, Any]]:
"""Get stats by reading DRM fdinfo files.
def get_intel_gpu_stats(
intel_gpu_device: Optional[str],
) -> Optional[dict[str, dict[str, Any]]]:
"""Get stats by reading DRM fdinfo files, bucketed per-pdev.
Each DRM client FD exposes monotonic per-engine busy counters via
/proc/<pid>/fdinfo/<fd> (i915 since kernel 5.19, Xe since first release).
@ -402,7 +404,14 @@ def get_intel_gpu_stats(intel_gpu_device: Optional[str]) -> Optional[dict[str, A
utilization. Render/3D and Compute are pooled into "compute"; Video and
VideoEnhance into "dec". Overall "gpu" is the sum of those pools (clamped
to 100%).
The return value is keyed by the GPU's drm-pdev string so multiple Intel
GPUs in the same system are reported separately. Each entry carries a
"name" populated from OpenVINO (falling back to the pdev) so callers can
surface a real device name in the UI.
"""
from frigate.stats.intel_gpu_info import intel_gpu_name_resolver
target_pdev = _resolve_intel_gpu_pdev(intel_gpu_device)
snapshot_a = _read_intel_drm_fdinfo(target_pdev)
@ -417,19 +426,21 @@ def get_intel_gpu_stats(intel_gpu_device: Optional[str]) -> Optional[dict[str, A
if not snapshot_b or elapsed_ns <= 0:
return None
engine_pct: dict[str, float] = {
"render": 0.0,
"video": 0.0,
"video-enhance": 0.0,
"compute": 0.0,
}
pid_pct: dict[str, float] = {}
def _new_engine_pct() -> dict[str, float]:
return {"render": 0.0, "video": 0.0, "video-enhance": 0.0, "compute": 0.0}
per_pdev_engine_pct: dict[str, dict[str, float]] = {}
per_pdev_pid_pct: dict[str, dict[str, float]] = {}
for key, data_b in snapshot_b.items():
data_a = snapshot_a.get(key)
if not data_a or data_a["driver"] != data_b["driver"]:
continue
pdev = key[0]
engine_pct = per_pdev_engine_pct.setdefault(pdev, _new_engine_pct())
pid_pct = per_pdev_pid_pct.setdefault(pdev, {})
client_total = 0.0
for engine, (busy_b, total_b) in data_b["engines"].items():
if engine not in engine_pct:
@ -452,25 +463,37 @@ def get_intel_gpu_stats(intel_gpu_device: Optional[str]) -> Optional[dict[str, A
pid_pct[data_b["pid"]] = pid_pct.get(data_b["pid"], 0.0) + client_total
for engine in engine_pct:
engine_pct[engine] = min(100.0, engine_pct[engine])
if not per_pdev_engine_pct:
return None
compute_pct = min(100.0, engine_pct["render"] + engine_pct["compute"])
dec_pct = min(100.0, engine_pct["video"] + engine_pct["video-enhance"])
overall_pct = min(100.0, compute_pct + dec_pct)
names = intel_gpu_name_resolver.get_names()
results: dict[str, dict[str, Any]] = {}
results: dict[str, Any] = {
"gpu": f"{round(overall_pct, 2)}%",
"mem": "-%",
"compute": f"{round(compute_pct, 2)}%",
"dec": f"{round(dec_pct, 2)}%",
}
for pdev, engine_pct in per_pdev_engine_pct.items():
for engine in engine_pct:
engine_pct[engine] = min(100.0, engine_pct[engine])
if pid_pct:
results["clients"] = {
pid: f"{round(min(100.0, pct), 2)}%" for pid, pct in pid_pct.items()
compute_pct = min(100.0, engine_pct["render"] + engine_pct["compute"])
dec_pct = min(100.0, engine_pct["video"] + engine_pct["video-enhance"])
overall_pct = min(100.0, compute_pct + dec_pct)
entry: dict[str, Any] = {
"name": names.get(pdev) or f"Intel GPU {pdev}",
"vendor": "intel",
"gpu": f"{round(overall_pct, 2)}%",
"mem": "-%",
"compute": f"{round(compute_pct, 2)}%",
"dec": f"{round(dec_pct, 2)}%",
}
pid_pct = per_pdev_pid_pct.get(pdev)
if pid_pct:
entry["clients"] = {
pid: f"{round(min(100.0, pct), 2)}%" for pid, pct in pid_pct.items()
}
results[pdev] = entry
return results

View File

@ -62,7 +62,10 @@ export type ExtraProcessStats = {
mem?: string;
};
export type GpuVendor = "intel" | "amd" | "nvidia" | "rockchip" | "rpi";
export type GpuStats = {
vendor?: GpuVendor;
gpu: string;
mem: string;
enc?: string;

View File

@ -1,5 +1,5 @@
import useSWR from "swr";
import { FrigateStats, GpuInfo } from "@/types/stats";
import { FrigateStats, GpuInfo, GpuStats } from "@/types/stats";
import { startTransition, useEffect, useMemo, useState } from "react";
import { useFrigateStats } from "@/api/ws";
import {
@ -98,13 +98,11 @@ export default function GeneralMetrics({
let nvCount = 0;
statsHistory.length > 0 &&
Object.keys(statsHistory[0]?.gpu_usages ?? {}).forEach((key) => {
if (key == "amd-vaapi" || key == "intel-gpu") {
vaCount += 1;
}
if (key.includes("NVIDIA")) {
Object.values(statsHistory[0]?.gpu_usages ?? {}).forEach((stats) => {
if (stats.vendor === "nvidia") {
nvCount += 1;
} else if (stats.vendor === "intel" || stats.vendor === "amd") {
vaCount += 1;
}
});
@ -288,11 +286,15 @@ export default function GeneralMetrics({
return [];
}
// Intel doesn't expose VRAM usage, so hide the memory section
// entirely when every reporting GPU is Intel.
const firstEntries: GpuStats[] = Object.values(
statsHistory[0]?.gpu_usages ?? {},
);
if (
Object.keys(statsHistory?.at(0)?.gpu_usages ?? {}).length == 1 &&
Object.keys(statsHistory?.at(0)?.gpu_usages ?? {})[0] === "intel-gpu"
firstEntries.length > 0 &&
firstEntries.every((s) => s.vendor === "intel")
) {
// intel gpu stats do not support memory
return undefined;
}
@ -307,6 +309,10 @@ export default function GeneralMetrics({
}
Object.entries(stats.gpu_usages || {}).forEach(([key, stats]) => {
if (stats.vendor === "intel") {
return;
}
if (!(key in series)) {
series[key] = { name: key, data: [] };
}
@ -470,8 +476,9 @@ export default function GeneralMetrics({
return false;
}
const gpuKeys = Object.keys(statsHistory[0]?.gpu_usages ?? {});
const hasIntelGpu = gpuKeys.some((key) => key === "intel-gpu");
const hasIntelGpu = Object.values(statsHistory[0]?.gpu_usages ?? {}).some(
(stats) => stats.vendor === "intel",
);
if (!hasIntelGpu) {
return false;
@ -486,14 +493,15 @@ export default function GeneralMetrics({
continue;
}
Object.entries(stats.gpu_usages || {}).forEach(([key, gpuStats]) => {
if (key === "intel-gpu") {
if (gpuStats.gpu) {
hasDataPoints = true;
const gpuValue = parseFloat(gpuStats.gpu.slice(0, -1));
if (!isNaN(gpuValue) && gpuValue > 0) {
allZero = false;
}
Object.values(stats.gpu_usages || {}).forEach((gpuStats) => {
if (gpuStats.vendor !== "intel") {
return;
}
if (gpuStats.gpu) {
hasDataPoints = true;
const gpuValue = parseFloat(gpuStats.gpu.slice(0, -1));
if (!isNaN(gpuValue) && gpuValue > 0) {
allZero = false;
}
}
});