"""Resolve human-readable names for Intel GPUs via OpenVINO."""

import logging
import re
from collections.abc import Mapping
from typing import Optional

logger = logging.getLogger(__name__)


class IntelGpuNameResolver:
    """Build a pdev -> normalized device name map by enumerating OpenVINO GPUs.

    The lookup is performed once on first access and cached on the instance.
    For each GPU device, the DEVICE_PCI_INFO property (domain/bus/device/
    function) is combined with FULL_DEVICE_NAME to associate a display name
    with the pdev string used by DRM fdinfo.

    Resolution is strictly best-effort: any failure (OpenVINO missing, Core
    initialization error, unreadable or malformed properties) results in the
    affected device being omitted from the map rather than an exception.
    """

    # None means "not looked up yet"; an empty dict is a valid cached result.
    _names: Optional[dict[str, str]] = None

    def get_names(self) -> dict[str, str]:
        """Return the cached pdev -> display name map, building it on first use.

        Returns:
            A dict keyed by pdev string (e.g. "0000:00:02.0"). Empty when
            OpenVINO is unavailable or no GPU could be identified.
        """
        if self._names is None:
            self._names = self._discover_names()

        return self._names

    def _discover_names(self) -> dict[str, str]:
        """Enumerate OpenVINO devices and build the pdev -> name map."""
        try:
            # Imported lazily so this module can be loaded without OpenVINO.
            from openvino import Core
        except ImportError:
            logger.debug("OpenVINO unavailable; cannot resolve Intel GPU names")
            return {}

        try:
            core = Core()
            devices = core.available_devices
        except Exception as exc:
            logger.debug("OpenVINO Core initialization failed: %s", exc)
            return {}

        # The CPU name is used as the display name for integrated GPUs.
        cpu_name: Optional[str] = None
        if "CPU" in devices:
            try:
                cpu_name = self._strip_trademarks(
                    core.get_property("CPU", "FULL_DEVICE_NAME")
                )
            except Exception as exc:
                logger.debug("Failed to read CPU FULL_DEVICE_NAME: %s", exc)

        names: dict[str, str] = {}

        for device in devices:
            if not device.startswith("GPU"):
                continue

            try:
                pci = core.get_property(device, "DEVICE_PCI_INFO")
                raw_name = core.get_property(device, "FULL_DEVICE_NAME")
                device_type = core.get_property(device, "DEVICE_TYPE")
            except Exception as exc:
                logger.debug("Failed to read properties for %s: %s", device, exc)
                continue

            pdev = self._format_pdev(pci)
            if not pdev:
                continue

            names[pdev] = self._resolve_name(raw_name, device_type, cpu_name)

        return names

    @staticmethod
    def _format_pdev(pci) -> Optional[str]:
        """Format PCI info as a "dddd:bb:dd.f" pdev string.

        Args:
            pci: Object exposing integer ``domain``, ``bus``, ``device`` and
                ``function`` either as attributes or as mapping keys.

        Returns:
            The lowercase-hex pdev string, or None when a field is missing or
            is not an integer.
        """
        fields = ("domain", "bus", "device", "function")

        try:
            # NOTE(review): mapping access is handled defensively in case the
            # binding surfaces PCI info as a dict - confirm against the
            # installed OpenVINO version.
            if isinstance(pci, Mapping):
                domain, bus, device, function = (pci[f] for f in fields)
            else:
                domain, bus, device, function = (getattr(pci, f) for f in fields)

            return f"{domain:04x}:{bus:02x}:{device:02x}.{function:x}"
        except (AttributeError, KeyError, TypeError, ValueError):
            # ValueError/TypeError: the "x" format spec rejects non-integer
            # values. Returning None makes the caller skip this device
            # instead of letting the error abort the whole lookup.
            return None

    @classmethod
    def _resolve_name(cls, raw_name: str, device_type, cpu_name: Optional[str]) -> str:
        """Build a display name for a GPU.

        Modern integrated Intel GPUs are reported by OpenVINO with a generic
        FULL_DEVICE_NAME like "Intel(R) Graphics (iGPU)" that gives no model
        information. Since the iGPU is part of the CPU on these platforms, fall
        back to the CPU name (which OpenVINO does report specifically) and
        suffix it with "iGPU" so it's clear what the entry is.

        Args:
            raw_name: FULL_DEVICE_NAME reported for the GPU.
            device_type: DEVICE_TYPE property value; only its string form is
                inspected for the substring "INTEGRATED" (case-insensitive).
            cpu_name: Trademark-stripped CPU name, or None if unavailable.

        Returns:
            "<cpu name without leading 'Intel'> iGPU" for integrated GPUs when
            a CPU name is known, otherwise the normalized ``raw_name``.
        """
        is_integrated = "INTEGRATED" in str(device_type).upper()

        if is_integrated and cpu_name:
            short_cpu = re.sub(r"^Intel\s+", "", cpu_name)
            return f"{short_cpu} iGPU"

        return cls._normalize_name(raw_name)

    @classmethod
    def _normalize_name(cls, name: str) -> str:
        """Strip trademark marks and a trailing "(iGPU)"/"(dGPU)" tag."""
        cleaned = cls._strip_trademarks(name)
        cleaned = re.sub(r"\s*\((?:i|d)GPU\)\s*$", "", cleaned, flags=re.IGNORECASE)
        return " ".join(cleaned.split())

    @staticmethod
    def _strip_trademarks(name: str) -> str:
        """Remove "(R)" and "(TM)" and collapse runs of whitespace."""
        cleaned = re.sub(r"\(R\)|\(TM\)", "", name)
        return " ".join(cleaned.split())


# Module-level instance shared by callers.
intel_gpu_name_resolver = IntelGpuNameResolver()
2)) + "%", @@ -250,31 +252,34 @@ async def set_gpu_stats( } else: - stats["nvidia-gpu"] = {"gpu": "", "mem": ""} + stats["nvidia-gpu"] = {"vendor": "nvidia", "gpu": "", "mem": ""} hwaccel_errors.append(args) elif "nvmpi" in args or "jetson" in args: # nvidia Jetson jetson_usage = get_jetson_stats() if jetson_usage: - stats["jetson-gpu"] = jetson_usage + stats["jetson-gpu"] = {"vendor": "nvidia", **jetson_usage} else: - stats["jetson-gpu"] = {"gpu": "", "mem": ""} + stats["jetson-gpu"] = {"vendor": "nvidia", "gpu": "", "mem": ""} hwaccel_errors.append(args) elif "qsv" in args or ("vaapi" in args and not is_vaapi_amd_driver()): if not config.telemetry.stats.intel_gpu_stats: continue - if "intel-gpu" not in stats: + if not intel_gpu_collected: # intel GPU (QSV or VAAPI both use the same physical GPU) + intel_gpu_collected = True intel_usage = get_intel_gpu_stats( config.telemetry.stats.intel_gpu_device ) - if intel_usage is not None: - stats["intel-gpu"] = intel_usage or {"gpu": "", "mem": ""} + if intel_usage: + for entry in intel_usage.values(): + name = entry.pop("name") + stats[name] = entry else: - stats["intel-gpu"] = {"gpu": "", "mem": ""} + stats["intel-gpu"] = {"vendor": "intel", "gpu": "", "mem": ""} hwaccel_errors.append(args) elif "vaapi" in args: if not config.telemetry.stats.amd_gpu_stats: @@ -284,18 +289,18 @@ async def set_gpu_stats( amd_usage = get_amd_gpu_stats() if amd_usage: - stats["amd-vaapi"] = amd_usage + stats["amd-vaapi"] = {"vendor": "amd", **amd_usage} else: - stats["amd-vaapi"] = {"gpu": "", "mem": ""} + stats["amd-vaapi"] = {"vendor": "amd", "gpu": "", "mem": ""} hwaccel_errors.append(args) elif "preset-rk" in args: rga_usage = get_rockchip_gpu_stats() if rga_usage: - stats["rockchip"] = rga_usage + stats["rockchip"] = {"vendor": "rockchip", **rga_usage} elif "v4l2m2m" in args or "rpi" in args: # RPi v4l2m2m is currently not able to get usage stats - stats["rpi-v4l2m2m"] = {"gpu": "", "mem": ""} + stats["rpi-v4l2m2m"] = {"vendor": "rpi", 
"gpu": "", "mem": ""} if stats: all_stats["gpu_usages"] = stats diff --git a/frigate/test/test_gpu_stats.py b/frigate/test/test_gpu_stats.py index 85b12138d..f6986912f 100644 --- a/frigate/test/test_gpu_stats.py +++ b/frigate/test/test_gpu_stats.py @@ -17,12 +17,14 @@ class TestGpuStats(unittest.TestCase): amd_stats = get_amd_gpu_stats() assert amd_stats == {"gpu": "4.17%", "mem": "60.37%"} + @patch("frigate.stats.intel_gpu_info.intel_gpu_name_resolver.get_names") @patch("frigate.util.services.time.sleep") @patch("frigate.util.services.time.monotonic") @patch("frigate.util.services._read_intel_drm_fdinfo") - def test_intel_gpu_stats_fdinfo(self, read_fdinfo, monotonic, sleep): + def test_intel_gpu_stats_fdinfo(self, read_fdinfo, monotonic, sleep, get_names): # 1 second of wall clock between snapshots monotonic.side_effect = [0.0, 1.0] + get_names.return_value = {"0000:00:02.0": "Intel Graphics"} # Two i915 clients on the same iGPU. Engine values are cumulative ns. # Deltas over the 1s window: @@ -79,11 +81,15 @@ class TestGpuStats(unittest.TestCase): sleep.assert_called_once() assert intel_stats == { - "gpu": "90.0%", - "mem": "-%", - "compute": "30.0%", - "dec": "60.0%", - "clients": {"100": "80.0%", "200": "10.0%"}, + "0000:00:02.0": { + "name": "Intel Graphics", + "vendor": "intel", + "gpu": "90.0%", + "mem": "-%", + "compute": "30.0%", + "dec": "60.0%", + "clients": {"100": "80.0%", "200": "10.0%"}, + }, } @patch("frigate.util.services._read_intel_drm_fdinfo") diff --git a/frigate/util/services.py b/frigate/util/services.py index 657cf6d55..5ee15f8b4 100644 --- a/frigate/util/services.py +++ b/frigate/util/services.py @@ -393,8 +393,10 @@ def _read_intel_drm_fdinfo(target_pdev: Optional[str]) -> dict: return snapshot -def get_intel_gpu_stats(intel_gpu_device: Optional[str]) -> Optional[dict[str, Any]]: - """Get stats by reading DRM fdinfo files. 
+def get_intel_gpu_stats( + intel_gpu_device: Optional[str], +) -> Optional[dict[str, dict[str, Any]]]: + """Get stats by reading DRM fdinfo files, bucketed per-pdev. Each DRM client FD exposes monotonic per-engine busy counters via /proc//fdinfo/ (i915 since kernel 5.19, Xe since first release). @@ -402,7 +404,14 @@ def get_intel_gpu_stats(intel_gpu_device: Optional[str]) -> Optional[dict[str, A utilization. Render/3D and Compute are pooled into "compute"; Video and VideoEnhance into "dec". Overall "gpu" is the sum of those pools (clamped to 100%). + + The return value is keyed by the GPU's drm-pdev string so multiple Intel + GPUs in the same system are reported separately. Each entry carries a + "name" populated from OpenVINO (falling back to the pdev) so callers can + surface a real device name in the UI. """ + from frigate.stats.intel_gpu_info import intel_gpu_name_resolver + target_pdev = _resolve_intel_gpu_pdev(intel_gpu_device) snapshot_a = _read_intel_drm_fdinfo(target_pdev) @@ -417,19 +426,21 @@ def get_intel_gpu_stats(intel_gpu_device: Optional[str]) -> Optional[dict[str, A if not snapshot_b or elapsed_ns <= 0: return None - engine_pct: dict[str, float] = { - "render": 0.0, - "video": 0.0, - "video-enhance": 0.0, - "compute": 0.0, - } - pid_pct: dict[str, float] = {} + def _new_engine_pct() -> dict[str, float]: + return {"render": 0.0, "video": 0.0, "video-enhance": 0.0, "compute": 0.0} + + per_pdev_engine_pct: dict[str, dict[str, float]] = {} + per_pdev_pid_pct: dict[str, dict[str, float]] = {} for key, data_b in snapshot_b.items(): data_a = snapshot_a.get(key) if not data_a or data_a["driver"] != data_b["driver"]: continue + pdev = key[0] + engine_pct = per_pdev_engine_pct.setdefault(pdev, _new_engine_pct()) + pid_pct = per_pdev_pid_pct.setdefault(pdev, {}) + client_total = 0.0 for engine, (busy_b, total_b) in data_b["engines"].items(): if engine not in engine_pct: @@ -452,25 +463,37 @@ def get_intel_gpu_stats(intel_gpu_device: Optional[str]) -> 
Optional[dict[str, A pid_pct[data_b["pid"]] = pid_pct.get(data_b["pid"], 0.0) + client_total - for engine in engine_pct: - engine_pct[engine] = min(100.0, engine_pct[engine]) + if not per_pdev_engine_pct: + return None - compute_pct = min(100.0, engine_pct["render"] + engine_pct["compute"]) - dec_pct = min(100.0, engine_pct["video"] + engine_pct["video-enhance"]) - overall_pct = min(100.0, compute_pct + dec_pct) + names = intel_gpu_name_resolver.get_names() + results: dict[str, dict[str, Any]] = {} - results: dict[str, Any] = { - "gpu": f"{round(overall_pct, 2)}%", - "mem": "-%", - "compute": f"{round(compute_pct, 2)}%", - "dec": f"{round(dec_pct, 2)}%", - } + for pdev, engine_pct in per_pdev_engine_pct.items(): + for engine in engine_pct: + engine_pct[engine] = min(100.0, engine_pct[engine]) - if pid_pct: - results["clients"] = { - pid: f"{round(min(100.0, pct), 2)}%" for pid, pct in pid_pct.items() + compute_pct = min(100.0, engine_pct["render"] + engine_pct["compute"]) + dec_pct = min(100.0, engine_pct["video"] + engine_pct["video-enhance"]) + overall_pct = min(100.0, compute_pct + dec_pct) + + entry: dict[str, Any] = { + "name": names.get(pdev) or f"Intel GPU {pdev}", + "vendor": "intel", + "gpu": f"{round(overall_pct, 2)}%", + "mem": "-%", + "compute": f"{round(compute_pct, 2)}%", + "dec": f"{round(dec_pct, 2)}%", } + pid_pct = per_pdev_pid_pct.get(pdev) + if pid_pct: + entry["clients"] = { + pid: f"{round(min(100.0, pct), 2)}%" for pid, pct in pid_pct.items() + } + + results[pdev] = entry + return results diff --git a/web/src/types/stats.ts b/web/src/types/stats.ts index 0bd4ebde3..0ebac9ebd 100644 --- a/web/src/types/stats.ts +++ b/web/src/types/stats.ts @@ -62,7 +62,10 @@ export type ExtraProcessStats = { mem?: string; }; +export type GpuVendor = "intel" | "amd" | "nvidia" | "rockchip" | "rpi"; + export type GpuStats = { + vendor?: GpuVendor; gpu: string; mem: string; enc?: string; diff --git a/web/src/views/system/GeneralMetrics.tsx 
b/web/src/views/system/GeneralMetrics.tsx index 15e0407b5..6d032de84 100644 --- a/web/src/views/system/GeneralMetrics.tsx +++ b/web/src/views/system/GeneralMetrics.tsx @@ -1,5 +1,5 @@ import useSWR from "swr"; -import { FrigateStats, GpuInfo } from "@/types/stats"; +import { FrigateStats, GpuInfo, GpuStats } from "@/types/stats"; import { startTransition, useEffect, useMemo, useState } from "react"; import { useFrigateStats } from "@/api/ws"; import { @@ -98,13 +98,11 @@ export default function GeneralMetrics({ let nvCount = 0; statsHistory.length > 0 && - Object.keys(statsHistory[0]?.gpu_usages ?? {}).forEach((key) => { - if (key == "amd-vaapi" || key == "intel-gpu") { - vaCount += 1; - } - - if (key.includes("NVIDIA")) { + Object.values(statsHistory[0]?.gpu_usages ?? {}).forEach((stats) => { + if (stats.vendor === "nvidia") { nvCount += 1; + } else if (stats.vendor === "intel" || stats.vendor === "amd") { + vaCount += 1; } }); @@ -288,11 +286,15 @@ export default function GeneralMetrics({ return []; } + // Intel doesn't expose VRAM usage, so hide the memory section + // entirely when every reporting GPU is Intel. + const firstEntries: GpuStats[] = Object.values( + statsHistory[0]?.gpu_usages ?? {}, + ); if ( - Object.keys(statsHistory?.at(0)?.gpu_usages ?? {}).length == 1 && - Object.keys(statsHistory?.at(0)?.gpu_usages ?? {})[0] === "intel-gpu" + firstEntries.length > 0 && + firstEntries.every((s) => s.vendor === "intel") ) { - // intel gpu stats do not support memory return undefined; } @@ -307,6 +309,10 @@ export default function GeneralMetrics({ } Object.entries(stats.gpu_usages || {}).forEach(([key, stats]) => { + if (stats.vendor === "intel") { + return; + } + if (!(key in series)) { series[key] = { name: key, data: [] }; } @@ -470,8 +476,9 @@ export default function GeneralMetrics({ return false; } - const gpuKeys = Object.keys(statsHistory[0]?.gpu_usages ?? 
{}); - const hasIntelGpu = gpuKeys.some((key) => key === "intel-gpu"); + const hasIntelGpu = Object.values(statsHistory[0]?.gpu_usages ?? {}).some( + (stats) => stats.vendor === "intel", + ); if (!hasIntelGpu) { return false; @@ -486,14 +493,15 @@ export default function GeneralMetrics({ continue; } - Object.entries(stats.gpu_usages || {}).forEach(([key, gpuStats]) => { - if (key === "intel-gpu") { - if (gpuStats.gpu) { - hasDataPoints = true; - const gpuValue = parseFloat(gpuStats.gpu.slice(0, -1)); - if (!isNaN(gpuValue) && gpuValue > 0) { - allZero = false; - } + Object.values(stats.gpu_usages || {}).forEach((gpuStats) => { + if (gpuStats.vendor !== "intel") { + return; + } + if (gpuStats.gpu) { + hasDataPoints = true; + const gpuValue = parseFloat(gpuStats.gpu.slice(0, -1)); + if (!isNaN(gpuValue) && gpuValue > 0) { + allZero = false; } } });