mirror of
https://github.com/blakeblackshear/frigate.git
synced 2026-05-15 09:50:51 +03:00
Improve Intel Stats (#23190)
Some checks are pending
CI / AMD64 Build (push) Waiting to run
CI / ARM Build (push) Waiting to run
CI / Jetson Jetpack 6 (push) Waiting to run
CI / AMD64 Extra Build (push) Blocked by required conditions
CI / ARM Extra Build (push) Blocked by required conditions
CI / Synaptics Build (push) Blocked by required conditions
CI / Assemble and push default build (push) Blocked by required conditions
Some checks are pending
CI / AMD64 Build (push) Waiting to run
CI / ARM Build (push) Waiting to run
CI / Jetson Jetpack 6 (push) Waiting to run
CI / AMD64 Extra Build (push) Blocked by required conditions
CI / ARM Extra Build (push) Blocked by required conditions
CI / Synaptics Build (push) Blocked by required conditions
CI / Assemble and push default build (push) Blocked by required conditions
* Implement per intel-gpu stats collection * Improve device naming * Improve GPU vendor handling * Cleanup
This commit is contained in:
parent
c8cfb9400a
commit
78fc472026
109
frigate/stats/intel_gpu_info.py
Normal file
109
frigate/stats/intel_gpu_info.py
Normal file
@ -0,0 +1,109 @@
|
||||
"""Resolve human-readable names for Intel GPUs via OpenVINO."""
|
||||
|
||||
import logging
|
||||
import re
|
||||
from typing import Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class IntelGpuNameResolver:
    """Build a pdev -> normalized device name map by enumerating OpenVINO GPUs.

    The lookup is performed once on first access and cached for the process
    lifetime. OpenVINO exposes DEVICE_PCI_INFO (domain/bus/device/function) and
    FULL_DEVICE_NAME for each GPU it can see, which is enough to associate the
    name with the pdev string used by DRM fdinfo.
    """

    # None means "not looked up yet"; an empty dict is a valid cached result
    # (OpenVINO missing, failed to initialize, or no GPUs were found).
    _names: Optional[dict[str, str]] = None

    def get_names(self) -> dict[str, str]:
        """Return the cached pdev -> display name map, building it on first use.

        Returns:
            A dict keyed by PCI address ("dddd:bb:dd.f"). It is empty when
            OpenVINO is unavailable, fails to initialize, or reports no usable
            GPU. The result (including an empty one) is cached on the instance,
            so the enumeration is not retried.
        """
        if self._names is None:
            self._names = self._enumerate_names()
        return self._names

    def _enumerate_names(self) -> dict[str, str]:
        """Query OpenVINO once and build the pdev -> display name map."""
        names: dict[str, str] = {}

        # Imported lazily so the module stays importable without OpenVINO.
        try:
            from openvino import Core
        except ImportError:
            logger.debug("OpenVINO unavailable; cannot resolve Intel GPU names")
            return names

        try:
            core = Core()
            devices = core.available_devices
        except Exception as exc:
            logger.debug(f"OpenVINO Core initialization failed: {exc}")
            return names

        # The CPU name is used as the display name for integrated GPUs, whose
        # own FULL_DEVICE_NAME is generic (see _resolve_name).
        cpu_name: Optional[str] = None
        if "CPU" in devices:
            try:
                cpu_name = self._strip_trademarks(
                    core.get_property("CPU", "FULL_DEVICE_NAME")
                )
            except Exception as exc:
                logger.debug(f"Failed to read CPU FULL_DEVICE_NAME: {exc}")

        for device in devices:
            if not device.startswith("GPU"):
                continue

            try:
                pci = core.get_property(device, "DEVICE_PCI_INFO")
                raw_name = core.get_property(device, "FULL_DEVICE_NAME")
                device_type = core.get_property(device, "DEVICE_TYPE")
            except Exception as exc:
                logger.debug(f"Failed to read properties for {device}: {exc}")
                continue

            pdev = self._format_pdev(pci)
            if not pdev:
                # Without a PCI address the name cannot be matched to a pdev.
                continue

            names[pdev] = self._resolve_name(raw_name, device_type, cpu_name)

        return names

    @staticmethod
    def _format_pdev(pci) -> Optional[str]:
        """Format a PCI-info object as a "dddd:bb:dd.f" address string.

        Returns None when ``pci`` lacks the domain/bus/device/function
        attributes or when any of them cannot be rendered as hexadecimal
        (for example None or a string), so that one malformed device is
        skipped instead of aborting the whole enumeration.
        """
        try:
            return f"{pci.domain:04x}:{pci.bus:02x}:{pci.device:02x}.{pci.function:x}"
        except (AttributeError, TypeError, ValueError):
            # TypeError/ValueError: attribute present but not an integer, so
            # the "x" format spec is rejected.
            return None

    @classmethod
    def _resolve_name(cls, raw_name: str, device_type, cpu_name: Optional[str]) -> str:
        """Build a display name for a GPU.

        Modern integrated Intel GPUs are reported by OpenVINO with a generic
        FULL_DEVICE_NAME like "Intel(R) Graphics (iGPU)" that gives no model
        information. Since the iGPU is part of the CPU on these platforms, fall
        back to the CPU name (which OpenVINO does report specifically) and
        suffix it with "iGPU" so it's clear what the entry is.

        Any other device (or an integrated one when no CPU name is known) gets
        its own name, normalized via ``_normalize_name``.
        """
        is_integrated = "INTEGRATED" in str(device_type).upper()

        if is_integrated and cpu_name:
            # Drop the leading vendor word so the label stays compact.
            short_cpu = re.sub(r"^Intel\s+", "", cpu_name)
            return f"{short_cpu} iGPU"

        return cls._normalize_name(raw_name)

    @classmethod
    def _normalize_name(cls, name: str) -> str:
        """Strip trademark markers and a trailing "(iGPU)"/"(dGPU)" tag."""
        cleaned = cls._strip_trademarks(name)
        cleaned = re.sub(r"\s*\((?:i|d)GPU\)\s*$", "", cleaned, flags=re.IGNORECASE)
        return " ".join(cleaned.split())

    @staticmethod
    def _strip_trademarks(name: str) -> str:
        """Remove "(R)" and "(TM)" markers and collapse runs of whitespace."""
        cleaned = re.sub(r"\(R\)|\(TM\)", "", name)
        return " ".join(cleaned.split())
|
||||
|
||||
|
||||
# Shared module-level instance: the name map is cached per instance, so
# importers reuse this one object to avoid repeating the OpenVINO lookup.
intel_gpu_name_resolver = IntelGpuNameResolver()
|
||||
@ -230,6 +230,7 @@ async def set_gpu_stats(
|
||||
hwaccel_args.append(args)
|
||||
|
||||
stats: dict[str, dict] = {}
|
||||
intel_gpu_collected = False
|
||||
|
||||
for args in hwaccel_args:
|
||||
if args in hwaccel_errors:
|
||||
@ -242,6 +243,7 @@ async def set_gpu_stats(
|
||||
if nvidia_usage:
|
||||
for i in range(len(nvidia_usage)):
|
||||
stats[nvidia_usage[i]["name"]] = {
|
||||
"vendor": "nvidia",
|
||||
"gpu": str(round(float(nvidia_usage[i]["gpu"]), 2)) + "%",
|
||||
"mem": str(round(float(nvidia_usage[i]["mem"]), 2)) + "%",
|
||||
"enc": str(round(float(nvidia_usage[i]["enc"]), 2)) + "%",
|
||||
@ -250,31 +252,34 @@ async def set_gpu_stats(
|
||||
}
|
||||
|
||||
else:
|
||||
stats["nvidia-gpu"] = {"gpu": "", "mem": ""}
|
||||
stats["nvidia-gpu"] = {"vendor": "nvidia", "gpu": "", "mem": ""}
|
||||
hwaccel_errors.append(args)
|
||||
elif "nvmpi" in args or "jetson" in args:
|
||||
# nvidia Jetson
|
||||
jetson_usage = get_jetson_stats()
|
||||
|
||||
if jetson_usage:
|
||||
stats["jetson-gpu"] = jetson_usage
|
||||
stats["jetson-gpu"] = {"vendor": "nvidia", **jetson_usage}
|
||||
else:
|
||||
stats["jetson-gpu"] = {"gpu": "", "mem": ""}
|
||||
stats["jetson-gpu"] = {"vendor": "nvidia", "gpu": "", "mem": ""}
|
||||
hwaccel_errors.append(args)
|
||||
elif "qsv" in args or ("vaapi" in args and not is_vaapi_amd_driver()):
|
||||
if not config.telemetry.stats.intel_gpu_stats:
|
||||
continue
|
||||
|
||||
if "intel-gpu" not in stats:
|
||||
if not intel_gpu_collected:
|
||||
# intel GPU (QSV or VAAPI both use the same physical GPU)
|
||||
intel_gpu_collected = True
|
||||
intel_usage = get_intel_gpu_stats(
|
||||
config.telemetry.stats.intel_gpu_device
|
||||
)
|
||||
|
||||
if intel_usage is not None:
|
||||
stats["intel-gpu"] = intel_usage or {"gpu": "", "mem": ""}
|
||||
if intel_usage:
|
||||
for entry in intel_usage.values():
|
||||
name = entry.pop("name")
|
||||
stats[name] = entry
|
||||
else:
|
||||
stats["intel-gpu"] = {"gpu": "", "mem": ""}
|
||||
stats["intel-gpu"] = {"vendor": "intel", "gpu": "", "mem": ""}
|
||||
hwaccel_errors.append(args)
|
||||
elif "vaapi" in args:
|
||||
if not config.telemetry.stats.amd_gpu_stats:
|
||||
@ -284,18 +289,18 @@ async def set_gpu_stats(
|
||||
amd_usage = get_amd_gpu_stats()
|
||||
|
||||
if amd_usage:
|
||||
stats["amd-vaapi"] = amd_usage
|
||||
stats["amd-vaapi"] = {"vendor": "amd", **amd_usage}
|
||||
else:
|
||||
stats["amd-vaapi"] = {"gpu": "", "mem": ""}
|
||||
stats["amd-vaapi"] = {"vendor": "amd", "gpu": "", "mem": ""}
|
||||
hwaccel_errors.append(args)
|
||||
elif "preset-rk" in args:
|
||||
rga_usage = get_rockchip_gpu_stats()
|
||||
|
||||
if rga_usage:
|
||||
stats["rockchip"] = rga_usage
|
||||
stats["rockchip"] = {"vendor": "rockchip", **rga_usage}
|
||||
elif "v4l2m2m" in args or "rpi" in args:
|
||||
# RPi v4l2m2m is currently not able to get usage stats
|
||||
stats["rpi-v4l2m2m"] = {"gpu": "", "mem": ""}
|
||||
stats["rpi-v4l2m2m"] = {"vendor": "rpi", "gpu": "", "mem": ""}
|
||||
|
||||
if stats:
|
||||
all_stats["gpu_usages"] = stats
|
||||
|
||||
@ -17,12 +17,14 @@ class TestGpuStats(unittest.TestCase):
|
||||
amd_stats = get_amd_gpu_stats()
|
||||
assert amd_stats == {"gpu": "4.17%", "mem": "60.37%"}
|
||||
|
||||
@patch("frigate.stats.intel_gpu_info.intel_gpu_name_resolver.get_names")
|
||||
@patch("frigate.util.services.time.sleep")
|
||||
@patch("frigate.util.services.time.monotonic")
|
||||
@patch("frigate.util.services._read_intel_drm_fdinfo")
|
||||
def test_intel_gpu_stats_fdinfo(self, read_fdinfo, monotonic, sleep):
|
||||
def test_intel_gpu_stats_fdinfo(self, read_fdinfo, monotonic, sleep, get_names):
|
||||
# 1 second of wall clock between snapshots
|
||||
monotonic.side_effect = [0.0, 1.0]
|
||||
get_names.return_value = {"0000:00:02.0": "Intel Graphics"}
|
||||
|
||||
# Two i915 clients on the same iGPU. Engine values are cumulative ns.
|
||||
# Deltas over the 1s window:
|
||||
@ -79,11 +81,15 @@ class TestGpuStats(unittest.TestCase):
|
||||
|
||||
sleep.assert_called_once()
|
||||
assert intel_stats == {
|
||||
"gpu": "90.0%",
|
||||
"mem": "-%",
|
||||
"compute": "30.0%",
|
||||
"dec": "60.0%",
|
||||
"clients": {"100": "80.0%", "200": "10.0%"},
|
||||
"0000:00:02.0": {
|
||||
"name": "Intel Graphics",
|
||||
"vendor": "intel",
|
||||
"gpu": "90.0%",
|
||||
"mem": "-%",
|
||||
"compute": "30.0%",
|
||||
"dec": "60.0%",
|
||||
"clients": {"100": "80.0%", "200": "10.0%"},
|
||||
},
|
||||
}
|
||||
|
||||
@patch("frigate.util.services._read_intel_drm_fdinfo")
|
||||
|
||||
@ -393,8 +393,10 @@ def _read_intel_drm_fdinfo(target_pdev: Optional[str]) -> dict:
|
||||
return snapshot
|
||||
|
||||
|
||||
def get_intel_gpu_stats(intel_gpu_device: Optional[str]) -> Optional[dict[str, Any]]:
|
||||
"""Get stats by reading DRM fdinfo files.
|
||||
def get_intel_gpu_stats(
|
||||
intel_gpu_device: Optional[str],
|
||||
) -> Optional[dict[str, dict[str, Any]]]:
|
||||
"""Get stats by reading DRM fdinfo files, bucketed per-pdev.
|
||||
|
||||
Each DRM client FD exposes monotonic per-engine busy counters via
|
||||
/proc/<pid>/fdinfo/<fd> (i915 since kernel 5.19, Xe since first release).
|
||||
@ -402,7 +404,14 @@ def get_intel_gpu_stats(intel_gpu_device: Optional[str]) -> Optional[dict[str, A
|
||||
utilization. Render/3D and Compute are pooled into "compute"; Video and
|
||||
VideoEnhance into "dec". Overall "gpu" is the sum of those pools (clamped
|
||||
to 100%).
|
||||
|
||||
The return value is keyed by the GPU's drm-pdev string so multiple Intel
|
||||
GPUs in the same system are reported separately. Each entry carries a
|
||||
"name" populated from OpenVINO (falling back to the pdev) so callers can
|
||||
surface a real device name in the UI.
|
||||
"""
|
||||
from frigate.stats.intel_gpu_info import intel_gpu_name_resolver
|
||||
|
||||
target_pdev = _resolve_intel_gpu_pdev(intel_gpu_device)
|
||||
|
||||
snapshot_a = _read_intel_drm_fdinfo(target_pdev)
|
||||
@ -417,19 +426,21 @@ def get_intel_gpu_stats(intel_gpu_device: Optional[str]) -> Optional[dict[str, A
|
||||
if not snapshot_b or elapsed_ns <= 0:
|
||||
return None
|
||||
|
||||
engine_pct: dict[str, float] = {
|
||||
"render": 0.0,
|
||||
"video": 0.0,
|
||||
"video-enhance": 0.0,
|
||||
"compute": 0.0,
|
||||
}
|
||||
pid_pct: dict[str, float] = {}
|
||||
def _new_engine_pct() -> dict[str, float]:
|
||||
return {"render": 0.0, "video": 0.0, "video-enhance": 0.0, "compute": 0.0}
|
||||
|
||||
per_pdev_engine_pct: dict[str, dict[str, float]] = {}
|
||||
per_pdev_pid_pct: dict[str, dict[str, float]] = {}
|
||||
|
||||
for key, data_b in snapshot_b.items():
|
||||
data_a = snapshot_a.get(key)
|
||||
if not data_a or data_a["driver"] != data_b["driver"]:
|
||||
continue
|
||||
|
||||
pdev = key[0]
|
||||
engine_pct = per_pdev_engine_pct.setdefault(pdev, _new_engine_pct())
|
||||
pid_pct = per_pdev_pid_pct.setdefault(pdev, {})
|
||||
|
||||
client_total = 0.0
|
||||
for engine, (busy_b, total_b) in data_b["engines"].items():
|
||||
if engine not in engine_pct:
|
||||
@ -452,25 +463,37 @@ def get_intel_gpu_stats(intel_gpu_device: Optional[str]) -> Optional[dict[str, A
|
||||
|
||||
pid_pct[data_b["pid"]] = pid_pct.get(data_b["pid"], 0.0) + client_total
|
||||
|
||||
for engine in engine_pct:
|
||||
engine_pct[engine] = min(100.0, engine_pct[engine])
|
||||
if not per_pdev_engine_pct:
|
||||
return None
|
||||
|
||||
compute_pct = min(100.0, engine_pct["render"] + engine_pct["compute"])
|
||||
dec_pct = min(100.0, engine_pct["video"] + engine_pct["video-enhance"])
|
||||
overall_pct = min(100.0, compute_pct + dec_pct)
|
||||
names = intel_gpu_name_resolver.get_names()
|
||||
results: dict[str, dict[str, Any]] = {}
|
||||
|
||||
results: dict[str, Any] = {
|
||||
"gpu": f"{round(overall_pct, 2)}%",
|
||||
"mem": "-%",
|
||||
"compute": f"{round(compute_pct, 2)}%",
|
||||
"dec": f"{round(dec_pct, 2)}%",
|
||||
}
|
||||
for pdev, engine_pct in per_pdev_engine_pct.items():
|
||||
for engine in engine_pct:
|
||||
engine_pct[engine] = min(100.0, engine_pct[engine])
|
||||
|
||||
if pid_pct:
|
||||
results["clients"] = {
|
||||
pid: f"{round(min(100.0, pct), 2)}%" for pid, pct in pid_pct.items()
|
||||
compute_pct = min(100.0, engine_pct["render"] + engine_pct["compute"])
|
||||
dec_pct = min(100.0, engine_pct["video"] + engine_pct["video-enhance"])
|
||||
overall_pct = min(100.0, compute_pct + dec_pct)
|
||||
|
||||
entry: dict[str, Any] = {
|
||||
"name": names.get(pdev) or f"Intel GPU {pdev}",
|
||||
"vendor": "intel",
|
||||
"gpu": f"{round(overall_pct, 2)}%",
|
||||
"mem": "-%",
|
||||
"compute": f"{round(compute_pct, 2)}%",
|
||||
"dec": f"{round(dec_pct, 2)}%",
|
||||
}
|
||||
|
||||
pid_pct = per_pdev_pid_pct.get(pdev)
|
||||
if pid_pct:
|
||||
entry["clients"] = {
|
||||
pid: f"{round(min(100.0, pct), 2)}%" for pid, pct in pid_pct.items()
|
||||
}
|
||||
|
||||
results[pdev] = entry
|
||||
|
||||
return results
|
||||
|
||||
|
||||
|
||||
@ -62,7 +62,10 @@ export type ExtraProcessStats = {
|
||||
mem?: string;
|
||||
};
|
||||
|
||||
export type GpuVendor = "intel" | "amd" | "nvidia" | "rockchip" | "rpi";
|
||||
|
||||
export type GpuStats = {
|
||||
vendor?: GpuVendor;
|
||||
gpu: string;
|
||||
mem: string;
|
||||
enc?: string;
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
import useSWR from "swr";
|
||||
import { FrigateStats, GpuInfo } from "@/types/stats";
|
||||
import { FrigateStats, GpuInfo, GpuStats } from "@/types/stats";
|
||||
import { startTransition, useEffect, useMemo, useState } from "react";
|
||||
import { useFrigateStats } from "@/api/ws";
|
||||
import {
|
||||
@ -98,13 +98,11 @@ export default function GeneralMetrics({
|
||||
let nvCount = 0;
|
||||
|
||||
statsHistory.length > 0 &&
|
||||
Object.keys(statsHistory[0]?.gpu_usages ?? {}).forEach((key) => {
|
||||
if (key == "amd-vaapi" || key == "intel-gpu") {
|
||||
vaCount += 1;
|
||||
}
|
||||
|
||||
if (key.includes("NVIDIA")) {
|
||||
Object.values(statsHistory[0]?.gpu_usages ?? {}).forEach((stats) => {
|
||||
if (stats.vendor === "nvidia") {
|
||||
nvCount += 1;
|
||||
} else if (stats.vendor === "intel" || stats.vendor === "amd") {
|
||||
vaCount += 1;
|
||||
}
|
||||
});
|
||||
|
||||
@ -288,11 +286,15 @@ export default function GeneralMetrics({
|
||||
return [];
|
||||
}
|
||||
|
||||
// Intel doesn't expose VRAM usage, so hide the memory section
|
||||
// entirely when every reporting GPU is Intel.
|
||||
const firstEntries: GpuStats[] = Object.values(
|
||||
statsHistory[0]?.gpu_usages ?? {},
|
||||
);
|
||||
if (
|
||||
Object.keys(statsHistory?.at(0)?.gpu_usages ?? {}).length == 1 &&
|
||||
Object.keys(statsHistory?.at(0)?.gpu_usages ?? {})[0] === "intel-gpu"
|
||||
firstEntries.length > 0 &&
|
||||
firstEntries.every((s) => s.vendor === "intel")
|
||||
) {
|
||||
// intel gpu stats do not support memory
|
||||
return undefined;
|
||||
}
|
||||
|
||||
@ -307,6 +309,10 @@ export default function GeneralMetrics({
|
||||
}
|
||||
|
||||
Object.entries(stats.gpu_usages || {}).forEach(([key, stats]) => {
|
||||
if (stats.vendor === "intel") {
|
||||
return;
|
||||
}
|
||||
|
||||
if (!(key in series)) {
|
||||
series[key] = { name: key, data: [] };
|
||||
}
|
||||
@ -470,8 +476,9 @@ export default function GeneralMetrics({
|
||||
return false;
|
||||
}
|
||||
|
||||
const gpuKeys = Object.keys(statsHistory[0]?.gpu_usages ?? {});
|
||||
const hasIntelGpu = gpuKeys.some((key) => key === "intel-gpu");
|
||||
const hasIntelGpu = Object.values(statsHistory[0]?.gpu_usages ?? {}).some(
|
||||
(stats) => stats.vendor === "intel",
|
||||
);
|
||||
|
||||
if (!hasIntelGpu) {
|
||||
return false;
|
||||
@ -486,14 +493,15 @@ export default function GeneralMetrics({
|
||||
continue;
|
||||
}
|
||||
|
||||
Object.entries(stats.gpu_usages || {}).forEach(([key, gpuStats]) => {
|
||||
if (key === "intel-gpu") {
|
||||
if (gpuStats.gpu) {
|
||||
hasDataPoints = true;
|
||||
const gpuValue = parseFloat(gpuStats.gpu.slice(0, -1));
|
||||
if (!isNaN(gpuValue) && gpuValue > 0) {
|
||||
allZero = false;
|
||||
}
|
||||
Object.values(stats.gpu_usages || {}).forEach((gpuStats) => {
|
||||
if (gpuStats.vendor !== "intel") {
|
||||
return;
|
||||
}
|
||||
if (gpuStats.gpu) {
|
||||
hasDataPoints = true;
|
||||
const gpuValue = parseFloat(gpuStats.gpu.slice(0, -1));
|
||||
if (!isNaN(gpuValue) && gpuValue > 0) {
|
||||
allZero = false;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
Loading…
Reference in New Issue
Block a user