mirror of
https://github.com/blakeblackshear/frigate.git
synced 2026-05-07 22:15:28 +03:00
Rewrite intel GPU stats to use file descriptors instead of intel_gpu_top, leading to significantly better API for interaction and more accurate results
This commit is contained in:
parent
814c497bef
commit
286144deb4
@ -264,156 +264,210 @@ def get_amd_gpu_stats() -> Optional[dict[str, str]]:
|
||||
return results
|
||||
|
||||
|
||||
def get_intel_gpu_stats(intel_gpu_device: Optional[str]) -> Optional[dict[str, str]]:
|
||||
"""Get stats using intel_gpu_top.
|
||||
_INTEL_FDINFO_SAMPLE_SECONDS = 1.0
|
||||
|
||||
Returns overall GPU usage derived from rc6 residency (idle time),
|
||||
plus individual engine breakdowns:
|
||||
- enc: Render/3D engine (compute/shader encoder, used by QSV)
|
||||
- dec: Video engines (fixed-function codec, used by VAAPI)
|
||||
# i915 fdinfo keys (cumulative ns) → logical engine name.
|
||||
_I915_ENGINE_KEYS = {
|
||||
"drm-engine-render": "render",
|
||||
"drm-engine-video": "video",
|
||||
"drm-engine-video-enhance": "video-enhance",
|
||||
"drm-engine-compute": "compute",
|
||||
"drm-engine-copy": "copy",
|
||||
}
|
||||
# Xe fdinfo suffixes (cumulative cycles, paired with drm-total-cycles-*).
|
||||
_XE_ENGINE_KEYS = {
|
||||
"rcs": "render",
|
||||
"vcs": "video",
|
||||
"vecs": "video-enhance",
|
||||
"ccs": "compute",
|
||||
"bcs": "copy",
|
||||
}
|
||||
|
||||
|
||||
def _resolve_intel_gpu_pdev(device: Optional[str]) -> Optional[str]:
|
||||
"""Map a configured GPU hint (/dev/dri/card1, renderD128, or a PCI bus
|
||||
address) to its drm-pdev string so we can filter fdinfo entries to that
|
||||
device. Returns None when no hint is supplied or it cannot be resolved."""
|
||||
if not device:
|
||||
return None
|
||||
|
||||
if re.match(r"^[0-9a-fA-F]{4}:[0-9a-fA-F]{2}:[0-9a-fA-F]{2}\.[0-9a-fA-F]$", device):
|
||||
return device
|
||||
|
||||
name = os.path.basename(device.rstrip("/"))
|
||||
try:
|
||||
return os.path.basename(os.path.realpath(f"/sys/class/drm/{name}/device"))
|
||||
except OSError:
|
||||
return None
|
||||
|
||||
|
||||
def _read_intel_drm_fdinfo(target_pdev: Optional[str]) -> dict:
|
||||
"""Snapshot DRM fdinfo for every Intel client visible in /proc.
|
||||
|
||||
Returns a dict keyed by (pdev, drm-client-id, pid) so the same context
|
||||
seen via multiple file descriptors on a single process collapses to one
|
||||
entry.
|
||||
"""
|
||||
|
||||
def get_stats_manually(output: str) -> dict[str, str]:
|
||||
"""Find global stats via regex when json fails to parse."""
|
||||
reading = "".join(output)
|
||||
results: dict[str, str] = {}
|
||||
|
||||
# rc6 residency for overall GPU usage
|
||||
rc6_match = re.search(r'"rc6":\{"value":([\d.]+)', reading)
|
||||
if rc6_match:
|
||||
rc6_value = float(rc6_match.group(1))
|
||||
results["gpu"] = f"{round(100.0 - rc6_value, 2)}%"
|
||||
else:
|
||||
results["gpu"] = "-%"
|
||||
|
||||
results["mem"] = "-%"
|
||||
|
||||
# Render/3D is the compute/encode engine
|
||||
render = []
|
||||
for result in re.findall(r'"Render/3D/0":{[a-z":\d.,%]+}', reading):
|
||||
packet = json.loads(result[14:])
|
||||
single = packet.get("busy", 0.0)
|
||||
render.append(float(single))
|
||||
|
||||
if render:
|
||||
results["compute"] = f"{round(sum(render) / len(render), 2)}%"
|
||||
|
||||
# Video engines are the fixed-function decode engines
|
||||
video = []
|
||||
for result in re.findall(r'"Video/\d":{[a-z":\d.,%]+}', reading):
|
||||
packet = json.loads(result[10:])
|
||||
single = packet.get("busy", 0.0)
|
||||
video.append(float(single))
|
||||
|
||||
if video:
|
||||
results["dec"] = f"{round(sum(video) / len(video), 2)}%"
|
||||
|
||||
return results
|
||||
|
||||
intel_gpu_top_command = [
|
||||
"timeout",
|
||||
"0.5s",
|
||||
"intel_gpu_top",
|
||||
"-J",
|
||||
"-o",
|
||||
"-",
|
||||
"-s",
|
||||
"1000", # Intel changed this from seconds to milliseconds in 2024+ versions
|
||||
]
|
||||
|
||||
if intel_gpu_device:
|
||||
intel_gpu_top_command += ["-d", intel_gpu_device]
|
||||
snapshot: dict = {}
|
||||
|
||||
try:
|
||||
p = sp.run(
|
||||
intel_gpu_top_command,
|
||||
encoding="ascii",
|
||||
capture_output=True,
|
||||
)
|
||||
except UnicodeDecodeError:
|
||||
return None
|
||||
proc_entries = os.listdir("/proc")
|
||||
except OSError:
|
||||
return snapshot
|
||||
|
||||
# timeout has a non-zero returncode when timeout is reached
|
||||
if p.returncode != 124:
|
||||
logger.error(f"Unable to poll intel GPU stats: {p.stderr}")
|
||||
return None
|
||||
else:
|
||||
output = "".join(p.stdout.split())
|
||||
for entry in proc_entries:
|
||||
if not entry.isdigit():
|
||||
continue
|
||||
|
||||
fdinfo_dir = f"/proc/{entry}/fdinfo"
|
||||
try:
|
||||
data = json.loads(f"[{output}]")
|
||||
except json.JSONDecodeError:
|
||||
return get_stats_manually(output)
|
||||
fds = os.listdir(fdinfo_dir)
|
||||
except (FileNotFoundError, PermissionError, NotADirectoryError, OSError):
|
||||
continue
|
||||
|
||||
results: dict[str, str] = {}
|
||||
rc6_values = []
|
||||
render_global = []
|
||||
video_global = []
|
||||
# per-client: {pid: [total_busy_per_sample, ...]}
|
||||
client_usages: dict[str, list[float]] = {}
|
||||
for fd in fds:
|
||||
try:
|
||||
with open(f"{fdinfo_dir}/{fd}") as f:
|
||||
content = f.read()
|
||||
except (FileNotFoundError, PermissionError, OSError):
|
||||
continue
|
||||
|
||||
for block in data:
|
||||
# rc6 residency: percentage of time GPU is idle
|
||||
rc6 = block.get("rc6", {}).get("value")
|
||||
if rc6 is not None:
|
||||
rc6_values.append(float(rc6))
|
||||
if "drm-driver" not in content:
|
||||
continue
|
||||
|
||||
global_engine = block.get("engines")
|
||||
fields: dict[str, str] = {}
|
||||
for line in content.splitlines():
|
||||
key, sep, value = line.partition(":")
|
||||
if sep:
|
||||
fields[key.strip()] = value.strip()
|
||||
|
||||
if global_engine:
|
||||
render_frame = global_engine.get("Render/3D/0", {}).get("busy")
|
||||
video_frame = global_engine.get("Video/0", {}).get("busy")
|
||||
driver = fields.get("drm-driver")
|
||||
if driver not in ("i915", "xe"):
|
||||
continue
|
||||
|
||||
if render_frame is not None:
|
||||
render_global.append(float(render_frame))
|
||||
pdev = fields.get("drm-pdev", "")
|
||||
if target_pdev and pdev != target_pdev:
|
||||
continue
|
||||
|
||||
if video_frame is not None:
|
||||
video_global.append(float(video_frame))
|
||||
client_id = fields.get("drm-client-id")
|
||||
if not client_id:
|
||||
continue
|
||||
|
||||
clients = block.get("clients", {})
|
||||
key = (pdev, client_id, entry)
|
||||
if key in snapshot:
|
||||
continue
|
||||
|
||||
if clients:
|
||||
for client_block in clients.values():
|
||||
pid = client_block["pid"]
|
||||
engines: dict[str, tuple[int, int]] = {}
|
||||
|
||||
if pid not in client_usages:
|
||||
client_usages[pid] = []
|
||||
if driver == "i915":
|
||||
for fkey, engine in _I915_ENGINE_KEYS.items():
|
||||
raw = fields.get(fkey)
|
||||
if not raw:
|
||||
continue
|
||||
try:
|
||||
engines[engine] = (int(raw.split()[0]), 0)
|
||||
except (ValueError, IndexError):
|
||||
continue
|
||||
else:
|
||||
for suffix, engine in _XE_ENGINE_KEYS.items():
|
||||
busy_raw = fields.get(f"drm-cycles-{suffix}")
|
||||
total_raw = fields.get(f"drm-total-cycles-{suffix}")
|
||||
if not (busy_raw and total_raw):
|
||||
continue
|
||||
try:
|
||||
engines[engine] = (
|
||||
int(busy_raw.split()[0]),
|
||||
int(total_raw.split()[0]),
|
||||
)
|
||||
except (ValueError, IndexError):
|
||||
continue
|
||||
|
||||
# Sum all engine-class busy values for this client
|
||||
total_busy = 0.0
|
||||
for engine in client_block.get("engine-classes", {}).values():
|
||||
busy = engine.get("busy")
|
||||
if busy is not None:
|
||||
total_busy += float(busy)
|
||||
if not engines:
|
||||
continue
|
||||
|
||||
client_usages[pid].append(total_busy)
|
||||
snapshot[key] = {"driver": driver, "pid": entry, "engines": engines}
|
||||
|
||||
# Overall GPU usage from rc6 (idle) residency
|
||||
if rc6_values:
|
||||
rc6_avg = sum(rc6_values) / len(rc6_values)
|
||||
results["gpu"] = f"{round(100.0 - rc6_avg, 2)}%"
|
||||
return snapshot
|
||||
|
||||
results["mem"] = "-%"
|
||||
|
||||
# Compute: Render/3D engine (compute/shader workloads and QSV encode)
|
||||
if render_global:
|
||||
results["compute"] = f"{round(sum(render_global) / len(render_global), 2)}%"
|
||||
def get_intel_gpu_stats(intel_gpu_device: Optional[str]) -> Optional[dict[str, Any]]:
|
||||
"""Get stats by reading DRM fdinfo files.
|
||||
|
||||
# Decoder: Video engine (fixed-function codec)
|
||||
if video_global:
|
||||
results["dec"] = f"{round(sum(video_global) / len(video_global), 2)}%"
|
||||
Each DRM client FD exposes monotonic per-engine busy counters via
|
||||
/proc/<pid>/fdinfo/<fd> (i915 since kernel 5.19, Xe since first release).
|
||||
We sample twice and divide busy-time deltas by wall-clock to derive
|
||||
utilisation. Render/3D and the dedicated Compute engine are pooled into
|
||||
"compute"; Video into "dec".
|
||||
"""
|
||||
target_pdev = _resolve_intel_gpu_pdev(intel_gpu_device)
|
||||
|
||||
# Per-client GPU usage (sum of all engines per process)
|
||||
if client_usages:
|
||||
results["clients"] = {}
|
||||
snapshot_a = _read_intel_drm_fdinfo(target_pdev)
|
||||
if not snapshot_a:
|
||||
return None
|
||||
|
||||
for pid, samples in client_usages.items():
|
||||
if samples:
|
||||
results["clients"][pid] = (
|
||||
f"{round(sum(samples) / len(samples), 2)}%"
|
||||
)
|
||||
start = time.monotonic()
|
||||
time.sleep(_INTEL_FDINFO_SAMPLE_SECONDS)
|
||||
elapsed_ns = (time.monotonic() - start) * 1e9
|
||||
|
||||
return results
|
||||
snapshot_b = _read_intel_drm_fdinfo(target_pdev)
|
||||
if not snapshot_b or elapsed_ns <= 0:
|
||||
return None
|
||||
|
||||
engine_pct: dict[str, float] = {
|
||||
"render": 0.0,
|
||||
"video": 0.0,
|
||||
"video-enhance": 0.0,
|
||||
"compute": 0.0,
|
||||
"copy": 0.0,
|
||||
}
|
||||
pid_pct: dict[str, float] = {}
|
||||
|
||||
for key, data_b in snapshot_b.items():
|
||||
data_a = snapshot_a.get(key)
|
||||
if not data_a or data_a["driver"] != data_b["driver"]:
|
||||
continue
|
||||
|
||||
client_total = 0.0
|
||||
for engine, (busy_b, total_b) in data_b["engines"].items():
|
||||
busy_a, total_a = data_a["engines"].get(engine, (busy_b, total_b))
|
||||
|
||||
if data_b["driver"] == "i915":
|
||||
delta = max(0, busy_b - busy_a)
|
||||
pct = min(100.0, delta / elapsed_ns * 100.0)
|
||||
else:
|
||||
delta_busy = max(0, busy_b - busy_a)
|
||||
delta_total = total_b - total_a
|
||||
if delta_total <= 0:
|
||||
continue
|
||||
pct = min(100.0, delta_busy / delta_total * 100.0)
|
||||
|
||||
engine_pct[engine] += pct
|
||||
client_total += pct
|
||||
|
||||
pid_pct[data_b["pid"]] = pid_pct.get(data_b["pid"], 0.0) + client_total
|
||||
|
||||
for engine in engine_pct:
|
||||
engine_pct[engine] = min(100.0, engine_pct[engine])
|
||||
|
||||
compute_pct = min(100.0, engine_pct["render"] + engine_pct["compute"])
|
||||
dec_pct = engine_pct["video"]
|
||||
overall_pct = max(
|
||||
compute_pct, dec_pct, engine_pct["video-enhance"], engine_pct["copy"]
|
||||
)
|
||||
|
||||
results: dict[str, Any] = {
|
||||
"gpu": f"{round(overall_pct, 2)}%",
|
||||
"mem": "-%",
|
||||
"compute": f"{round(compute_pct, 2)}%",
|
||||
"dec": f"{round(dec_pct, 2)}%",
|
||||
}
|
||||
|
||||
if pid_pct:
|
||||
results["clients"] = {
|
||||
pid: f"{round(min(100.0, pct), 2)}%" for pid, pct in pid_pct.items()
|
||||
}
|
||||
|
||||
return results
|
||||
|
||||
|
||||
def get_openvino_npu_stats() -> Optional[dict[str, str]]:
|
||||
|
||||
Loading…
Reference in New Issue
Block a user