Adjust approach

This commit is contained in:
Nicolas Mowen 2026-05-04 09:07:19 -06:00
parent 40eac10b15
commit ba65485a53
2 changed files with 27 additions and 20 deletions

View File

@@ -24,14 +24,15 @@ class TestGpuStats(unittest.TestCase):
# 1 second of wall clock between snapshots # 1 second of wall clock between snapshots
monotonic.side_effect = [0.0, 1.0] monotonic.side_effect = [0.0, 1.0]
# Two i915 clients on the same iGPU. Engine values are cumulative ns; # Two i915 clients on the same iGPU. Engine values are cumulative ns.
# we'll arrange deltas of: # Deltas over the 1s window:
# client A (pid 100): render +200_000_000 ns (20%), video +500_000_000 ns (50%) # client A (pid 100): render +200_000_000 (20%), video +500_000_000 (50%),
# client B (pid 200): compute +100_000_000 ns (10%) # video-enhance +100_000_000 (10%)
# Combined engine totals → render 20%, compute 10%, video 50% # client B (pid 200): compute +100_000_000 (10%)
# → "compute" = render + compute = 30% # Engine totals → render 20, video 50, video-enhance 10, compute 10
# → "dec" = video = 50% # → compute = render + compute = 30
# → "gpu" = max(30, 50, 0, 0) = 50% # → dec = video + video-enhance = 60
# → gpu = compute + dec = 90
snapshot_a = { snapshot_a = {
("0000:00:02.0", "1", "100"): { ("0000:00:02.0", "1", "100"): {
"driver": "i915", "driver": "i915",
@@ -39,6 +40,7 @@ class TestGpuStats(unittest.TestCase):
"engines": { "engines": {
"render": (1_000_000_000, 0), "render": (1_000_000_000, 0),
"video": (5_000_000_000, 0), "video": (5_000_000_000, 0),
"video-enhance": (200_000_000, 0),
"compute": (0, 0), "compute": (0, 0),
}, },
}, },
@@ -58,6 +60,7 @@ class TestGpuStats(unittest.TestCase):
"engines": { "engines": {
"render": (1_200_000_000, 0), "render": (1_200_000_000, 0),
"video": (5_500_000_000, 0), "video": (5_500_000_000, 0),
"video-enhance": (300_000_000, 0),
"compute": (0, 0), "compute": (0, 0),
}, },
}, },
@@ -76,11 +79,11 @@ class TestGpuStats(unittest.TestCase):
sleep.assert_called_once() sleep.assert_called_once()
assert intel_stats == { assert intel_stats == {
"gpu": "50.0%", "gpu": "90.0%",
"mem": "-%", "mem": "-%",
"compute": "30.0%", "compute": "30.0%",
"dec": "50.0%", "dec": "60.0%",
"clients": {"100": "70.0%", "200": "10.0%"}, "clients": {"100": "80.0%", "200": "10.0%"},
} }
@patch("frigate.util.services._read_intel_drm_fdinfo") @patch("frigate.util.services._read_intel_drm_fdinfo")

View File

@@ -266,13 +266,17 @@ def get_amd_gpu_stats() -> Optional[dict[str, str]]:
_INTEL_FDINFO_SAMPLE_SECONDS = 1.0 _INTEL_FDINFO_SAMPLE_SECONDS = 1.0
# Engines we track. Render/3D and Compute are pooled into "compute"; Video and
# VideoEnhance into "dec" (VideoEnhance is the post-process engine that handles
# VAAPI scaling/deinterlace/CSC, e.g. ffmpeg `-vf scale_vaapi=...`). The Copy
# (DMA blitter) engine is intentionally ignored — it represents transparent
# memory transfers, not user-visible GPU work.
# i915 fdinfo keys (cumulative ns) → logical engine name. # i915 fdinfo keys (cumulative ns) → logical engine name.
_I915_ENGINE_KEYS = { _I915_ENGINE_KEYS = {
"drm-engine-render": "render", "drm-engine-render": "render",
"drm-engine-video": "video", "drm-engine-video": "video",
"drm-engine-video-enhance": "video-enhance", "drm-engine-video-enhance": "video-enhance",
"drm-engine-compute": "compute", "drm-engine-compute": "compute",
"drm-engine-copy": "copy",
} }
# Xe fdinfo suffixes (cumulative cycles, paired with drm-total-cycles-*). # Xe fdinfo suffixes (cumulative cycles, paired with drm-total-cycles-*).
_XE_ENGINE_KEYS = { _XE_ENGINE_KEYS = {
@@ -280,7 +284,6 @@ _XE_ENGINE_KEYS = {
"vcs": "video", "vcs": "video",
"vecs": "video-enhance", "vecs": "video-enhance",
"ccs": "compute", "ccs": "compute",
"bcs": "copy",
} }
@@ -396,8 +399,9 @@ def get_intel_gpu_stats(intel_gpu_device: Optional[str]) -> Optional[dict[str, A
Each DRM client FD exposes monotonic per-engine busy counters via Each DRM client FD exposes monotonic per-engine busy counters via
/proc/<pid>/fdinfo/<fd> (i915 since kernel 5.19, Xe since first release). /proc/<pid>/fdinfo/<fd> (i915 since kernel 5.19, Xe since first release).
We sample twice and divide busy-time deltas by wall-clock to derive We sample twice and divide busy-time deltas by wall-clock to derive
utilisation. Render/3D and the dedicated Compute engine are pooled into utilization. Render/3D and Compute are pooled into "compute"; Video and
"compute"; Video into "dec". VideoEnhance into "dec". Overall "gpu" is the sum of those pools (clamped
to 100%).
""" """
target_pdev = _resolve_intel_gpu_pdev(intel_gpu_device) target_pdev = _resolve_intel_gpu_pdev(intel_gpu_device)
@@ -418,7 +422,6 @@ def get_intel_gpu_stats(intel_gpu_device: Optional[str]) -> Optional[dict[str, A
"video": 0.0, "video": 0.0,
"video-enhance": 0.0, "video-enhance": 0.0,
"compute": 0.0, "compute": 0.0,
"copy": 0.0,
} }
pid_pct: dict[str, float] = {} pid_pct: dict[str, float] = {}
@@ -429,6 +432,9 @@ def get_intel_gpu_stats(intel_gpu_device: Optional[str]) -> Optional[dict[str, A
client_total = 0.0 client_total = 0.0
for engine, (busy_b, total_b) in data_b["engines"].items(): for engine, (busy_b, total_b) in data_b["engines"].items():
if engine not in engine_pct:
continue
busy_a, total_a = data_a["engines"].get(engine, (busy_b, total_b)) busy_a, total_a = data_a["engines"].get(engine, (busy_b, total_b))
if data_b["driver"] == "i915": if data_b["driver"] == "i915":
@@ -450,10 +456,8 @@ def get_intel_gpu_stats(intel_gpu_device: Optional[str]) -> Optional[dict[str, A
engine_pct[engine] = min(100.0, engine_pct[engine]) engine_pct[engine] = min(100.0, engine_pct[engine])
compute_pct = min(100.0, engine_pct["render"] + engine_pct["compute"]) compute_pct = min(100.0, engine_pct["render"] + engine_pct["compute"])
dec_pct = engine_pct["video"] dec_pct = min(100.0, engine_pct["video"] + engine_pct["video-enhance"])
overall_pct = max( overall_pct = min(100.0, compute_pct + dec_pct)
compute_pct, dec_pct, engine_pct["video-enhance"], engine_pct["copy"]
)
results: dict[str, Any] = { results: dict[str, Any] = {
"gpu": f"{round(overall_pct, 2)}%", "gpu": f"{round(overall_pct, 2)}%",