mirror of
https://github.com/blakeblackshear/frigate.git
synced 2026-05-05 04:57:42 +03:00
Rewrite intel stats (#23108)
* Rewrite intel GPU stats to use file descriptors instead of intel_gpu_top, leading to significantly better API for interaction and more accurate results * Update tests * Update docs * Adjust approach * Update strings
This commit is contained in:
parent
814c497bef
commit
ef9d7e07b7
@ -136,90 +136,32 @@ ffmpeg:
|
|||||||
</TabItem>
|
</TabItem>
|
||||||
</ConfigTabs>
|
</ConfigTabs>
|
||||||
|
|
||||||
### Configuring Intel GPU Stats in Docker
|
### Configuring Intel GPU Stats
|
||||||
|
|
||||||
Additional configuration is needed for the Docker container to be able to access the `intel_gpu_top` command for GPU stats. There are two options:
|
Frigate reads Intel GPU utilization directly from the kernel's per-client DRM usage counters exposed at `/proc/<pid>/fdinfo/<fd>`. This requires:
|
||||||
|
|
||||||
1. Run the container as privileged.
|
- Linux kernel **5.19 or newer** for the `i915` driver, or any release of the `xe` driver.
|
||||||
2. Add the `CAP_PERFMON` capability (note: you might need to set the `perf_event_paranoid` low enough to allow access to the performance event system.)
|
- Frigate running with permission to read other processes' fdinfo. Running as root inside the container (the default) satisfies this; non-root setups may need `CAP_SYS_PTRACE`.
|
||||||
|
|
||||||
#### Run as privileged
|
No `intel_gpu_top` binary, `CAP_PERFMON`, privileged mode, or `perf_event_paranoid` tuning is required.
|
||||||
|
|
||||||
This method works, but it gives more permissions to the container than are actually needed.
|
#### Stats for SR-IOV or specific devices
|
||||||
|
|
||||||
##### Docker Compose - Privileged
|
If the host has more than one Intel GPU (e.g. an iGPU plus a discrete GPU, or SR-IOV virtual functions), pin stats collection to a specific device by setting `intel_gpu_device` to either its PCI bus address or a DRM card/render-node path:
|
||||||
|
|
||||||
```yaml
|
|
||||||
services:
|
|
||||||
frigate:
|
|
||||||
...
|
|
||||||
image: ghcr.io/blakeblackshear/frigate:stable
|
|
||||||
# highlight-next-line
|
|
||||||
privileged: true
|
|
||||||
```
|
|
||||||
|
|
||||||
##### Docker Run CLI - Privileged
|
|
||||||
|
|
||||||
```bash {4}
|
|
||||||
docker run -d \
|
|
||||||
--name frigate \
|
|
||||||
...
|
|
||||||
--privileged \
|
|
||||||
ghcr.io/blakeblackshear/frigate:stable
|
|
||||||
```
|
|
||||||
|
|
||||||
#### CAP_PERFMON
|
|
||||||
|
|
||||||
Only recent versions of Docker support the `CAP_PERFMON` capability. You can test to see if yours supports it by running: `docker run --cap-add=CAP_PERFMON hello-world`
|
|
||||||
|
|
||||||
##### Docker Compose - CAP_PERFMON
|
|
||||||
|
|
||||||
```yaml {5,6}
|
|
||||||
services:
|
|
||||||
frigate:
|
|
||||||
...
|
|
||||||
image: ghcr.io/blakeblackshear/frigate:stable
|
|
||||||
cap_add:
|
|
||||||
- CAP_PERFMON
|
|
||||||
```
|
|
||||||
|
|
||||||
##### Docker Run CLI - CAP_PERFMON
|
|
||||||
|
|
||||||
```bash {4}
|
|
||||||
docker run -d \
|
|
||||||
--name frigate \
|
|
||||||
...
|
|
||||||
--cap-add=CAP_PERFMON \
|
|
||||||
ghcr.io/blakeblackshear/frigate:stable
|
|
||||||
```
|
|
||||||
|
|
||||||
#### perf_event_paranoid
|
|
||||||
|
|
||||||
_Note: This setting must be changed for the entire system._
|
|
||||||
|
|
||||||
For more information on the various values across different distributions, see https://askubuntu.com/questions/1400874/what-does-perf-paranoia-level-four-do.
|
|
||||||
|
|
||||||
Depending on your OS and kernel configuration, you may need to change the `/proc/sys/kernel/perf_event_paranoid` kernel tunable. You can test the change by running `sudo sh -c 'echo 2 >/proc/sys/kernel/perf_event_paranoid'` which will persist until a reboot. Make it permanent by running `sudo sh -c 'echo kernel.perf_event_paranoid=2 >> /etc/sysctl.d/local.conf'`
|
|
||||||
|
|
||||||
#### Stats for SR-IOV or other devices
|
|
||||||
|
|
||||||
When using virtualized GPUs via SR-IOV, you need to specify the device path to use to gather stats from `intel_gpu_top`. This example may work for some systems using SR-IOV:
|
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
telemetry:
|
telemetry:
|
||||||
stats:
|
stats:
|
||||||
intel_gpu_device: "sriov"
|
intel_gpu_device: "0000:00:02.0"
|
||||||
```
|
```
|
||||||
|
|
||||||
For other virtualized GPUs, try specifying the direct path to the device instead:
|
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
telemetry:
|
telemetry:
|
||||||
stats:
|
stats:
|
||||||
intel_gpu_device: "drm:/dev/dri/card0"
|
intel_gpu_device: "/dev/dri/card1"
|
||||||
```
|
```
|
||||||
|
|
||||||
If you are passing in a device path, make sure you've passed the device through to the container.
|
When passing a device path, make sure the device is also passed through to the container.
|
||||||
|
|
||||||
## AMD-based CPUs
|
## AMD-based CPUs
|
||||||
|
|
||||||
|
|||||||
@ -25,8 +25,8 @@ class StatsConfig(FrigateBaseModel):
|
|||||||
)
|
)
|
||||||
intel_gpu_device: Optional[str] = Field(
|
intel_gpu_device: Optional[str] = Field(
|
||||||
default=None,
|
default=None,
|
||||||
title="SR-IOV device",
|
title="Intel GPU device",
|
||||||
description="Device identifier used when treating Intel GPUs as SR-IOV to fix GPU stats.",
|
description="PCI bus address or DRM device path (e.g. /dev/dri/card1) used to pin Intel GPU stats to a specific device when multiple are present.",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -7,8 +7,6 @@ from frigate.util.services import get_amd_gpu_stats, get_intel_gpu_stats
|
|||||||
class TestGpuStats(unittest.TestCase):
|
class TestGpuStats(unittest.TestCase):
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
self.amd_results = "Unknown Radeon card. <= R500 won't work, new cards might.\nDumping to -, line limit 1.\n1664070990.607556: bus 10, gpu 4.17%, ee 0.00%, vgt 0.00%, ta 0.00%, tc 0.00%, sx 0.00%, sh 0.00%, spi 0.83%, smx 0.00%, cr 0.00%, sc 0.00%, pa 0.00%, db 0.00%, cb 0.00%, vram 60.37% 294.04mb, gtt 0.33% 52.21mb, mclk 100.00% 1.800ghz, sclk 26.65% 0.533ghz\n"
|
self.amd_results = "Unknown Radeon card. <= R500 won't work, new cards might.\nDumping to -, line limit 1.\n1664070990.607556: bus 10, gpu 4.17%, ee 0.00%, vgt 0.00%, ta 0.00%, tc 0.00%, sx 0.00%, sh 0.00%, spi 0.83%, smx 0.00%, cr 0.00%, sc 0.00%, pa 0.00%, db 0.00%, cb 0.00%, vram 60.37% 294.04mb, gtt 0.33% 52.21mb, mclk 100.00% 1.800ghz, sclk 26.65% 0.533ghz\n"
|
||||||
self.intel_results = """{"period":{"duration":1.194033,"unit":"ms"},"frequency":{"requested":0.000000,"actual":0.000000,"unit":"MHz"},"interrupts":{"count":3349.991164,"unit":"irq/s"},"rc6":{"value":47.844741,"unit":"%"},"engines":{"Render/3D/0":{"busy":0.000000,"sema":0.000000,"wait":0.000000,"unit":"%"},"Blitter/0":{"busy":0.000000,"sema":0.000000,"wait":0.000000,"unit":"%"},"Video/0":{"busy":4.533124,"sema":0.000000,"wait":0.000000,"unit":"%"},"Video/1":{"busy":6.194385,"sema":0.000000,"wait":0.000000,"unit":"%"},"VideoEnhance/0":{"busy":0.000000,"sema":0.000000,"wait":0.000000,"unit":"%"}}},{"period":{"duration":1.189291,"unit":"ms"},"frequency":{"requested":0.000000,"actual":0.000000,"unit":"MHz"},"interrupts":{"count":0.000000,"unit":"irq/s"},"rc6":{"value":100.000000,"unit":"%"},"engines":{"Render/3D/0":{"busy":0.000000,"sema":0.000000,"wait":0.000000,"unit":"%"},"Blitter/0":{"busy":0.000000,"sema":0.000000,"wait":0.000000,"unit":"%"},"Video/0":{"busy":0.000000,"sema":0.000000,"wait":0.000000,"unit":"%"},"Video/1":{"busy":0.000000,"sema":0.000000,"wait":0.000000,"unit":"%"},"VideoEnhance/0":{"busy":0.000000,"sema":0.000000,"wait":0.000000,"unit":"%"}}}"""
|
|
||||||
self.nvidia_results = "name, utilization.gpu [%], memory.used [MiB], memory.total [MiB]\nNVIDIA GeForce RTX 3050, 42 %, 5036 MiB, 8192 MiB\n"
|
|
||||||
|
|
||||||
@patch("subprocess.run")
|
@patch("subprocess.run")
|
||||||
def test_amd_gpu_stats(self, sp):
|
def test_amd_gpu_stats(self, sp):
|
||||||
@ -19,32 +17,76 @@ class TestGpuStats(unittest.TestCase):
|
|||||||
amd_stats = get_amd_gpu_stats()
|
amd_stats = get_amd_gpu_stats()
|
||||||
assert amd_stats == {"gpu": "4.17%", "mem": "60.37%"}
|
assert amd_stats == {"gpu": "4.17%", "mem": "60.37%"}
|
||||||
|
|
||||||
# @patch("subprocess.run")
|
@patch("frigate.util.services.time.sleep")
|
||||||
# def test_nvidia_gpu_stats(self, sp):
|
@patch("frigate.util.services.time.monotonic")
|
||||||
# process = MagicMock()
|
@patch("frigate.util.services._read_intel_drm_fdinfo")
|
||||||
# process.returncode = 0
|
def test_intel_gpu_stats_fdinfo(self, read_fdinfo, monotonic, sleep):
|
||||||
# process.stdout = self.nvidia_results
|
# 1 second of wall clock between snapshots
|
||||||
# sp.return_value = process
|
monotonic.side_effect = [0.0, 1.0]
|
||||||
# nvidia_stats = get_nvidia_gpu_stats()
|
|
||||||
# assert nvidia_stats == {
|
|
||||||
# "name": "NVIDIA GeForce RTX 3050",
|
|
||||||
# "gpu": "42 %",
|
|
||||||
# "mem": "61.5 %",
|
|
||||||
# }
|
|
||||||
|
|
||||||
@patch("subprocess.run")
|
# Two i915 clients on the same iGPU. Engine values are cumulative ns.
|
||||||
def test_intel_gpu_stats(self, sp):
|
# Deltas over the 1s window:
|
||||||
process = MagicMock()
|
# client A (pid 100): render +200_000_000 (20%), video +500_000_000 (50%),
|
||||||
process.returncode = 124
|
# video-enhance +100_000_000 (10%)
|
||||||
process.stdout = self.intel_results
|
# client B (pid 200): compute +100_000_000 (10%)
|
||||||
sp.return_value = process
|
# Engine totals → render 20, video 50, video-enhance 10, compute 10
|
||||||
intel_stats = get_intel_gpu_stats(False)
|
# → compute = render + compute = 30
|
||||||
# rc6 values: 47.844741 and 100.0 → avg 73.92 → gpu = 100 - 73.92 = 26.08%
|
# → dec = video + video-enhance = 60
|
||||||
# Render/3D/0: 0.0 and 0.0 → enc = 0.0%
|
# → gpu = compute + dec = 90
|
||||||
# Video/0: 4.533124 and 0.0 → dec = 2.27%
|
snapshot_a = {
|
||||||
assert intel_stats == {
|
("0000:00:02.0", "1", "100"): {
|
||||||
"gpu": "26.08%",
|
"driver": "i915",
|
||||||
"mem": "-%",
|
"pid": "100",
|
||||||
"compute": "0.0%",
|
"engines": {
|
||||||
"dec": "2.27%",
|
"render": (1_000_000_000, 0),
|
||||||
|
"video": (5_000_000_000, 0),
|
||||||
|
"video-enhance": (200_000_000, 0),
|
||||||
|
"compute": (0, 0),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
("0000:00:02.0", "2", "200"): {
|
||||||
|
"driver": "i915",
|
||||||
|
"pid": "200",
|
||||||
|
"engines": {
|
||||||
|
"render": (0, 0),
|
||||||
|
"compute": (2_000_000_000, 0),
|
||||||
|
},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
snapshot_b = {
|
||||||
|
("0000:00:02.0", "1", "100"): {
|
||||||
|
"driver": "i915",
|
||||||
|
"pid": "100",
|
||||||
|
"engines": {
|
||||||
|
"render": (1_200_000_000, 0),
|
||||||
|
"video": (5_500_000_000, 0),
|
||||||
|
"video-enhance": (300_000_000, 0),
|
||||||
|
"compute": (0, 0),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
("0000:00:02.0", "2", "200"): {
|
||||||
|
"driver": "i915",
|
||||||
|
"pid": "200",
|
||||||
|
"engines": {
|
||||||
|
"render": (0, 0),
|
||||||
|
"compute": (2_100_000_000, 0),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
read_fdinfo.side_effect = [snapshot_a, snapshot_b]
|
||||||
|
|
||||||
|
intel_stats = get_intel_gpu_stats(None)
|
||||||
|
|
||||||
|
sleep.assert_called_once()
|
||||||
|
assert intel_stats == {
|
||||||
|
"gpu": "90.0%",
|
||||||
|
"mem": "-%",
|
||||||
|
"compute": "30.0%",
|
||||||
|
"dec": "60.0%",
|
||||||
|
"clients": {"100": "80.0%", "200": "10.0%"},
|
||||||
|
}
|
||||||
|
|
||||||
|
@patch("frigate.util.services._read_intel_drm_fdinfo")
|
||||||
|
def test_intel_gpu_stats_no_clients(self, read_fdinfo):
|
||||||
|
read_fdinfo.return_value = {}
|
||||||
|
assert get_intel_gpu_stats(None) is None
|
||||||
|
|||||||
@ -264,156 +264,214 @@ def get_amd_gpu_stats() -> Optional[dict[str, str]]:
|
|||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
def get_intel_gpu_stats(intel_gpu_device: Optional[str]) -> Optional[dict[str, str]]:
|
_INTEL_FDINFO_SAMPLE_SECONDS = 1.0
|
||||||
"""Get stats using intel_gpu_top.
|
|
||||||
|
|
||||||
Returns overall GPU usage derived from rc6 residency (idle time),
|
# Engines we track. Render/3D and Compute are pooled into "compute"; Video and
|
||||||
plus individual engine breakdowns:
|
# VideoEnhance into "dec" (VideoEnhance is the post-process engine that handles
|
||||||
- enc: Render/3D engine (compute/shader encoder, used by QSV)
|
# VAAPI scaling/deinterlace/CSC, e.g. ffmpeg `-vf scale_vaapi=...`). The Copy
|
||||||
- dec: Video engines (fixed-function codec, used by VAAPI)
|
# (DMA blitter) engine is intentionally ignored — it represents transparent
|
||||||
|
# memory transfers, not user-visible GPU work.
|
||||||
|
# i915 fdinfo keys (cumulative ns) → logical engine name.
|
||||||
|
_I915_ENGINE_KEYS = {
|
||||||
|
"drm-engine-render": "render",
|
||||||
|
"drm-engine-video": "video",
|
||||||
|
"drm-engine-video-enhance": "video-enhance",
|
||||||
|
"drm-engine-compute": "compute",
|
||||||
|
}
|
||||||
|
# Xe fdinfo suffixes (cumulative cycles, paired with drm-total-cycles-*).
|
||||||
|
_XE_ENGINE_KEYS = {
|
||||||
|
"rcs": "render",
|
||||||
|
"vcs": "video",
|
||||||
|
"vecs": "video-enhance",
|
||||||
|
"ccs": "compute",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_intel_gpu_pdev(device: Optional[str]) -> Optional[str]:
|
||||||
|
"""Map a configured GPU hint (/dev/dri/card1, renderD128, or a PCI bus
|
||||||
|
address) to its drm-pdev string so we can filter fdinfo entries to that
|
||||||
|
device. Returns None when no hint is supplied or it cannot be resolved."""
|
||||||
|
if not device:
|
||||||
|
return None
|
||||||
|
|
||||||
|
if re.match(r"^[0-9a-fA-F]{4}:[0-9a-fA-F]{2}:[0-9a-fA-F]{2}\.[0-9a-fA-F]$", device):
|
||||||
|
return device
|
||||||
|
|
||||||
|
name = os.path.basename(device.rstrip("/"))
|
||||||
|
try:
|
||||||
|
return os.path.basename(os.path.realpath(f"/sys/class/drm/{name}/device"))
|
||||||
|
except OSError:
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _read_intel_drm_fdinfo(target_pdev: Optional[str]) -> dict:
|
||||||
|
"""Snapshot DRM fdinfo for every Intel client visible in /proc.
|
||||||
|
|
||||||
|
Returns a dict keyed by (pdev, drm-client-id, pid) so the same context
|
||||||
|
seen via multiple file descriptors on a single process collapses to one
|
||||||
|
entry.
|
||||||
"""
|
"""
|
||||||
|
snapshot: dict = {}
|
||||||
def get_stats_manually(output: str) -> dict[str, str]:
|
|
||||||
"""Find global stats via regex when json fails to parse."""
|
|
||||||
reading = "".join(output)
|
|
||||||
results: dict[str, str] = {}
|
|
||||||
|
|
||||||
# rc6 residency for overall GPU usage
|
|
||||||
rc6_match = re.search(r'"rc6":\{"value":([\d.]+)', reading)
|
|
||||||
if rc6_match:
|
|
||||||
rc6_value = float(rc6_match.group(1))
|
|
||||||
results["gpu"] = f"{round(100.0 - rc6_value, 2)}%"
|
|
||||||
else:
|
|
||||||
results["gpu"] = "-%"
|
|
||||||
|
|
||||||
results["mem"] = "-%"
|
|
||||||
|
|
||||||
# Render/3D is the compute/encode engine
|
|
||||||
render = []
|
|
||||||
for result in re.findall(r'"Render/3D/0":{[a-z":\d.,%]+}', reading):
|
|
||||||
packet = json.loads(result[14:])
|
|
||||||
single = packet.get("busy", 0.0)
|
|
||||||
render.append(float(single))
|
|
||||||
|
|
||||||
if render:
|
|
||||||
results["compute"] = f"{round(sum(render) / len(render), 2)}%"
|
|
||||||
|
|
||||||
# Video engines are the fixed-function decode engines
|
|
||||||
video = []
|
|
||||||
for result in re.findall(r'"Video/\d":{[a-z":\d.,%]+}', reading):
|
|
||||||
packet = json.loads(result[10:])
|
|
||||||
single = packet.get("busy", 0.0)
|
|
||||||
video.append(float(single))
|
|
||||||
|
|
||||||
if video:
|
|
||||||
results["dec"] = f"{round(sum(video) / len(video), 2)}%"
|
|
||||||
|
|
||||||
return results
|
|
||||||
|
|
||||||
intel_gpu_top_command = [
|
|
||||||
"timeout",
|
|
||||||
"0.5s",
|
|
||||||
"intel_gpu_top",
|
|
||||||
"-J",
|
|
||||||
"-o",
|
|
||||||
"-",
|
|
||||||
"-s",
|
|
||||||
"1000", # Intel changed this from seconds to milliseconds in 2024+ versions
|
|
||||||
]
|
|
||||||
|
|
||||||
if intel_gpu_device:
|
|
||||||
intel_gpu_top_command += ["-d", intel_gpu_device]
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
p = sp.run(
|
proc_entries = os.listdir("/proc")
|
||||||
intel_gpu_top_command,
|
except OSError:
|
||||||
encoding="ascii",
|
return snapshot
|
||||||
capture_output=True,
|
|
||||||
)
|
|
||||||
except UnicodeDecodeError:
|
|
||||||
return None
|
|
||||||
|
|
||||||
# timeout has a non-zero returncode when timeout is reached
|
for entry in proc_entries:
|
||||||
if p.returncode != 124:
|
if not entry.isdigit():
|
||||||
logger.error(f"Unable to poll intel GPU stats: {p.stderr}")
|
continue
|
||||||
return None
|
|
||||||
else:
|
|
||||||
output = "".join(p.stdout.split())
|
|
||||||
|
|
||||||
|
fdinfo_dir = f"/proc/{entry}/fdinfo"
|
||||||
try:
|
try:
|
||||||
data = json.loads(f"[{output}]")
|
fds = os.listdir(fdinfo_dir)
|
||||||
except json.JSONDecodeError:
|
except (FileNotFoundError, PermissionError, NotADirectoryError, OSError):
|
||||||
return get_stats_manually(output)
|
continue
|
||||||
|
|
||||||
results: dict[str, str] = {}
|
for fd in fds:
|
||||||
rc6_values = []
|
try:
|
||||||
render_global = []
|
with open(f"{fdinfo_dir}/{fd}") as f:
|
||||||
video_global = []
|
content = f.read()
|
||||||
# per-client: {pid: [total_busy_per_sample, ...]}
|
except (FileNotFoundError, PermissionError, OSError):
|
||||||
client_usages: dict[str, list[float]] = {}
|
continue
|
||||||
|
|
||||||
for block in data:
|
if "drm-driver" not in content:
|
||||||
# rc6 residency: percentage of time GPU is idle
|
continue
|
||||||
rc6 = block.get("rc6", {}).get("value")
|
|
||||||
if rc6 is not None:
|
|
||||||
rc6_values.append(float(rc6))
|
|
||||||
|
|
||||||
global_engine = block.get("engines")
|
fields: dict[str, str] = {}
|
||||||
|
for line in content.splitlines():
|
||||||
|
key, sep, value = line.partition(":")
|
||||||
|
if sep:
|
||||||
|
fields[key.strip()] = value.strip()
|
||||||
|
|
||||||
if global_engine:
|
driver = fields.get("drm-driver")
|
||||||
render_frame = global_engine.get("Render/3D/0", {}).get("busy")
|
if driver not in ("i915", "xe"):
|
||||||
video_frame = global_engine.get("Video/0", {}).get("busy")
|
continue
|
||||||
|
|
||||||
if render_frame is not None:
|
pdev = fields.get("drm-pdev", "")
|
||||||
render_global.append(float(render_frame))
|
if target_pdev and pdev != target_pdev:
|
||||||
|
continue
|
||||||
|
|
||||||
if video_frame is not None:
|
client_id = fields.get("drm-client-id")
|
||||||
video_global.append(float(video_frame))
|
if not client_id:
|
||||||
|
continue
|
||||||
|
|
||||||
clients = block.get("clients", {})
|
key = (pdev, client_id, entry)
|
||||||
|
if key in snapshot:
|
||||||
|
continue
|
||||||
|
|
||||||
if clients:
|
engines: dict[str, tuple[int, int]] = {}
|
||||||
for client_block in clients.values():
|
|
||||||
pid = client_block["pid"]
|
|
||||||
|
|
||||||
if pid not in client_usages:
|
if driver == "i915":
|
||||||
client_usages[pid] = []
|
for fkey, engine in _I915_ENGINE_KEYS.items():
|
||||||
|
raw = fields.get(fkey)
|
||||||
|
if not raw:
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
engines[engine] = (int(raw.split()[0]), 0)
|
||||||
|
except (ValueError, IndexError):
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
for suffix, engine in _XE_ENGINE_KEYS.items():
|
||||||
|
busy_raw = fields.get(f"drm-cycles-{suffix}")
|
||||||
|
total_raw = fields.get(f"drm-total-cycles-{suffix}")
|
||||||
|
if not (busy_raw and total_raw):
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
engines[engine] = (
|
||||||
|
int(busy_raw.split()[0]),
|
||||||
|
int(total_raw.split()[0]),
|
||||||
|
)
|
||||||
|
except (ValueError, IndexError):
|
||||||
|
continue
|
||||||
|
|
||||||
# Sum all engine-class busy values for this client
|
if not engines:
|
||||||
total_busy = 0.0
|
continue
|
||||||
for engine in client_block.get("engine-classes", {}).values():
|
|
||||||
busy = engine.get("busy")
|
|
||||||
if busy is not None:
|
|
||||||
total_busy += float(busy)
|
|
||||||
|
|
||||||
client_usages[pid].append(total_busy)
|
snapshot[key] = {"driver": driver, "pid": entry, "engines": engines}
|
||||||
|
|
||||||
# Overall GPU usage from rc6 (idle) residency
|
return snapshot
|
||||||
if rc6_values:
|
|
||||||
rc6_avg = sum(rc6_values) / len(rc6_values)
|
|
||||||
results["gpu"] = f"{round(100.0 - rc6_avg, 2)}%"
|
|
||||||
|
|
||||||
results["mem"] = "-%"
|
|
||||||
|
|
||||||
# Compute: Render/3D engine (compute/shader workloads and QSV encode)
|
def get_intel_gpu_stats(intel_gpu_device: Optional[str]) -> Optional[dict[str, Any]]:
|
||||||
if render_global:
|
"""Get stats by reading DRM fdinfo files.
|
||||||
results["compute"] = f"{round(sum(render_global) / len(render_global), 2)}%"
|
|
||||||
|
|
||||||
# Decoder: Video engine (fixed-function codec)
|
Each DRM client FD exposes monotonic per-engine busy counters via
|
||||||
if video_global:
|
/proc/<pid>/fdinfo/<fd> (i915 since kernel 5.19, Xe since first release).
|
||||||
results["dec"] = f"{round(sum(video_global) / len(video_global), 2)}%"
|
We sample twice and divide busy-time deltas by wall-clock to derive
|
||||||
|
utilization. Render/3D and Compute are pooled into "compute"; Video and
|
||||||
|
VideoEnhance into "dec". Overall "gpu" is the sum of those pools (clamped
|
||||||
|
to 100%).
|
||||||
|
"""
|
||||||
|
target_pdev = _resolve_intel_gpu_pdev(intel_gpu_device)
|
||||||
|
|
||||||
# Per-client GPU usage (sum of all engines per process)
|
snapshot_a = _read_intel_drm_fdinfo(target_pdev)
|
||||||
if client_usages:
|
if not snapshot_a:
|
||||||
results["clients"] = {}
|
return None
|
||||||
|
|
||||||
for pid, samples in client_usages.items():
|
start = time.monotonic()
|
||||||
if samples:
|
time.sleep(_INTEL_FDINFO_SAMPLE_SECONDS)
|
||||||
results["clients"][pid] = (
|
elapsed_ns = (time.monotonic() - start) * 1e9
|
||||||
f"{round(sum(samples) / len(samples), 2)}%"
|
|
||||||
)
|
|
||||||
|
|
||||||
return results
|
snapshot_b = _read_intel_drm_fdinfo(target_pdev)
|
||||||
|
if not snapshot_b or elapsed_ns <= 0:
|
||||||
|
return None
|
||||||
|
|
||||||
|
engine_pct: dict[str, float] = {
|
||||||
|
"render": 0.0,
|
||||||
|
"video": 0.0,
|
||||||
|
"video-enhance": 0.0,
|
||||||
|
"compute": 0.0,
|
||||||
|
}
|
||||||
|
pid_pct: dict[str, float] = {}
|
||||||
|
|
||||||
|
for key, data_b in snapshot_b.items():
|
||||||
|
data_a = snapshot_a.get(key)
|
||||||
|
if not data_a or data_a["driver"] != data_b["driver"]:
|
||||||
|
continue
|
||||||
|
|
||||||
|
client_total = 0.0
|
||||||
|
for engine, (busy_b, total_b) in data_b["engines"].items():
|
||||||
|
if engine not in engine_pct:
|
||||||
|
continue
|
||||||
|
|
||||||
|
busy_a, total_a = data_a["engines"].get(engine, (busy_b, total_b))
|
||||||
|
|
||||||
|
if data_b["driver"] == "i915":
|
||||||
|
delta = max(0, busy_b - busy_a)
|
||||||
|
pct = min(100.0, delta / elapsed_ns * 100.0)
|
||||||
|
else:
|
||||||
|
delta_busy = max(0, busy_b - busy_a)
|
||||||
|
delta_total = total_b - total_a
|
||||||
|
if delta_total <= 0:
|
||||||
|
continue
|
||||||
|
pct = min(100.0, delta_busy / delta_total * 100.0)
|
||||||
|
|
||||||
|
engine_pct[engine] += pct
|
||||||
|
client_total += pct
|
||||||
|
|
||||||
|
pid_pct[data_b["pid"]] = pid_pct.get(data_b["pid"], 0.0) + client_total
|
||||||
|
|
||||||
|
for engine in engine_pct:
|
||||||
|
engine_pct[engine] = min(100.0, engine_pct[engine])
|
||||||
|
|
||||||
|
compute_pct = min(100.0, engine_pct["render"] + engine_pct["compute"])
|
||||||
|
dec_pct = min(100.0, engine_pct["video"] + engine_pct["video-enhance"])
|
||||||
|
overall_pct = min(100.0, compute_pct + dec_pct)
|
||||||
|
|
||||||
|
results: dict[str, Any] = {
|
||||||
|
"gpu": f"{round(overall_pct, 2)}%",
|
||||||
|
"mem": "-%",
|
||||||
|
"compute": f"{round(compute_pct, 2)}%",
|
||||||
|
"dec": f"{round(dec_pct, 2)}%",
|
||||||
|
}
|
||||||
|
|
||||||
|
if pid_pct:
|
||||||
|
results["clients"] = {
|
||||||
|
pid: f"{round(min(100.0, pct), 2)}%" for pid, pct in pid_pct.items()
|
||||||
|
}
|
||||||
|
|
||||||
|
return results
|
||||||
|
|
||||||
|
|
||||||
def get_openvino_npu_stats() -> Optional[dict[str, str]]:
|
def get_openvino_npu_stats() -> Optional[dict[str, str]]:
|
||||||
|
|||||||
@ -485,6 +485,10 @@
|
|||||||
"hwaccel_args": {
|
"hwaccel_args": {
|
||||||
"label": "Export hwaccel args",
|
"label": "Export hwaccel args",
|
||||||
"description": "Hardware acceleration args to use for export/transcode operations."
|
"description": "Hardware acceleration args to use for export/transcode operations."
|
||||||
|
},
|
||||||
|
"max_concurrent": {
|
||||||
|
"label": "Maximum concurrent exports",
|
||||||
|
"description": "Maximum number of export jobs to process at the same time."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"preview": {
|
"preview": {
|
||||||
|
|||||||
@ -242,8 +242,8 @@
|
|||||||
"description": "Enable per-process network bandwidth monitoring for camera ffmpeg processes and detectors (requires capabilities)."
|
"description": "Enable per-process network bandwidth monitoring for camera ffmpeg processes and detectors (requires capabilities)."
|
||||||
},
|
},
|
||||||
"intel_gpu_device": {
|
"intel_gpu_device": {
|
||||||
"label": "SR-IOV device",
|
"label": "Intel GPU device",
|
||||||
"description": "Device identifier used when treating Intel GPUs as SR-IOV to fix GPU stats."
|
"description": "PCI bus address or DRM device path (e.g. /dev/dri/card1) used to pin Intel GPU stats to a specific device when multiple are present."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version_check": {
|
"version_check": {
|
||||||
@ -1000,6 +1000,10 @@
|
|||||||
"hwaccel_args": {
|
"hwaccel_args": {
|
||||||
"label": "Export hwaccel args",
|
"label": "Export hwaccel args",
|
||||||
"description": "Hardware acceleration args to use for export/transcode operations."
|
"description": "Hardware acceleration args to use for export/transcode operations."
|
||||||
|
},
|
||||||
|
"max_concurrent": {
|
||||||
|
"label": "Maximum concurrent exports",
|
||||||
|
"description": "Maximum number of export jobs to process at the same time."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"preview": {
|
"preview": {
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user