nvml

2026-02-05 02:35:22 +03:00 · 2023-05-03 09:22:54 +03:00 · 2023-05-03 09:22:54 +03:00 · a338f74ecd
commit a338f74ecd
parent b38c9e82e2
4 changed files with 34 additions and 39 deletions
--- a/frigate/stats.py
+++ b/frigate/stats.py
@@ -151,9 +151,11 @@ async def set_gpu_stats(
            nvidia_usage = get_nvidia_gpu_stats()

            if nvidia_usage:
-                name = nvidia_usage["name"]
-                del nvidia_usage["name"]
-                stats[name] = nvidia_usage
+                for i in nvidia_usage:
+                    stats[nvidia_usage[i]["name"]] = {
+                        "gpu": round(nvidia_usage[i]["gpu"],2),
+                        "mem": round(nvidia_usage[i]["mem"],2),
+                    }
            else:
                stats["nvidia-gpu"] = {"gpu": -1, "mem": -1}
                hwaccel_errors.append(args)
--- a/frigate/util.py
+++ b/frigate/util.py
@@ -16,6 +16,7 @@ from collections import Counter
 from collections.abc import Mapping
 from multiprocessing import shared_memory
 from typing import Any, AnyStr, Optional, Tuple
+import py3nvml.py3nvml as nvml

 import cv2
 import numpy as np
@@ -915,46 +916,37 @@ def get_intel_gpu_stats() -> dict[str, str]:
        return results


+def try_get_info(f, h, default='N/A'):
+    try:
+        v = f(h)
+    except nvml.NVMLError_NotSupported:
+        v = default
+    return v
+
+
+
 def get_nvidia_gpu_stats() -> dict[str, str]:
-    """Get stats using nvidia-smi."""
-    nvidia_smi_command = [
-        "nvidia-smi",
-        "--query-gpu=gpu_name,utilization.gpu,memory.used,memory.total",
-        "--format=csv",
-    ]
-
-    if (
-        "CUDA_VISIBLE_DEVICES" in os.environ
-        and os.environ["CUDA_VISIBLE_DEVICES"].isdigit()
-    ):
-        nvidia_smi_command.extend(["--id", os.environ["CUDA_VISIBLE_DEVICES"]])
-    elif (
-        "NVIDIA_VISIBLE_DEVICES" in os.environ
-        and os.environ["NVIDIA_VISIBLE_DEVICES"].isdigit()
-    ):
-        nvidia_smi_command.extend(["--id", os.environ["NVIDIA_VISIBLE_DEVICES"]])
-
-    p = sp.run(
-        nvidia_smi_command,
-        encoding="ascii",
-        capture_output=True,
-    )
-
-    if p.returncode != 0:
-        logger.error(f"Unable to poll nvidia GPU stats: {p.stderr}")
-        return None
+    nvml.nvmlInit()
+    deviceCount = nvml.nvmlDeviceGetCount()
+    results = {}
+    for i in range(deviceCount):
+        handle = nvml.nvmlDeviceGetHandleByIndex(i)
+        meminfo = nvml.nvmlDeviceGetMemoryInfo(handle)
+        util = try_get_info(nvml.nvmlDeviceGetUtilizationRates, handle)
+        if util != 'N/A':
+            gpu_util = util.gpu
        else:
-        usages = p.stdout.split("\n")[1].strip().split(",")
-        memory_percent = f"{round(float(usages[2].replace(' MiB', '').strip()) / float(usages[3].replace(' MiB', '').strip()) * 100, 1)} %"
-        results: dict[str, str] = {
-            "name": usages[0],
-            "gpu": usages[1].strip(),
-            "mem": memory_percent,
+            gpu_util = 0
+        results[i] = {
+            "name": nvml.nvmlDeviceGetName(handle),
+            "gpu": gpu_util,
+            "mem": meminfo.used / meminfo.total * 100
        }

        return results


+
 def ffprobe_stream(path: str) -> sp.CompletedProcess:
    """Run ffprobe on stream."""
    clean_path = escape_special_characters(path)
--- a/requirements-wheels.txt
+++ b/requirements-wheels.txt
@@ -11,6 +11,7 @@ peewee == 3.15.*
 peewee_migrate == 1.7.*
 psutil == 5.9.*
 pydantic == 1.10.*
+git+https://github.com/fbcotter/py3nvml#egg=py3nvml
 PyYAML == 6.0
 pytz == 2023.3
 tzlocal == 4.3
--- a/web/src/routes/System.jsx
+++ b/web/src/routes/System.jsx
@@ -268,8 +268,8 @@ export default function System() {
                        </Thead>
                        <Tbody>
                          <Tr>
-                            <Td>{gpu_usages[gpu]['gpu']}</Td>
-                            <Td>{gpu_usages[gpu]['mem']}</Td>
+                            <Td>{gpu_usages[gpu]['gpu']}%</Td>
+                            <Td>{gpu_usages[gpu]['mem']}%</Td>
                          </Tr>
                        </Tbody>
                      </Table>