This commit is contained in:
Sergey Krashevich 2023-05-03 09:22:54 +03:00
parent b38c9e82e2
commit a338f74ecd
No known key found for this signature in database
GPG Key ID: 625171324E7D3856
4 changed files with 34 additions and 39 deletions

View File

@ -151,9 +151,11 @@ async def set_gpu_stats(
nvidia_usage = get_nvidia_gpu_stats() nvidia_usage = get_nvidia_gpu_stats()
if nvidia_usage: if nvidia_usage:
name = nvidia_usage["name"] for i in nvidia_usage:
del nvidia_usage["name"] stats[nvidia_usage[i]["name"]] = {
stats[name] = nvidia_usage "gpu": round(nvidia_usage[i]["gpu"],2),
"mem": round(nvidia_usage[i]["mem"],2),
}
else: else:
stats["nvidia-gpu"] = {"gpu": -1, "mem": -1} stats["nvidia-gpu"] = {"gpu": -1, "mem": -1}
hwaccel_errors.append(args) hwaccel_errors.append(args)

View File

@ -16,6 +16,7 @@ from collections import Counter
from collections.abc import Mapping from collections.abc import Mapping
from multiprocessing import shared_memory from multiprocessing import shared_memory
from typing import Any, AnyStr, Optional, Tuple from typing import Any, AnyStr, Optional, Tuple
import py3nvml.py3nvml as nvml
import cv2 import cv2
import numpy as np import numpy as np
@ -915,46 +916,37 @@ def get_intel_gpu_stats() -> dict[str, str]:
return results return results
def try_get_info(f, h, default='N/A'):
    """Call a query function and fall back to ``default`` if unsupported.

    Args:
        f: Callable taking one argument (at the call site, an NVML
            query such as ``nvml.nvmlDeviceGetUtilizationRates``).
        h: Value passed to ``f`` (at the call site, a device handle).
        default: Value returned when ``f`` raises
            ``nvml.NVMLError_NotSupported``.

    Returns:
        The result of ``f(h)``, or ``default`` when the query is not
        supported. Any other exception raised by ``f`` propagates.
    """
    try:
        return f(h)
    except nvml.NVMLError_NotSupported:
        return default
def get_nvidia_gpu_stats() -> Optional[dict[int, dict[str, Any]]]:
    """Get per-device Nvidia GPU stats using NVML.

    Returns:
        A dict keyed by NVML device index. Each value is a dict with the
        device "name", its "gpu" utilization (0 when the device does not
        support the utilization query) and "mem", the used / total memory
        ratio expressed as a percentage. Returns None when NVML cannot be
        initialised or queried, so callers can take their fallback path.
    """
    try:
        nvml.nvmlInit()
    except nvml.NVMLError as e:
        # No driver / library available: report it instead of crashing
        # the stats poller.
        logger.error(f"Unable to poll nvidia GPU stats: {e}")
        return None

    try:
        results: dict[int, dict[str, Any]] = {}
        for i in range(nvml.nvmlDeviceGetCount()):
            handle = nvml.nvmlDeviceGetHandleByIndex(i)
            meminfo = nvml.nvmlDeviceGetMemoryInfo(handle)
            # Utilization is optional on some devices; None marks "unsupported".
            util = try_get_info(
                nvml.nvmlDeviceGetUtilizationRates, handle, default=None
            )
            results[i] = {
                "name": nvml.nvmlDeviceGetName(handle),
                "gpu": util.gpu if util is not None else 0,
                "mem": meminfo.used / meminfo.total * 100,
            }
        return results
    except nvml.NVMLError as e:
        logger.error(f"Unable to poll nvidia GPU stats: {e}")
        return None
    finally:
        # Pair every successful nvmlInit() with a shutdown so repeated
        # polling does not keep NVML initialised.
        try:
            nvml.nvmlShutdown()
        except nvml.NVMLError as e:
            # Best-effort cleanup; the stats (or None) are already decided.
            logger.debug(f"nvmlShutdown failed: {e}")
def ffprobe_stream(path: str) -> sp.CompletedProcess: def ffprobe_stream(path: str) -> sp.CompletedProcess:
"""Run ffprobe on stream.""" """Run ffprobe on stream."""
clean_path = escape_special_characters(path) clean_path = escape_special_characters(path)

View File

@ -11,6 +11,7 @@ peewee == 3.15.*
peewee_migrate == 1.7.* peewee_migrate == 1.7.*
psutil == 5.9.* psutil == 5.9.*
pydantic == 1.10.* pydantic == 1.10.*
git+https://github.com/fbcotter/py3nvml#egg=py3nvml
PyYAML == 6.0 PyYAML == 6.0
pytz == 2023.3 pytz == 2023.3
tzlocal == 4.3 tzlocal == 4.3

View File

@ -268,8 +268,8 @@ export default function System() {
</Thead> </Thead>
<Tbody> <Tbody>
<Tr> <Tr>
<Td>{gpu_usages[gpu]['gpu']}</Td> <Td>{gpu_usages[gpu]['gpu']}%</Td>
<Td>{gpu_usages[gpu]['mem']}</Td> <Td>{gpu_usages[gpu]['mem']}%</Td>
</Tr> </Tr>
</Tbody> </Tbody>
</Table> </Table>