From aaedd24f3717ee90ab5c935e0abcb112ad8cd027 Mon Sep 17 00:00:00 2001 From: Nicolas Mowen Date: Mon, 28 Nov 2022 18:24:20 -0700 Subject: [PATCH] Add GPU stats to the /stats API and debug screen (#3931) * Add ffprobe endpoint * Get ffprobe for multiple inputs * Copy ffprobe in output * Fix bad if statement * Return full output of ffprobe process * Return full output of ffprobe process * Make ffprobe button show dialog with output and option to copy * Add driver names to consts * Add driver env var name * Setup general tracking for GPU stats * Catch RPi args as well * Add util to get radeontop results * Add real amd GPU stats * Fix missed arg * pass config * Use only the values * Fix vram * Add nvidia gpu stats * Use nvidia stats * Add chart for gpu stats * Format AMD with space between percent * Get correct nvidia % * Start to add support for intel GPU stats * Block out RPi as util is not currently available * Formatting * Fix mypy * Strip for float conversion * Strip for float conversion * Fix percent formatting * Remove name from gpu map * Add tests and fix AMD formatting * Add nvidia gpu stats test * Formatting * Add intel_gpu_top for testing * Formatting * Handle case where hwaccel is not setup * Formatting * Check to remove none * Don't use set * Cleanup and fix types * Handle case where args is list * Fix mypy * Cast to str * Fix type checking * Return none instead of empty * Fix organization * Make keys consistent * Make gpu match style * Get support for vainfo * Add vainfo endpoint * Set vainfo output in error correctly * Remove duplicate function * Fix errors * Do cpu & gpu work asynchronously * Fix async * Fix event loop * Fix crash * Fix naming * Send empty data for gpu if error occurs * Show error if gpu stats could not be retrieved * Fix mypy * Fix test * Don't use json for vainfo * Fix cross references * Strip unicode still * await vainfo response * Add gpu deps * Formatting * remove comments * Use empty string * Add vainfo back in --- 
docker/install_deps.sh | 4 +- frigate/const.py | 7 +++ frigate/http.py | 21 ++++++- frigate/stats.py | 99 +++++++++++++++++++++++++++++-- frigate/test/test_gpu_stats.py | 45 ++++++++++++++ frigate/util.py | 105 +++++++++++++++++++++++++++++++++ web/src/routes/System.jsx | 95 ++++++++++++++++++++++++++++- 7 files changed, 365 insertions(+), 11 deletions(-) create mode 100644 frigate/test/test_gpu_stats.py diff --git a/docker/install_deps.sh b/docker/install_deps.sh index 4929d2dd2..681a82f96 100755 --- a/docker/install_deps.sh +++ b/docker/install_deps.sh @@ -8,7 +8,7 @@ apt-get -qq install --no-install-recommends -y \ apt-transport-https \ gnupg \ wget \ - procps \ + procps vainfo \ unzip locales tzdata libxml2 xz-utils \ python3-pip @@ -53,7 +53,7 @@ if [[ "${TARGETARCH}" == "amd64" ]]; then echo 'deb http://deb.debian.org/debian testing main non-free' >/etc/apt/sources.list.d/debian-testing.list apt-get -qq update apt-get -qq install --no-install-recommends --no-install-suggests -y \ - mesa-va-drivers libva-drm2 intel-media-va-driver-non-free i965-va-driver libmfx1 + mesa-va-drivers libva-drm2 intel-media-va-driver-non-free i965-va-driver libmfx1 radeontop intel-gpu-tools rm -f /etc/apt/sources.list.d/debian-testing.list fi diff --git a/frigate/const.py b/frigate/const.py index d7dd8f197..b4d73f24b 100644 --- a/frigate/const.py +++ b/frigate/const.py @@ -11,3 +11,10 @@ PLUS_API_HOST = "https://api.frigate.video" REGEX_CAMERA_NAME = "^[a-zA-Z0-9_-]+$" REGEX_RTSP_CAMERA_USER_PASS = ":\/\/[a-zA-Z0-9_-]+:[\S]+@" REGEX_HTTP_CAMERA_USER_PASS = "user=[a-zA-Z0-9_-]+&password=[\S]+" + +# Known Driver Names + +DRIVER_ENV_VAR = "LIBVA_DRIVER_NAME" +DRIVER_AMD = "radeonsi" +DRIVER_INTEL_i965 = "i965" +DRIVER_INTEL_iHD = "iHD" diff --git a/frigate/http.py b/frigate/http.py index 22d96f2d6..f8d3e6325 100644 --- a/frigate/http.py +++ b/frigate/http.py @@ -26,11 +26,12 @@ from flask import ( from peewee import SqliteDatabase, operator, fn, DoesNotExist from playhouse.shortcuts 
import model_to_dict +from frigate.config import CameraConfig from frigate.const import CLIPS_DIR from frigate.models import Event, Recordings from frigate.object_processing import TrackedObject from frigate.stats import stats_snapshot -from frigate.util import clean_camera_user_pass, ffprobe_stream +from frigate.util import clean_camera_user_pass, ffprobe_stream, vainfo_hwaccel from frigate.version import VERSION logger = logging.getLogger(__name__) @@ -608,7 +609,7 @@ def version(): @bp.route("/stats") def stats(): - stats = stats_snapshot(current_app.stats_tracking) + stats = stats_snapshot(current_app.frigate_config, current_app.stats_tracking) return jsonify(stats) @@ -996,3 +997,19 @@ def ffprobe(): ) return jsonify(output) + + +@bp.route("/vainfo", methods=["GET"]) +def vainfo(): + vainfo = vainfo_hwaccel() + return jsonify( + { + "return_code": vainfo.returncode, + "stderr": vainfo.stderr.decode("unicode_escape").strip() + if vainfo.stderr.decode() + else "", + "stdout": vainfo.stdout.decode("unicode_escape").strip() + if vainfo.stdout.decode() + else "", + } + ) diff --git a/frigate/stats.py b/frigate/stats.py index 21f8dcf74..a9f1b8098 100644 --- a/frigate/stats.py +++ b/frigate/stats.py @@ -1,3 +1,4 @@ +import asyncio import json import logging import threading @@ -11,8 +12,9 @@ from multiprocessing.synchronize import Event as MpEvent from frigate.comms.dispatcher import Dispatcher from frigate.config import FrigateConfig -from frigate.const import RECORD_DIR, CLIPS_DIR, CACHE_DIR +from frigate.const import DRIVER_AMD, DRIVER_ENV_VAR, RECORD_DIR, CLIPS_DIR, CACHE_DIR from frigate.types import StatsTrackingTypes, CameraMetricsTypes +from frigate.util import get_amd_gpu_stats, get_intel_gpu_stats, get_nvidia_gpu_stats from frigate.version import VERSION from frigate.util import get_cpu_stats from frigate.object_detection import ObjectDetectProcess @@ -82,7 +84,96 @@ def get_temperatures() -> dict[str, float]: return temps -def 
stats_snapshot(stats_tracking: StatsTrackingTypes) -> dict[str, Any]: +def get_processing_stats(config: FrigateConfig, stats: dict[str, str]) -> None: + """Get stats for cpu / gpu.""" + + async def run_tasks() -> None: + await asyncio.wait( + [ + asyncio.create_task(set_gpu_stats(config, stats)), + asyncio.create_task(set_cpu_stats(stats)), + ] + ) + + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + loop.run_until_complete(run_tasks()) + loop.close() + + +async def set_cpu_stats(all_stats: dict[str, Any]) -> None: + """Set cpu usage from top.""" + cpu_stats = get_cpu_stats() + + if cpu_stats: + all_stats["cpu_usages"] = cpu_stats + + +async def set_gpu_stats(config: FrigateConfig, all_stats: dict[str, Any]) -> None: + """Parse GPUs from hwaccel args and use for stats.""" + hwaccel_args = [] + + for camera in config.cameras.values(): + args = camera.ffmpeg.hwaccel_args + + if isinstance(args, list): + args = " ".join(args) + + if args and args not in hwaccel_args: + hwaccel_args.append(args) + + stats: dict[str, dict] = {} + + for args in hwaccel_args: + if "cuvid" in args: + # nvidia GPU + nvidia_usage = get_nvidia_gpu_stats() + + if nvidia_usage: + name = nvidia_usage["name"] + del nvidia_usage["name"] + stats[name] = nvidia_usage + else: + stats["nvidia-gpu"] = {"gpu": -1, "mem": -1} + elif "qsv" in args: + # intel QSV GPU + intel_usage = get_intel_gpu_stats() + + if intel_usage: + stats["intel-qsv"] = intel_usage + else: + stats["intel-qsv"] = {"gpu": -1, "mem": -1} + elif "vaapi" in args: + driver = os.environ.get(DRIVER_ENV_VAR) + + if driver == DRIVER_AMD: + # AMD VAAPI GPU + amd_usage = get_amd_gpu_stats() + + if amd_usage: + stats["amd-vaapi"] = amd_usage + else: + stats["amd-vaapi"] = {"gpu": -1, "mem": -1} + else: + # intel VAAPI GPU + intel_usage = get_intel_gpu_stats() + + if intel_usage: + stats["intel-vaapi"] = intel_usage + else: + stats["intel-vaapi"] = {"gpu": -1, "mem": -1} + elif "v4l2m2m" in args: + # RPi v4l2m2m is currently 
not able to get usage stats + stats["rpi-v4l2m2m"] = {"gpu": -1, "mem": -1} + + if stats: + all_stats["gpu_usages"] = stats + + +def stats_snapshot( + config: FrigateConfig, stats_tracking: StatsTrackingTypes +) -> dict[str, Any]: + """Get a snapshot of the current stats that are being tracked.""" camera_metrics = stats_tracking["camera_metrics"] stats: dict[str, Any] = {} @@ -119,7 +210,7 @@ def stats_snapshot(stats_tracking: StatsTrackingTypes) -> dict[str, Any]: } stats["detection_fps"] = round(total_detection_fps, 2) - stats["cpu_usages"] = get_cpu_stats() + get_processing_stats(config, stats) stats["service"] = { "uptime": (int(time.time()) - stats_tracking["started"]), @@ -159,6 +250,6 @@ class StatsEmitter(threading.Thread): def run(self) -> None: time.sleep(10) while not self.stop_event.wait(self.config.mqtt.stats_interval): - stats = stats_snapshot(self.stats_tracking) + stats = stats_snapshot(self.config, self.stats_tracking) self.dispatcher.publish("stats", json.dumps(stats), retain=False) logger.info(f"Exiting watchdog...") diff --git a/frigate/test/test_gpu_stats.py b/frigate/test/test_gpu_stats.py new file mode 100644 index 000000000..8bff6b40a --- /dev/null +++ b/frigate/test/test_gpu_stats.py @@ -0,0 +1,45 @@ +import unittest +from unittest.mock import MagicMock, patch + +from frigate.util import get_amd_gpu_stats, get_intel_gpu_stats, get_nvidia_gpu_stats + + +class TestGpuStats(unittest.TestCase): + def setUp(self): + self.amd_results = "Unknown Radeon card. 
<= R500 won't work, new cards might.\nDumping to -, line limit 1.\n1664070990.607556: bus 10, gpu 4.17%, ee 0.00%, vgt 0.00%, ta 0.00%, tc 0.00%, sx 0.00%, sh 0.00%, spi 0.83%, smx 0.00%, cr 0.00%, sc 0.00%, pa 0.00%, db 0.00%, cb 0.00%, vram 60.37% 294.04mb, gtt 0.33% 52.21mb, mclk 100.00% 1.800ghz, sclk 26.65% 0.533ghz\n" + self.intel_results = """{"period":{"duration":1.194033,"unit":"ms"},"frequency":{"requested":0.000000,"actual":0.000000,"unit":"MHz"},"interrupts":{"count":3349.991164,"unit":"irq/s"},"rc6":{"value":47.844741,"unit":"%"},"engines":{"Render/3D/0":{"busy":0.000000,"sema":0.000000,"wait":0.000000,"unit":"%"},"Blitter/0":{"busy":0.000000,"sema":0.000000,"wait":0.000000,"unit":"%"},"Video/0":{"busy":4.533124,"sema":0.000000,"wait":0.000000,"unit":"%"},"Video/1":{"busy":6.194385,"sema":0.000000,"wait":0.000000,"unit":"%"},"VideoEnhance/0":{"busy":0.000000,"sema":0.000000,"wait":0.000000,"unit":"%"}}},{"period":{"duration":1.189291,"unit":"ms"},"frequency":{"requested":0.000000,"actual":0.000000,"unit":"MHz"},"interrupts":{"count":0.000000,"unit":"irq/s"},"rc6":{"value":100.000000,"unit":"%"},"engines":{"Render/3D/0":{"busy":0.000000,"sema":0.000000,"wait":0.000000,"unit":"%"},"Blitter/0":{"busy":0.000000,"sema":0.000000,"wait":0.000000,"unit":"%"},"Video/0":{"busy":0.000000,"sema":0.000000,"wait":0.000000,"unit":"%"},"Video/1":{"busy":0.000000,"sema":0.000000,"wait":0.000000,"unit":"%"},"VideoEnhance/0":{"busy":0.000000,"sema":0.000000,"wait":0.000000,"unit":"%"}}}""" + self.nvidia_results = "name, utilization.gpu [%], memory.used [MiB], memory.total [MiB]\nNVIDIA GeForce RTX 3050, 42 %, 5036 MiB, 8192 MiB\n" + + @patch("subprocess.run") + def test_amd_gpu_stats(self, sp): + process = MagicMock() + process.returncode = 0 + process.stdout = self.amd_results + sp.return_value = process + amd_stats = get_amd_gpu_stats() + assert amd_stats == {"gpu": "4.17 %", "mem": "60.37 %"} + + @patch("subprocess.run") + def test_nvidia_gpu_stats(self, sp): + 
process = MagicMock() + process.returncode = 0 + process.stdout = self.nvidia_results + sp.return_value = process + nvidia_stats = get_nvidia_gpu_stats() + assert nvidia_stats == { + "name": "NVIDIA GeForce RTX 3050", + "gpu": "42 %", + "mem": "61.5 %", + } + + @patch("subprocess.run") + def test_intel_gpu_stats(self, sp): + process = MagicMock() + process.returncode = 0 + process.stdout = self.intel_results + sp.return_value = process + intel_stats = get_intel_gpu_stats() + assert intel_stats == { + "gpu": "10.73 %", + "mem": "- %", + } diff --git a/frigate/util.py b/frigate/util.py index 9f7f419bf..0a5ef6a39 100755 --- a/frigate/util.py +++ b/frigate/util.py @@ -766,6 +766,105 @@ def get_cpu_stats() -> dict[str, dict]: return usages +def get_amd_gpu_stats() -> dict[str, str]: + """Get stats using radeontop.""" + radeontop_command = ["radeontop", "-d", "-", "-l", "1"] + + p = sp.run( + radeontop_command, + encoding="ascii", + capture_output=True, + ) + + if p.returncode != 0: + logger.error(p.stderr) + return None + else: + usages = p.stdout.split(",") + results: dict[str, str] = {} + + for hw in usages: + if "gpu" in hw: + results["gpu"] = f"{hw.strip().split(' ')[1].replace('%', '')} %" + elif "vram" in hw: + results["mem"] = f"{hw.strip().split(' ')[1].replace('%', '')} %" + + return results + + +def get_intel_gpu_stats() -> dict[str, str]: + """Get stats using intel_gpu_top.""" + intel_gpu_top_command = [ + "timeout", + "0.1s", + "intel_gpu_top", + "-J", + "-o", + "-", + "-s", + "1", + ] + + p = sp.run( + intel_gpu_top_command, + encoding="ascii", + capture_output=True, + ) + + if p.returncode != 0: + logger.error(p.stderr) + return None + else: + readings = json.loads(f"[{p.stdout}]") + results: dict[str, str] = {} + + for reading in readings: + if reading.get("engines", {}).get("Video/0", {}).get( + "busy", 0 + ) or reading.get("engines", {}).get("Video/1", {}).get("busy", 0): + gpu_usage = round( + float(reading.get("engines", {}).get("Video/0", 
{}).get("busy", 0)) + + float( + reading.get("engines", {}).get("Video/1", {}).get("busy", 0) + ), + 2, + ) + results["gpu"] = f"{gpu_usage} %" + break + + results["mem"] = "- %" + return results + + +def get_nvidia_gpu_stats() -> dict[str, str]: + """Get stats using nvidia-smi.""" + nvidia_smi_command = [ + "nvidia-smi", + "--query-gpu=gpu_name,utilization.gpu,memory.used,memory.total", + "--format=csv", + ] + + p = sp.run( + nvidia_smi_command, + encoding="ascii", + capture_output=True, + ) + + if p.returncode != 0: + logger.error(p.stderr) + return None + else: + usages = p.stdout.split("\n")[1].strip().split(",") + memory_percent = f"{round(float(usages[2].replace(' MiB', '').strip()) / float(usages[3].replace(' MiB', '').strip()) * 100, 1)} %" + results: dict[str, str] = { + "name": usages[0], + "gpu": usages[1].strip(), + "mem": memory_percent, + } + + return results + + def ffprobe_stream(path: str) -> sp.CompletedProcess: """Run ffprobe on stream.""" ffprobe_cmd = [ @@ -781,6 +880,12 @@ def ffprobe_stream(path: str) -> sp.CompletedProcess: return sp.run(ffprobe_cmd, capture_output=True) +def vainfo_hwaccel() -> sp.CompletedProcess: + """Run vainfo.""" + ffprobe_cmd = ["vainfo"] + return sp.run(ffprobe_cmd, capture_output=True) + + class FrameManager(ABC): @abstractmethod def create(self, name, size) -> AnyStr: diff --git a/web/src/routes/System.jsx b/web/src/routes/System.jsx index 964e8f47a..8010b9139 100644 --- a/web/src/routes/System.jsx +++ b/web/src/routes/System.jsx @@ -21,9 +21,17 @@ export default function System() { } = useWs('stats'); const { data: initialStats } = useSWR('stats'); - const { cpu_usages, detectors, service = {}, detection_fps: _, ...cameras } = stats || initialStats || emptyObject; + const { + cpu_usages, + gpu_usages, + detectors, + service = {}, + detection_fps: _, + ...cameras + } = stats || initialStats || emptyObject; const detectorNames = Object.keys(detectors || emptyObject); + const gpuNames = Object.keys(gpu_usages || 
emptyObject); const cameraNames = Object.keys(cameras || emptyObject); const handleCopyConfig = useCallback(() => { @@ -55,9 +63,9 @@ export default function System() { }); if (response.status === 200) { - setState({ showFfprobe: true, ffprobe: JSON.stringify(response.data, null, 2) }); + setState({ ...state, showFfprobe: true, ffprobe: JSON.stringify(response.data, null, 2) }); } else { - setState({ ...state, ffprobe: 'There was an error getting the ffprobe output.' }); + setState({ ...state, showFfprobe: true, ffprobe: 'There was an error getting the ffprobe output.' }); } }; @@ -66,11 +74,31 @@ export default function System() { setState({ ...state, ffprobe: '', showFfprobe: false }); }; + const onHandleVainfo = async (e) => { + if (e) { + e.stopPropagation(); + } + + const response = await axios.get('vainfo'); + + if (response.status === 200) { + setState({ ...state, showVainfo: true, vainfo: JSON.stringify(response.data, null, 2) }); + } else { + setState({ ...state, showVainfo: true, vainfo: 'There was an error getting the vainfo output.' }); + } + }; + + const onCopyVainfo = async () => { + await window.navigator.clipboard.writeText(JSON.stringify(state.vaifp, null, 2)); + setState({ ...state, vainfo: '', showVainfo: false }); + }; + return (
System {service.version} + {state.showFfprobe && (
@@ -92,6 +120,23 @@ export default function System() {
)} + {state.showVainfo && ( + +
+ Vainfo Output + {state.vainfo != '' ?

{state.vainfo}

: } +
+
+ + +
+
+ )} + {!detectors ? (
@@ -125,6 +170,50 @@ export default function System() { ))}
+
+ GPUs + +
+ + {!gpu_usages ? ( +
+ + Hardware acceleration has not been setup, see the docs to setup hardware acceleration. + +
+ ) : ( +
+ {gpuNames.map((gpu) => ( +
+
{gpu}
+
+ {gpu_usages[gpu]['gpu'] == -1 ? ( +
+ There was an error getting usage stats. Either your GPU does not support this or frigate does + not have proper access. +
+ ) : ( + + + + + + + + + + + + + +
Gpu %Memory %
{gpu_usages[gpu]['gpu']}{gpu_usages[gpu]['mem']}
+ )} +
+
+ ))} +
+ )} + Cameras
{cameraNames.map((camera) => (