Refactor Intel Stats (#22674)

* Improve Intel stats collection
* Update handling of stats to be simpler
* Simplify handling
* More accurately label Intel stats
* Cleanup
* Remove
2026-05-15 09:50:51 +03:00 · 2026-03-29 11:09:02 -06:00 · 2026-03-29 11:09:02 -06:00 · 831cfc2444
commit 831cfc2444
parent 29ca18c24c
8 changed files with 180 additions and 81 deletions
--- a/frigate/stats/prometheus.py
+++ b/frigate/stats/prometheus.py
@ -355,16 +355,37 @@ class CustomCollector(object):
        gpu_mem_usages = GaugeMetricFamily(
            "frigate_gpu_mem_usage_percent", "GPU memory usage %", labels=["gpu_name"]
        )
+        gpu_enc_usages = GaugeMetricFamily(
+            "frigate_gpu_encoder_usage_percent",
+            "GPU encoder utilisation %",
+            labels=["gpu_name"],
+        )
+        gpu_compute_usages = GaugeMetricFamily(
+            "frigate_gpu_compute_usage_percent",
+            "GPU compute / encode utilisation %",
+            labels=["gpu_name"],
+        )
+        gpu_dec_usages = GaugeMetricFamily(
+            "frigate_gpu_decoder_usage_percent",
+            "GPU decoder utilisation %",
+            labels=["gpu_name"],
+        )

        try:
            for gpu_name, gpu_stats in stats["gpu_usages"].items():
                self.add_metric(gpu_usages, [gpu_name], gpu_stats, "gpu")
                self.add_metric(gpu_mem_usages, [gpu_name], gpu_stats, "mem")
+                self.add_metric(gpu_enc_usages, [gpu_name], gpu_stats, "enc")
+                self.add_metric(gpu_compute_usages, [gpu_name], gpu_stats, "compute")
+                self.add_metric(gpu_dec_usages, [gpu_name], gpu_stats, "dec")
        except KeyError:
            pass

        yield gpu_usages
        yield gpu_mem_usages
+        yield gpu_enc_usages
+        yield gpu_compute_usages
+        yield gpu_dec_usages

        # service stats
        uptime_seconds = GaugeMetricFamily(
--- a/frigate/stats/util.py
+++ b/frigate/stats/util.py
@ -261,45 +261,33 @@ async def set_gpu_stats(
            else:
                stats["jetson-gpu"] = {"gpu": "", "mem": ""}
                hwaccel_errors.append(args)
-        elif "qsv" in args:
+        elif "qsv" in args or ("vaapi" in args and not is_vaapi_amd_driver()):
            if not config.telemetry.stats.intel_gpu_stats:
                continue

-            # intel QSV GPU
-            intel_usage = get_intel_gpu_stats(config.telemetry.stats.intel_gpu_device)
-
-            if intel_usage is not None:
-                stats["intel-qsv"] = intel_usage or {"gpu": "", "mem": ""}
-            else:
-                stats["intel-qsv"] = {"gpu": "", "mem": ""}
-                hwaccel_errors.append(args)
-        elif "vaapi" in args:
-            if is_vaapi_amd_driver():
-                if not config.telemetry.stats.amd_gpu_stats:
-                    continue
-
-                # AMD VAAPI GPU
-                amd_usage = get_amd_gpu_stats()
-
-                if amd_usage:
-                    stats["amd-vaapi"] = amd_usage
-                else:
-                    stats["amd-vaapi"] = {"gpu": "", "mem": ""}
-                    hwaccel_errors.append(args)
-            else:
-                if not config.telemetry.stats.intel_gpu_stats:
-                    continue
-
-                # intel VAAPI GPU
+            if "intel-gpu" not in stats:
+                # intel GPU (QSV or VAAPI both use the same physical GPU)
                intel_usage = get_intel_gpu_stats(
                    config.telemetry.stats.intel_gpu_device
                )

                if intel_usage is not None:
-                    stats["intel-vaapi"] = intel_usage or {"gpu": "", "mem": ""}
+                    stats["intel-gpu"] = intel_usage or {"gpu": "", "mem": ""}
                else:
-                    stats["intel-vaapi"] = {"gpu": "", "mem": ""}
+                    stats["intel-gpu"] = {"gpu": "", "mem": ""}
                    hwaccel_errors.append(args)
+        elif "vaapi" in args:
+            if not config.telemetry.stats.amd_gpu_stats:
+                continue
+
+            # AMD VAAPI GPU
+            amd_usage = get_amd_gpu_stats()
+
+            if amd_usage:
+                stats["amd-vaapi"] = amd_usage
+            else:
+                stats["amd-vaapi"] = {"gpu": "", "mem": ""}
+                hwaccel_errors.append(args)
        elif "preset-rk" in args:
            rga_usage = get_rockchip_gpu_stats()

--- a/frigate/test/test_gpu_stats.py
+++ b/frigate/test/test_gpu_stats.py
@ -39,8 +39,12 @@ class TestGpuStats(unittest.TestCase):
        process.stdout = self.intel_results
        sp.return_value = process
        intel_stats = get_intel_gpu_stats(False)
-        print(f"the intel stats are {intel_stats}")
+        # rc6 values: 47.844741 and 100.0 → avg 73.92 → gpu = 100 - 73.92 = 26.08%
+        # Render/3D/0: 0.0 and 0.0 → compute = 0.0%
+        # Video/0: 4.533124 and 0.0 → dec = 2.27%
        assert intel_stats == {
-            "gpu": "1.13%",
+            "gpu": "26.08%",
            "mem": "-%",
+            "compute": "0.0%",
+            "dec": "2.27%",
        }
--- a/frigate/util/services.py
+++ b/frigate/util/services.py
@ -265,14 +265,30 @@ def get_amd_gpu_stats() -> Optional[dict[str, str]]:


 def get_intel_gpu_stats(intel_gpu_device: Optional[str]) -> Optional[dict[str, str]]:
-    """Get stats using intel_gpu_top."""
+    """Get stats using intel_gpu_top.
+
+    Returns overall GPU usage derived from rc6 residency (idle time),
+    plus individual engine breakdowns:
+      - compute: Render/3D engine (compute/shader workloads and QSV encode)
+      - dec: Video engines (fixed-function codec, used by VAAPI)
+    """

    def get_stats_manually(output: str) -> dict[str, str]:
        """Find global stats via regex when json fails to parse."""
        reading = "".join(output)
        results: dict[str, str] = {}

-        # render is used for qsv
+        # rc6 residency for overall GPU usage
+        rc6_match = re.search(r'"rc6":\{"value":([\d.]+)', reading)
+        if rc6_match:
+            rc6_value = float(rc6_match.group(1))
+            results["gpu"] = f"{round(100.0 - rc6_value, 2)}%"
+        else:
+            results["gpu"] = "-%"
+
+        results["mem"] = "-%"
+
+        # Render/3D is the compute/encode engine
        render = []
        for result in re.findall(r'"Render/3D/0":{[a-z":\d.,%]+}', reading):
            packet = json.loads(result[14:])
@ -280,11 +296,9 @@ def get_intel_gpu_stats(intel_gpu_device: Optional[str]) -> Optional[dict[str, s
            render.append(float(single))

        if render:
-            render_avg = sum(render) / len(render)
-        else:
-            render_avg = 1
+            results["compute"] = f"{round(sum(render) / len(render), 2)}%"

-        # video is used for vaapi
+        # Video engines are the fixed-function decode engines
        video = []
        for result in re.findall(r'"Video/\d":{[a-z":\d.,%]+}', reading):
            packet = json.loads(result[10:])
@ -292,12 +306,8 @@ def get_intel_gpu_stats(intel_gpu_device: Optional[str]) -> Optional[dict[str, s
            video.append(float(single))

        if video:
-            video_avg = sum(video) / len(video)
-        else:
-            video_avg = 1
+            results["dec"] = f"{round(sum(video) / len(video), 2)}%"

-        results["gpu"] = f"{round((video_avg + render_avg) / 2, 2)}%"
-        results["mem"] = "-%"
        return results

    intel_gpu_top_command = [
@ -336,10 +346,18 @@ def get_intel_gpu_stats(intel_gpu_device: Optional[str]) -> Optional[dict[str, s
            return get_stats_manually(output)

        results: dict[str, str] = {}
-        render = {"global": []}
-        video = {"global": []}
+        rc6_values = []
+        render_global = []
+        video_global = []
+        # per-client: {pid: [total_busy_per_sample, ...]}
+        client_usages: dict[str, list[float]] = {}

        for block in data:
+            # rc6 residency: percentage of time GPU is idle
+            rc6 = block.get("rc6", {}).get("value")
+            if rc6 is not None:
+                rc6_values.append(float(rc6))
+
            global_engine = block.get("engines")

            if global_engine:
@ -347,48 +365,53 @@ def get_intel_gpu_stats(intel_gpu_device: Optional[str]) -> Optional[dict[str, s
                video_frame = global_engine.get("Video/0", {}).get("busy")

                if render_frame is not None:
-                    render["global"].append(float(render_frame))
+                    render_global.append(float(render_frame))

                if video_frame is not None:
-                    video["global"].append(float(video_frame))
+                    video_global.append(float(video_frame))

            clients = block.get("clients", {})

-            if clients and len(clients):
+            if clients:
                for client_block in clients.values():
-                    key = client_block["pid"]
+                    pid = client_block["pid"]

-                    if render.get(key) is None:
-                        render[key] = []
-                        video[key] = []
+                    if pid not in client_usages:
+                        client_usages[pid] = []

-                    client_engine = client_block.get("engine-classes", {})
+                    # Sum all engine-class busy values for this client
+                    total_busy = 0.0
+                    for engine in client_block.get("engine-classes", {}).values():
+                        busy = engine.get("busy")
+                        if busy is not None:
+                            total_busy += float(busy)

-                    render_frame = client_engine.get("Render/3D", {}).get("busy")
-                    video_frame = client_engine.get("Video", {}).get("busy")
+                    client_usages[pid].append(total_busy)

-                    if render_frame is not None:
-                        render[key].append(float(render_frame))
+        # Overall GPU usage from rc6 (idle) residency
+        if rc6_values:
+            rc6_avg = sum(rc6_values) / len(rc6_values)
+            results["gpu"] = f"{round(100.0 - rc6_avg, 2)}%"

-                    if video_frame is not None:
-                        video[key].append(float(video_frame))
+        results["mem"] = "-%"

-        if render["global"] and video["global"]:
-            results["gpu"] = (
-                f"{round(((sum(render['global']) / len(render['global'])) + (sum(video['global']) / len(video['global']))) / 2, 2)}%"
-            )
-            results["mem"] = "-%"
+        # Compute: Render/3D engine (compute/shader workloads and QSV encode)
+        if render_global:
+            results["compute"] = f"{round(sum(render_global) / len(render_global), 2)}%"

-        if len(render.keys()) > 1:
+        # Decoder: Video engine (fixed-function codec)
+        if video_global:
+            results["dec"] = f"{round(sum(video_global) / len(video_global), 2)}%"
+
+        # Per-client GPU usage (sum of all engines per process)
+        if client_usages:
            results["clients"] = {}

-            for key in render.keys():
-                if key == "global" or not render[key] or not video[key]:
-                    continue
-
-                results["clients"][key] = (
-                    f"{round(((sum(render[key]) / len(render[key])) + (sum(video[key]) / len(video[key]))) / 2, 2)}%"
-                )
+            for pid, samples in client_usages.items():
+                if samples:
+                    results["clients"][pid] = (
+                        f"{round(sum(samples) / len(samples), 2)}%"
+                    )

        return results

--- a/web/public/locales/en/views/system.json
+++ b/web/public/locales/en/views/system.json
@ -78,6 +78,7 @@
      "gpuUsage": "GPU Usage",
      "gpuMemory": "GPU Memory",
      "gpuEncoder": "GPU Encoder",
+      "gpuCompute": "GPU Compute / Encode",
      "gpuDecoder": "GPU Decoder",
      "gpuTemperature": "GPU Temperature",
      "gpuInfo": {
@ -188,6 +189,7 @@
      "cameraFfmpeg": "{{camName}} FFmpeg",
      "cameraCapture": "{{camName}} capture",
      "cameraDetect": "{{camName}} detect",
+      "cameraGpu": "{{camName}} GPU",
      "cameraFramesPerSecond": "{{camName}} frames per second",
      "cameraDetectionsPerSecond": "{{camName}} detections per second",
      "cameraSkippedDetectionsPerSecond": "{{camName}} skipped detections per second"
--- a/web/src/components/Statusbar.tsx
+++ b/web/src/components/Statusbar.tsx
@ -116,8 +116,7 @@ export default function Statusbar() {
            case "amd-vaapi":
              gpuTitle = "AMD GPU";
              break;
-            case "intel-vaapi":
-            case "intel-qsv":
+            case "intel-gpu":
              gpuTitle = "Intel GPU";
              break;
            case "rockchip":
--- a/web/src/types/stats.ts
+++ b/web/src/types/stats.ts
@ -60,8 +60,10 @@ export type GpuStats = {
  mem: string;
  enc?: string;
  dec?: string;
+  compute?: string;
  pstate?: string;
  temp?: number;
+  clients?: { [pid: string]: string };
 };

 export type NpuStats = {
--- a/web/src/views/system/GeneralMetrics.tsx
+++ b/web/src/views/system/GeneralMetrics.tsx
@ -76,7 +76,7 @@ export default function GeneralMetrics({

    statsHistory.length > 0 &&
      Object.keys(statsHistory[0]?.gpu_usages ?? {}).forEach((key) => {
-        if (key == "amd-vaapi" || key == "intel-vaapi" || key == "intel-qsv") {
+        if (key == "amd-vaapi" || key == "intel-gpu") {
          vaCount += 1;
        }

@ -265,7 +265,7 @@ export default function GeneralMetrics({

    if (
      Object.keys(statsHistory?.at(0)?.gpu_usages ?? {}).length == 1 &&
-      Object.keys(statsHistory?.at(0)?.gpu_usages ?? {})[0].includes("intel")
+      Object.keys(statsHistory?.at(0)?.gpu_usages ?? {})[0] === "intel-gpu"
    ) {
      // intel gpu stats do not support memory
      return undefined;
@ -334,6 +334,43 @@ export default function GeneralMetrics({
    return Object.keys(series).length > 0 ? Object.values(series) : undefined;
  }, [statsHistory]);

+  const gpuComputeSeries = useMemo(() => {
+    if (!statsHistory) {
+      return [];
+    }
+
+    const series: {
+      [key: string]: { name: string; data: { x: number; y: string }[] };
+    } = {};
+    let hasValidGpu = false;
+
+    statsHistory.forEach((stats, statsIdx) => {
+      if (!stats) {
+        return;
+      }
+
+      Object.entries(stats.gpu_usages || {}).forEach(([key, stats]) => {
+        if (!(key in series)) {
+          series[key] = { name: key, data: [] };
+        }
+
+        if (stats.compute) {
+          hasValidGpu = true;
+          series[key].data.push({
+            x: statsIdx + 1,
+            y: stats.compute.slice(0, -1),
+          });
+        }
+      });
+    });
+
+    if (!hasValidGpu) {
+      return [];
+    }
+
+    return Object.keys(series).length > 0 ? Object.values(series) : undefined;
+  }, [statsHistory]);
+
  const gpuDecSeries = useMemo(() => {
    if (!statsHistory) {
      return [];
@ -409,9 +446,7 @@ export default function GeneralMetrics({
    }

    const gpuKeys = Object.keys(statsHistory[0]?.gpu_usages ?? {});
-    const hasIntelGpu = gpuKeys.some(
-      (key) => key === "intel-vaapi" || key === "intel-qsv",
-    );
+    const hasIntelGpu = gpuKeys.some((key) => key === "intel-gpu");

    if (!hasIntelGpu) {
      return false;
@ -427,7 +462,7 @@ export default function GeneralMetrics({
      }

      Object.entries(stats.gpu_usages || {}).forEach(([key, gpuStats]) => {
-        if (key === "intel-vaapi" || key === "intel-qsv") {
+        if (key === "intel-gpu") {
          if (gpuStats.gpu) {
            hasDataPoints = true;
            const gpuValue = parseFloat(gpuStats.gpu.slice(0, -1));
@ -744,8 +779,9 @@ export default function GeneralMetrics({
              className={cn(
                "mt-4 grid grid-cols-1 gap-2 sm:grid-cols-2",
                gpuTempSeries?.length && "md:grid-cols-3",
-                gpuEncSeries?.length && "xl:grid-cols-4",
-                gpuEncSeries?.length &&
+                (gpuEncSeries?.length || gpuComputeSeries?.length) &&
+                  "xl:grid-cols-4",
+                (gpuEncSeries?.length || gpuComputeSeries?.length) &&
                  gpuTempSeries?.length &&
                  "3xl:grid-cols-5",
              )}
@ -858,6 +894,30 @@ export default function GeneralMetrics({
                  ) : (
                    <Skeleton className="aspect-video w-full" />
                  )}
+                  {statsHistory.length != 0 ? (
+                    <>
+                      {gpuComputeSeries && gpuComputeSeries?.length != 0 && (
+                        <div className="rounded-lg bg-background_alt p-2.5 md:rounded-2xl">
+                          <div className="mb-5">
+                            {t("general.hardwareInfo.gpuCompute")}
+                          </div>
+                          {gpuComputeSeries.map((series) => (
+                            <ThresholdBarGraph
+                              key={series.name}
+                              graphId={`${series.name}-compute`}
+                              unit="%"
+                              name={series.name}
+                              threshold={GPUMemThreshold}
+                              updateTimes={updateTimes}
+                              data={[series]}
+                            />
+                          ))}
+                        </div>
+                      )}
+                    </>
+                  ) : (
+                    <Skeleton className="aspect-video w-full" />
+                  )}
                  {statsHistory.length != 0 ? (
                    <>
                      {gpuDecSeries && gpuDecSeries?.length != 0 && (