Refactor Intel Stats (#22674)

* Improve Intel stats collection

* Update handling of stats to be simpler

* Simplify handling

* More accurately label Intel stats

* Cleanup

* Remove
This commit is contained in:
Nicolas Mowen 2026-03-29 11:09:02 -06:00 committed by GitHub
parent 29ca18c24c
commit 831cfc2444
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 180 additions and 81 deletions

View File

@ -355,16 +355,37 @@ class CustomCollector(object):
gpu_mem_usages = GaugeMetricFamily( gpu_mem_usages = GaugeMetricFamily(
"frigate_gpu_mem_usage_percent", "GPU memory usage %", labels=["gpu_name"] "frigate_gpu_mem_usage_percent", "GPU memory usage %", labels=["gpu_name"]
) )
gpu_enc_usages = GaugeMetricFamily(
"frigate_gpu_encoder_usage_percent",
"GPU encoder utilisation %",
labels=["gpu_name"],
)
gpu_compute_usages = GaugeMetricFamily(
"frigate_gpu_compute_usage_percent",
"GPU compute / encode utilisation %",
labels=["gpu_name"],
)
gpu_dec_usages = GaugeMetricFamily(
"frigate_gpu_decoder_usage_percent",
"GPU decoder utilisation %",
labels=["gpu_name"],
)
try: try:
for gpu_name, gpu_stats in stats["gpu_usages"].items(): for gpu_name, gpu_stats in stats["gpu_usages"].items():
self.add_metric(gpu_usages, [gpu_name], gpu_stats, "gpu") self.add_metric(gpu_usages, [gpu_name], gpu_stats, "gpu")
self.add_metric(gpu_mem_usages, [gpu_name], gpu_stats, "mem") self.add_metric(gpu_mem_usages, [gpu_name], gpu_stats, "mem")
self.add_metric(gpu_enc_usages, [gpu_name], gpu_stats, "enc")
self.add_metric(gpu_compute_usages, [gpu_name], gpu_stats, "compute")
self.add_metric(gpu_dec_usages, [gpu_name], gpu_stats, "dec")
except KeyError: except KeyError:
pass pass
yield gpu_usages yield gpu_usages
yield gpu_mem_usages yield gpu_mem_usages
yield gpu_enc_usages
yield gpu_compute_usages
yield gpu_dec_usages
# service stats # service stats
uptime_seconds = GaugeMetricFamily( uptime_seconds = GaugeMetricFamily(

View File

@ -261,45 +261,33 @@ async def set_gpu_stats(
else: else:
stats["jetson-gpu"] = {"gpu": "", "mem": ""} stats["jetson-gpu"] = {"gpu": "", "mem": ""}
hwaccel_errors.append(args) hwaccel_errors.append(args)
elif "qsv" in args: elif "qsv" in args or ("vaapi" in args and not is_vaapi_amd_driver()):
if not config.telemetry.stats.intel_gpu_stats: if not config.telemetry.stats.intel_gpu_stats:
continue continue
# intel QSV GPU if "intel-gpu" not in stats:
intel_usage = get_intel_gpu_stats(config.telemetry.stats.intel_gpu_device) # intel GPU (QSV or VAAPI both use the same physical GPU)
if intel_usage is not None:
stats["intel-qsv"] = intel_usage or {"gpu": "", "mem": ""}
else:
stats["intel-qsv"] = {"gpu": "", "mem": ""}
hwaccel_errors.append(args)
elif "vaapi" in args:
if is_vaapi_amd_driver():
if not config.telemetry.stats.amd_gpu_stats:
continue
# AMD VAAPI GPU
amd_usage = get_amd_gpu_stats()
if amd_usage:
stats["amd-vaapi"] = amd_usage
else:
stats["amd-vaapi"] = {"gpu": "", "mem": ""}
hwaccel_errors.append(args)
else:
if not config.telemetry.stats.intel_gpu_stats:
continue
# intel VAAPI GPU
intel_usage = get_intel_gpu_stats( intel_usage = get_intel_gpu_stats(
config.telemetry.stats.intel_gpu_device config.telemetry.stats.intel_gpu_device
) )
if intel_usage is not None: if intel_usage is not None:
stats["intel-vaapi"] = intel_usage or {"gpu": "", "mem": ""} stats["intel-gpu"] = intel_usage or {"gpu": "", "mem": ""}
else: else:
stats["intel-vaapi"] = {"gpu": "", "mem": ""} stats["intel-gpu"] = {"gpu": "", "mem": ""}
hwaccel_errors.append(args) hwaccel_errors.append(args)
elif "vaapi" in args:
if not config.telemetry.stats.amd_gpu_stats:
continue
# AMD VAAPI GPU
amd_usage = get_amd_gpu_stats()
if amd_usage:
stats["amd-vaapi"] = amd_usage
else:
stats["amd-vaapi"] = {"gpu": "", "mem": ""}
hwaccel_errors.append(args)
elif "preset-rk" in args: elif "preset-rk" in args:
rga_usage = get_rockchip_gpu_stats() rga_usage = get_rockchip_gpu_stats()

View File

@ -39,8 +39,12 @@ class TestGpuStats(unittest.TestCase):
process.stdout = self.intel_results process.stdout = self.intel_results
sp.return_value = process sp.return_value = process
intel_stats = get_intel_gpu_stats(False) intel_stats = get_intel_gpu_stats(False)
print(f"the intel stats are {intel_stats}") # rc6 values: 47.844741 and 100.0 → avg 73.92 → gpu = 100 - 73.92 = 26.08%
# Render/3D/0: 0.0 and 0.0 → compute = 0.0%
# Video/0: 4.533124 and 0.0 → dec = 2.27%
assert intel_stats == { assert intel_stats == {
"gpu": "1.13%", "gpu": "26.08%",
"mem": "-%", "mem": "-%",
"compute": "0.0%",
"dec": "2.27%",
} }

View File

@ -265,14 +265,30 @@ def get_amd_gpu_stats() -> Optional[dict[str, str]]:
def get_intel_gpu_stats(intel_gpu_device: Optional[str]) -> Optional[dict[str, str]]: def get_intel_gpu_stats(intel_gpu_device: Optional[str]) -> Optional[dict[str, str]]:
"""Get stats using intel_gpu_top.""" """Get stats using intel_gpu_top.
Returns overall GPU usage derived from rc6 residency (idle time),
plus individual engine breakdowns:
- compute: Render/3D engine (compute/shader encoder, used by QSV)
- dec: Video engines (fixed-function codec, used by VAAPI)
"""
def get_stats_manually(output: str) -> dict[str, str]: def get_stats_manually(output: str) -> dict[str, str]:
"""Find global stats via regex when json fails to parse.""" """Find global stats via regex when json fails to parse."""
reading = "".join(output) reading = "".join(output)
results: dict[str, str] = {} results: dict[str, str] = {}
# render is used for qsv # rc6 residency for overall GPU usage
rc6_match = re.search(r'"rc6":\{"value":([\d.]+)', reading)
if rc6_match:
rc6_value = float(rc6_match.group(1))
results["gpu"] = f"{round(100.0 - rc6_value, 2)}%"
else:
results["gpu"] = "-%"
results["mem"] = "-%"
# Render/3D is the compute/encode engine
render = [] render = []
for result in re.findall(r'"Render/3D/0":{[a-z":\d.,%]+}', reading): for result in re.findall(r'"Render/3D/0":{[a-z":\d.,%]+}', reading):
packet = json.loads(result[14:]) packet = json.loads(result[14:])
@ -280,11 +296,9 @@ def get_intel_gpu_stats(intel_gpu_device: Optional[str]) -> Optional[dict[str, s
render.append(float(single)) render.append(float(single))
if render: if render:
render_avg = sum(render) / len(render) results["compute"] = f"{round(sum(render) / len(render), 2)}%"
else:
render_avg = 1
# video is used for vaapi # Video engines are the fixed-function decode engines
video = [] video = []
for result in re.findall(r'"Video/\d":{[a-z":\d.,%]+}', reading): for result in re.findall(r'"Video/\d":{[a-z":\d.,%]+}', reading):
packet = json.loads(result[10:]) packet = json.loads(result[10:])
@ -292,12 +306,8 @@ def get_intel_gpu_stats(intel_gpu_device: Optional[str]) -> Optional[dict[str, s
video.append(float(single)) video.append(float(single))
if video: if video:
video_avg = sum(video) / len(video) results["dec"] = f"{round(sum(video) / len(video), 2)}%"
else:
video_avg = 1
results["gpu"] = f"{round((video_avg + render_avg) / 2, 2)}%"
results["mem"] = "-%"
return results return results
intel_gpu_top_command = [ intel_gpu_top_command = [
@ -336,10 +346,18 @@ def get_intel_gpu_stats(intel_gpu_device: Optional[str]) -> Optional[dict[str, s
return get_stats_manually(output) return get_stats_manually(output)
results: dict[str, str] = {} results: dict[str, str] = {}
render = {"global": []} rc6_values = []
video = {"global": []} render_global = []
video_global = []
# per-client: {pid: [total_busy_per_sample, ...]}
client_usages: dict[str, list[float]] = {}
for block in data: for block in data:
# rc6 residency: percentage of time GPU is idle
rc6 = block.get("rc6", {}).get("value")
if rc6 is not None:
rc6_values.append(float(rc6))
global_engine = block.get("engines") global_engine = block.get("engines")
if global_engine: if global_engine:
@ -347,48 +365,53 @@ def get_intel_gpu_stats(intel_gpu_device: Optional[str]) -> Optional[dict[str, s
video_frame = global_engine.get("Video/0", {}).get("busy") video_frame = global_engine.get("Video/0", {}).get("busy")
if render_frame is not None: if render_frame is not None:
render["global"].append(float(render_frame)) render_global.append(float(render_frame))
if video_frame is not None: if video_frame is not None:
video["global"].append(float(video_frame)) video_global.append(float(video_frame))
clients = block.get("clients", {}) clients = block.get("clients", {})
if clients and len(clients): if clients:
for client_block in clients.values(): for client_block in clients.values():
key = client_block["pid"] pid = client_block["pid"]
if render.get(key) is None: if pid not in client_usages:
render[key] = [] client_usages[pid] = []
video[key] = []
client_engine = client_block.get("engine-classes", {}) # Sum all engine-class busy values for this client
total_busy = 0.0
for engine in client_block.get("engine-classes", {}).values():
busy = engine.get("busy")
if busy is not None:
total_busy += float(busy)
render_frame = client_engine.get("Render/3D", {}).get("busy") client_usages[pid].append(total_busy)
video_frame = client_engine.get("Video", {}).get("busy")
if render_frame is not None: # Overall GPU usage from rc6 (idle) residency
render[key].append(float(render_frame)) if rc6_values:
rc6_avg = sum(rc6_values) / len(rc6_values)
results["gpu"] = f"{round(100.0 - rc6_avg, 2)}%"
if video_frame is not None: results["mem"] = "-%"
video[key].append(float(video_frame))
if render["global"] and video["global"]: # Compute: Render/3D engine (compute/shader workloads and QSV encode)
results["gpu"] = ( if render_global:
f"{round(((sum(render['global']) / len(render['global'])) + (sum(video['global']) / len(video['global']))) / 2, 2)}%" results["compute"] = f"{round(sum(render_global) / len(render_global), 2)}%"
)
results["mem"] = "-%"
if len(render.keys()) > 1: # Decoder: Video engine (fixed-function codec)
if video_global:
results["dec"] = f"{round(sum(video_global) / len(video_global), 2)}%"
# Per-client GPU usage (sum of all engines per process)
if client_usages:
results["clients"] = {} results["clients"] = {}
for key in render.keys(): for pid, samples in client_usages.items():
if key == "global" or not render[key] or not video[key]: if samples:
continue results["clients"][pid] = (
f"{round(sum(samples) / len(samples), 2)}%"
results["clients"][key] = ( )
f"{round(((sum(render[key]) / len(render[key])) + (sum(video[key]) / len(video[key]))) / 2, 2)}%"
)
return results return results

View File

@ -78,6 +78,7 @@
"gpuUsage": "GPU Usage", "gpuUsage": "GPU Usage",
"gpuMemory": "GPU Memory", "gpuMemory": "GPU Memory",
"gpuEncoder": "GPU Encoder", "gpuEncoder": "GPU Encoder",
"gpuCompute": "GPU Compute / Encode",
"gpuDecoder": "GPU Decoder", "gpuDecoder": "GPU Decoder",
"gpuTemperature": "GPU Temperature", "gpuTemperature": "GPU Temperature",
"gpuInfo": { "gpuInfo": {
@ -188,6 +189,7 @@
"cameraFfmpeg": "{{camName}} FFmpeg", "cameraFfmpeg": "{{camName}} FFmpeg",
"cameraCapture": "{{camName}} capture", "cameraCapture": "{{camName}} capture",
"cameraDetect": "{{camName}} detect", "cameraDetect": "{{camName}} detect",
"cameraGpu": "{{camName}} GPU",
"cameraFramesPerSecond": "{{camName}} frames per second", "cameraFramesPerSecond": "{{camName}} frames per second",
"cameraDetectionsPerSecond": "{{camName}} detections per second", "cameraDetectionsPerSecond": "{{camName}} detections per second",
"cameraSkippedDetectionsPerSecond": "{{camName}} skipped detections per second" "cameraSkippedDetectionsPerSecond": "{{camName}} skipped detections per second"

View File

@ -116,8 +116,7 @@ export default function Statusbar() {
case "amd-vaapi": case "amd-vaapi":
gpuTitle = "AMD GPU"; gpuTitle = "AMD GPU";
break; break;
case "intel-vaapi": case "intel-gpu":
case "intel-qsv":
gpuTitle = "Intel GPU"; gpuTitle = "Intel GPU";
break; break;
case "rockchip": case "rockchip":

View File

@ -60,8 +60,10 @@ export type GpuStats = {
mem: string; mem: string;
enc?: string; enc?: string;
dec?: string; dec?: string;
compute?: string;
pstate?: string; pstate?: string;
temp?: number; temp?: number;
clients?: { [pid: string]: string };
}; };
export type NpuStats = { export type NpuStats = {

View File

@ -76,7 +76,7 @@ export default function GeneralMetrics({
statsHistory.length > 0 && statsHistory.length > 0 &&
Object.keys(statsHistory[0]?.gpu_usages ?? {}).forEach((key) => { Object.keys(statsHistory[0]?.gpu_usages ?? {}).forEach((key) => {
if (key == "amd-vaapi" || key == "intel-vaapi" || key == "intel-qsv") { if (key == "amd-vaapi" || key == "intel-gpu") {
vaCount += 1; vaCount += 1;
} }
@ -265,7 +265,7 @@ export default function GeneralMetrics({
if ( if (
Object.keys(statsHistory?.at(0)?.gpu_usages ?? {}).length == 1 && Object.keys(statsHistory?.at(0)?.gpu_usages ?? {}).length == 1 &&
Object.keys(statsHistory?.at(0)?.gpu_usages ?? {})[0].includes("intel") Object.keys(statsHistory?.at(0)?.gpu_usages ?? {})[0] === "intel-gpu"
) { ) {
// intel gpu stats do not support memory // intel gpu stats do not support memory
return undefined; return undefined;
@ -334,6 +334,43 @@ export default function GeneralMetrics({
return Object.keys(series).length > 0 ? Object.values(series) : undefined; return Object.keys(series).length > 0 ? Object.values(series) : undefined;
}, [statsHistory]); }, [statsHistory]);
const gpuComputeSeries = useMemo(() => {
if (!statsHistory) {
return [];
}
const series: {
[key: string]: { name: string; data: { x: number; y: string }[] };
} = {};
let hasValidGpu = false;
statsHistory.forEach((stats, statsIdx) => {
if (!stats) {
return;
}
Object.entries(stats.gpu_usages || {}).forEach(([key, stats]) => {
if (!(key in series)) {
series[key] = { name: key, data: [] };
}
if (stats.compute) {
hasValidGpu = true;
series[key].data.push({
x: statsIdx + 1,
y: stats.compute.slice(0, -1),
});
}
});
});
if (!hasValidGpu) {
return [];
}
return Object.keys(series).length > 0 ? Object.values(series) : undefined;
}, [statsHistory]);
const gpuDecSeries = useMemo(() => { const gpuDecSeries = useMemo(() => {
if (!statsHistory) { if (!statsHistory) {
return []; return [];
@ -409,9 +446,7 @@ export default function GeneralMetrics({
} }
const gpuKeys = Object.keys(statsHistory[0]?.gpu_usages ?? {}); const gpuKeys = Object.keys(statsHistory[0]?.gpu_usages ?? {});
const hasIntelGpu = gpuKeys.some( const hasIntelGpu = gpuKeys.some((key) => key === "intel-gpu");
(key) => key === "intel-vaapi" || key === "intel-qsv",
);
if (!hasIntelGpu) { if (!hasIntelGpu) {
return false; return false;
@ -427,7 +462,7 @@ export default function GeneralMetrics({
} }
Object.entries(stats.gpu_usages || {}).forEach(([key, gpuStats]) => { Object.entries(stats.gpu_usages || {}).forEach(([key, gpuStats]) => {
if (key === "intel-vaapi" || key === "intel-qsv") { if (key === "intel-gpu") {
if (gpuStats.gpu) { if (gpuStats.gpu) {
hasDataPoints = true; hasDataPoints = true;
const gpuValue = parseFloat(gpuStats.gpu.slice(0, -1)); const gpuValue = parseFloat(gpuStats.gpu.slice(0, -1));
@ -744,8 +779,9 @@ export default function GeneralMetrics({
className={cn( className={cn(
"mt-4 grid grid-cols-1 gap-2 sm:grid-cols-2", "mt-4 grid grid-cols-1 gap-2 sm:grid-cols-2",
gpuTempSeries?.length && "md:grid-cols-3", gpuTempSeries?.length && "md:grid-cols-3",
gpuEncSeries?.length && "xl:grid-cols-4", (gpuEncSeries?.length || gpuComputeSeries?.length) &&
gpuEncSeries?.length && "xl:grid-cols-4",
(gpuEncSeries?.length || gpuComputeSeries?.length) &&
gpuTempSeries?.length && gpuTempSeries?.length &&
"3xl:grid-cols-5", "3xl:grid-cols-5",
)} )}
@ -858,6 +894,30 @@ export default function GeneralMetrics({
) : ( ) : (
<Skeleton className="aspect-video w-full" /> <Skeleton className="aspect-video w-full" />
)} )}
{statsHistory.length != 0 ? (
<>
{gpuComputeSeries && gpuComputeSeries?.length != 0 && (
<div className="rounded-lg bg-background_alt p-2.5 md:rounded-2xl">
<div className="mb-5">
{t("general.hardwareInfo.gpuCompute")}
</div>
{gpuComputeSeries.map((series) => (
<ThresholdBarGraph
key={series.name}
graphId={`${series.name}-compute`}
unit="%"
name={series.name}
threshold={GPUMemThreshold}
updateTimes={updateTimes}
data={[series]}
/>
))}
</div>
)}
</>
) : (
<Skeleton className="aspect-video w-full" />
)}
{statsHistory.length != 0 ? ( {statsHistory.length != 0 ? (
<> <>
{gpuDecSeries && gpuDecSeries?.length != 0 && ( {gpuDecSeries && gpuDecSeries?.length != 0 && (