mirror of
https://github.com/blakeblackshear/frigate.git
synced 2026-03-30 11:54:52 +03:00
Refactor Intel Stats (#22674)
* Improve Intel stats collection * Update handling of stats to be simpler * Simplify handling * More accurately label Intel stats * Cleanup * Remove
This commit is contained in:
parent
29ca18c24c
commit
831cfc2444
@ -355,16 +355,37 @@ class CustomCollector(object):
|
||||
gpu_mem_usages = GaugeMetricFamily(
|
||||
"frigate_gpu_mem_usage_percent", "GPU memory usage %", labels=["gpu_name"]
|
||||
)
|
||||
gpu_enc_usages = GaugeMetricFamily(
|
||||
"frigate_gpu_encoder_usage_percent",
|
||||
"GPU encoder utilisation %",
|
||||
labels=["gpu_name"],
|
||||
)
|
||||
gpu_compute_usages = GaugeMetricFamily(
|
||||
"frigate_gpu_compute_usage_percent",
|
||||
"GPU compute / encode utilisation %",
|
||||
labels=["gpu_name"],
|
||||
)
|
||||
gpu_dec_usages = GaugeMetricFamily(
|
||||
"frigate_gpu_decoder_usage_percent",
|
||||
"GPU decoder utilisation %",
|
||||
labels=["gpu_name"],
|
||||
)
|
||||
|
||||
try:
|
||||
for gpu_name, gpu_stats in stats["gpu_usages"].items():
|
||||
self.add_metric(gpu_usages, [gpu_name], gpu_stats, "gpu")
|
||||
self.add_metric(gpu_mem_usages, [gpu_name], gpu_stats, "mem")
|
||||
self.add_metric(gpu_enc_usages, [gpu_name], gpu_stats, "enc")
|
||||
self.add_metric(gpu_compute_usages, [gpu_name], gpu_stats, "compute")
|
||||
self.add_metric(gpu_dec_usages, [gpu_name], gpu_stats, "dec")
|
||||
except KeyError:
|
||||
pass
|
||||
|
||||
yield gpu_usages
|
||||
yield gpu_mem_usages
|
||||
yield gpu_enc_usages
|
||||
yield gpu_compute_usages
|
||||
yield gpu_dec_usages
|
||||
|
||||
# service stats
|
||||
uptime_seconds = GaugeMetricFamily(
|
||||
|
||||
@ -261,45 +261,33 @@ async def set_gpu_stats(
|
||||
else:
|
||||
stats["jetson-gpu"] = {"gpu": "", "mem": ""}
|
||||
hwaccel_errors.append(args)
|
||||
elif "qsv" in args:
|
||||
elif "qsv" in args or ("vaapi" in args and not is_vaapi_amd_driver()):
|
||||
if not config.telemetry.stats.intel_gpu_stats:
|
||||
continue
|
||||
|
||||
# intel QSV GPU
|
||||
intel_usage = get_intel_gpu_stats(config.telemetry.stats.intel_gpu_device)
|
||||
|
||||
if intel_usage is not None:
|
||||
stats["intel-qsv"] = intel_usage or {"gpu": "", "mem": ""}
|
||||
else:
|
||||
stats["intel-qsv"] = {"gpu": "", "mem": ""}
|
||||
hwaccel_errors.append(args)
|
||||
elif "vaapi" in args:
|
||||
if is_vaapi_amd_driver():
|
||||
if not config.telemetry.stats.amd_gpu_stats:
|
||||
continue
|
||||
|
||||
# AMD VAAPI GPU
|
||||
amd_usage = get_amd_gpu_stats()
|
||||
|
||||
if amd_usage:
|
||||
stats["amd-vaapi"] = amd_usage
|
||||
else:
|
||||
stats["amd-vaapi"] = {"gpu": "", "mem": ""}
|
||||
hwaccel_errors.append(args)
|
||||
else:
|
||||
if not config.telemetry.stats.intel_gpu_stats:
|
||||
continue
|
||||
|
||||
# intel VAAPI GPU
|
||||
if "intel-gpu" not in stats:
|
||||
# intel GPU (QSV or VAAPI both use the same physical GPU)
|
||||
intel_usage = get_intel_gpu_stats(
|
||||
config.telemetry.stats.intel_gpu_device
|
||||
)
|
||||
|
||||
if intel_usage is not None:
|
||||
stats["intel-vaapi"] = intel_usage or {"gpu": "", "mem": ""}
|
||||
stats["intel-gpu"] = intel_usage or {"gpu": "", "mem": ""}
|
||||
else:
|
||||
stats["intel-vaapi"] = {"gpu": "", "mem": ""}
|
||||
stats["intel-gpu"] = {"gpu": "", "mem": ""}
|
||||
hwaccel_errors.append(args)
|
||||
elif "vaapi" in args:
|
||||
if not config.telemetry.stats.amd_gpu_stats:
|
||||
continue
|
||||
|
||||
# AMD VAAPI GPU
|
||||
amd_usage = get_amd_gpu_stats()
|
||||
|
||||
if amd_usage:
|
||||
stats["amd-vaapi"] = amd_usage
|
||||
else:
|
||||
stats["amd-vaapi"] = {"gpu": "", "mem": ""}
|
||||
hwaccel_errors.append(args)
|
||||
elif "preset-rk" in args:
|
||||
rga_usage = get_rockchip_gpu_stats()
|
||||
|
||||
|
||||
@ -39,8 +39,12 @@ class TestGpuStats(unittest.TestCase):
|
||||
process.stdout = self.intel_results
|
||||
sp.return_value = process
|
||||
intel_stats = get_intel_gpu_stats(False)
|
||||
print(f"the intel stats are {intel_stats}")
|
||||
# rc6 values: 47.844741 and 100.0 → avg 73.92 → gpu = 100 - 73.92 = 26.08%
|
||||
# Render/3D/0: 0.0 and 0.0 → compute = 0.0%
|
||||
# Video/0: 4.533124 and 0.0 → dec = 2.27%
|
||||
assert intel_stats == {
|
||||
"gpu": "1.13%",
|
||||
"gpu": "26.08%",
|
||||
"mem": "-%",
|
||||
"compute": "0.0%",
|
||||
"dec": "2.27%",
|
||||
}
|
||||
|
||||
@ -265,14 +265,30 @@ def get_amd_gpu_stats() -> Optional[dict[str, str]]:
|
||||
|
||||
|
||||
def get_intel_gpu_stats(intel_gpu_device: Optional[str]) -> Optional[dict[str, str]]:
|
||||
"""Get stats using intel_gpu_top."""
|
||||
"""Get stats using intel_gpu_top.
|
||||
|
||||
Returns overall GPU usage derived from rc6 residency (idle time),
|
||||
plus individual engine breakdowns:
|
||||
- compute: Render/3D engine (compute/shader workloads and QSV encode)
|
||||
- dec: Video engines (fixed-function codec, used by VAAPI)
|
||||
"""
|
||||
|
||||
def get_stats_manually(output: str) -> dict[str, str]:
|
||||
"""Find global stats via regex when json fails to parse."""
|
||||
reading = "".join(output)
|
||||
results: dict[str, str] = {}
|
||||
|
||||
# render is used for qsv
|
||||
# rc6 residency for overall GPU usage
|
||||
rc6_match = re.search(r'"rc6":\{"value":([\d.]+)', reading)
|
||||
if rc6_match:
|
||||
rc6_value = float(rc6_match.group(1))
|
||||
results["gpu"] = f"{round(100.0 - rc6_value, 2)}%"
|
||||
else:
|
||||
results["gpu"] = "-%"
|
||||
|
||||
results["mem"] = "-%"
|
||||
|
||||
# Render/3D is the compute/encode engine
|
||||
render = []
|
||||
for result in re.findall(r'"Render/3D/0":{[a-z":\d.,%]+}', reading):
|
||||
packet = json.loads(result[14:])
|
||||
@ -280,11 +296,9 @@ def get_intel_gpu_stats(intel_gpu_device: Optional[str]) -> Optional[dict[str, s
|
||||
render.append(float(single))
|
||||
|
||||
if render:
|
||||
render_avg = sum(render) / len(render)
|
||||
else:
|
||||
render_avg = 1
|
||||
results["compute"] = f"{round(sum(render) / len(render), 2)}%"
|
||||
|
||||
# video is used for vaapi
|
||||
# Video engines are the fixed-function decode engines
|
||||
video = []
|
||||
for result in re.findall(r'"Video/\d":{[a-z":\d.,%]+}', reading):
|
||||
packet = json.loads(result[10:])
|
||||
@ -292,12 +306,8 @@ def get_intel_gpu_stats(intel_gpu_device: Optional[str]) -> Optional[dict[str, s
|
||||
video.append(float(single))
|
||||
|
||||
if video:
|
||||
video_avg = sum(video) / len(video)
|
||||
else:
|
||||
video_avg = 1
|
||||
results["dec"] = f"{round(sum(video) / len(video), 2)}%"
|
||||
|
||||
results["gpu"] = f"{round((video_avg + render_avg) / 2, 2)}%"
|
||||
results["mem"] = "-%"
|
||||
return results
|
||||
|
||||
intel_gpu_top_command = [
|
||||
@ -336,10 +346,18 @@ def get_intel_gpu_stats(intel_gpu_device: Optional[str]) -> Optional[dict[str, s
|
||||
return get_stats_manually(output)
|
||||
|
||||
results: dict[str, str] = {}
|
||||
render = {"global": []}
|
||||
video = {"global": []}
|
||||
rc6_values = []
|
||||
render_global = []
|
||||
video_global = []
|
||||
# per-client: {pid: [total_busy_per_sample, ...]}
|
||||
client_usages: dict[str, list[float]] = {}
|
||||
|
||||
for block in data:
|
||||
# rc6 residency: percentage of time GPU is idle
|
||||
rc6 = block.get("rc6", {}).get("value")
|
||||
if rc6 is not None:
|
||||
rc6_values.append(float(rc6))
|
||||
|
||||
global_engine = block.get("engines")
|
||||
|
||||
if global_engine:
|
||||
@ -347,48 +365,53 @@ def get_intel_gpu_stats(intel_gpu_device: Optional[str]) -> Optional[dict[str, s
|
||||
video_frame = global_engine.get("Video/0", {}).get("busy")
|
||||
|
||||
if render_frame is not None:
|
||||
render["global"].append(float(render_frame))
|
||||
render_global.append(float(render_frame))
|
||||
|
||||
if video_frame is not None:
|
||||
video["global"].append(float(video_frame))
|
||||
video_global.append(float(video_frame))
|
||||
|
||||
clients = block.get("clients", {})
|
||||
|
||||
if clients and len(clients):
|
||||
if clients:
|
||||
for client_block in clients.values():
|
||||
key = client_block["pid"]
|
||||
pid = client_block["pid"]
|
||||
|
||||
if render.get(key) is None:
|
||||
render[key] = []
|
||||
video[key] = []
|
||||
if pid not in client_usages:
|
||||
client_usages[pid] = []
|
||||
|
||||
client_engine = client_block.get("engine-classes", {})
|
||||
# Sum all engine-class busy values for this client
|
||||
total_busy = 0.0
|
||||
for engine in client_block.get("engine-classes", {}).values():
|
||||
busy = engine.get("busy")
|
||||
if busy is not None:
|
||||
total_busy += float(busy)
|
||||
|
||||
render_frame = client_engine.get("Render/3D", {}).get("busy")
|
||||
video_frame = client_engine.get("Video", {}).get("busy")
|
||||
client_usages[pid].append(total_busy)
|
||||
|
||||
if render_frame is not None:
|
||||
render[key].append(float(render_frame))
|
||||
# Overall GPU usage from rc6 (idle) residency
|
||||
if rc6_values:
|
||||
rc6_avg = sum(rc6_values) / len(rc6_values)
|
||||
results["gpu"] = f"{round(100.0 - rc6_avg, 2)}%"
|
||||
|
||||
if video_frame is not None:
|
||||
video[key].append(float(video_frame))
|
||||
results["mem"] = "-%"
|
||||
|
||||
if render["global"] and video["global"]:
|
||||
results["gpu"] = (
|
||||
f"{round(((sum(render['global']) / len(render['global'])) + (sum(video['global']) / len(video['global']))) / 2, 2)}%"
|
||||
)
|
||||
results["mem"] = "-%"
|
||||
# Compute: Render/3D engine (compute/shader workloads and QSV encode)
|
||||
if render_global:
|
||||
results["compute"] = f"{round(sum(render_global) / len(render_global), 2)}%"
|
||||
|
||||
if len(render.keys()) > 1:
|
||||
# Decoder: Video engine (fixed-function codec)
|
||||
if video_global:
|
||||
results["dec"] = f"{round(sum(video_global) / len(video_global), 2)}%"
|
||||
|
||||
# Per-client GPU usage (sum of all engines per process)
|
||||
if client_usages:
|
||||
results["clients"] = {}
|
||||
|
||||
for key in render.keys():
|
||||
if key == "global" or not render[key] or not video[key]:
|
||||
continue
|
||||
|
||||
results["clients"][key] = (
|
||||
f"{round(((sum(render[key]) / len(render[key])) + (sum(video[key]) / len(video[key]))) / 2, 2)}%"
|
||||
)
|
||||
for pid, samples in client_usages.items():
|
||||
if samples:
|
||||
results["clients"][pid] = (
|
||||
f"{round(sum(samples) / len(samples), 2)}%"
|
||||
)
|
||||
|
||||
return results
|
||||
|
||||
|
||||
@ -78,6 +78,7 @@
|
||||
"gpuUsage": "GPU Usage",
|
||||
"gpuMemory": "GPU Memory",
|
||||
"gpuEncoder": "GPU Encoder",
|
||||
"gpuCompute": "GPU Compute / Encode",
|
||||
"gpuDecoder": "GPU Decoder",
|
||||
"gpuTemperature": "GPU Temperature",
|
||||
"gpuInfo": {
|
||||
@ -188,6 +189,7 @@
|
||||
"cameraFfmpeg": "{{camName}} FFmpeg",
|
||||
"cameraCapture": "{{camName}} capture",
|
||||
"cameraDetect": "{{camName}} detect",
|
||||
"cameraGpu": "{{camName}} GPU",
|
||||
"cameraFramesPerSecond": "{{camName}} frames per second",
|
||||
"cameraDetectionsPerSecond": "{{camName}} detections per second",
|
||||
"cameraSkippedDetectionsPerSecond": "{{camName}} skipped detections per second"
|
||||
|
||||
@ -116,8 +116,7 @@ export default function Statusbar() {
|
||||
case "amd-vaapi":
|
||||
gpuTitle = "AMD GPU";
|
||||
break;
|
||||
case "intel-vaapi":
|
||||
case "intel-qsv":
|
||||
case "intel-gpu":
|
||||
gpuTitle = "Intel GPU";
|
||||
break;
|
||||
case "rockchip":
|
||||
|
||||
@ -60,8 +60,10 @@ export type GpuStats = {
|
||||
mem: string;
|
||||
enc?: string;
|
||||
dec?: string;
|
||||
compute?: string;
|
||||
pstate?: string;
|
||||
temp?: number;
|
||||
clients?: { [pid: string]: string };
|
||||
};
|
||||
|
||||
export type NpuStats = {
|
||||
|
||||
@ -76,7 +76,7 @@ export default function GeneralMetrics({
|
||||
|
||||
statsHistory.length > 0 &&
|
||||
Object.keys(statsHistory[0]?.gpu_usages ?? {}).forEach((key) => {
|
||||
if (key == "amd-vaapi" || key == "intel-vaapi" || key == "intel-qsv") {
|
||||
if (key == "amd-vaapi" || key == "intel-gpu") {
|
||||
vaCount += 1;
|
||||
}
|
||||
|
||||
@ -265,7 +265,7 @@ export default function GeneralMetrics({
|
||||
|
||||
if (
|
||||
Object.keys(statsHistory?.at(0)?.gpu_usages ?? {}).length == 1 &&
|
||||
Object.keys(statsHistory?.at(0)?.gpu_usages ?? {})[0].includes("intel")
|
||||
Object.keys(statsHistory?.at(0)?.gpu_usages ?? {})[0] === "intel-gpu"
|
||||
) {
|
||||
// intel gpu stats do not support memory
|
||||
return undefined;
|
||||
@ -334,6 +334,43 @@ export default function GeneralMetrics({
|
||||
return Object.keys(series).length > 0 ? Object.values(series) : undefined;
|
||||
}, [statsHistory]);
|
||||
|
||||
const gpuComputeSeries = useMemo(() => {
|
||||
if (!statsHistory) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const series: {
|
||||
[key: string]: { name: string; data: { x: number; y: string }[] };
|
||||
} = {};
|
||||
let hasValidGpu = false;
|
||||
|
||||
statsHistory.forEach((stats, statsIdx) => {
|
||||
if (!stats) {
|
||||
return;
|
||||
}
|
||||
|
||||
Object.entries(stats.gpu_usages || {}).forEach(([key, stats]) => {
|
||||
if (!(key in series)) {
|
||||
series[key] = { name: key, data: [] };
|
||||
}
|
||||
|
||||
if (stats.compute) {
|
||||
hasValidGpu = true;
|
||||
series[key].data.push({
|
||||
x: statsIdx + 1,
|
||||
y: stats.compute.slice(0, -1),
|
||||
});
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
if (!hasValidGpu) {
|
||||
return [];
|
||||
}
|
||||
|
||||
return Object.keys(series).length > 0 ? Object.values(series) : undefined;
|
||||
}, [statsHistory]);
|
||||
|
||||
const gpuDecSeries = useMemo(() => {
|
||||
if (!statsHistory) {
|
||||
return [];
|
||||
@ -409,9 +446,7 @@ export default function GeneralMetrics({
|
||||
}
|
||||
|
||||
const gpuKeys = Object.keys(statsHistory[0]?.gpu_usages ?? {});
|
||||
const hasIntelGpu = gpuKeys.some(
|
||||
(key) => key === "intel-vaapi" || key === "intel-qsv",
|
||||
);
|
||||
const hasIntelGpu = gpuKeys.some((key) => key === "intel-gpu");
|
||||
|
||||
if (!hasIntelGpu) {
|
||||
return false;
|
||||
@ -427,7 +462,7 @@ export default function GeneralMetrics({
|
||||
}
|
||||
|
||||
Object.entries(stats.gpu_usages || {}).forEach(([key, gpuStats]) => {
|
||||
if (key === "intel-vaapi" || key === "intel-qsv") {
|
||||
if (key === "intel-gpu") {
|
||||
if (gpuStats.gpu) {
|
||||
hasDataPoints = true;
|
||||
const gpuValue = parseFloat(gpuStats.gpu.slice(0, -1));
|
||||
@ -744,8 +779,9 @@ export default function GeneralMetrics({
|
||||
className={cn(
|
||||
"mt-4 grid grid-cols-1 gap-2 sm:grid-cols-2",
|
||||
gpuTempSeries?.length && "md:grid-cols-3",
|
||||
gpuEncSeries?.length && "xl:grid-cols-4",
|
||||
gpuEncSeries?.length &&
|
||||
(gpuEncSeries?.length || gpuComputeSeries?.length) &&
|
||||
"xl:grid-cols-4",
|
||||
(gpuEncSeries?.length || gpuComputeSeries?.length) &&
|
||||
gpuTempSeries?.length &&
|
||||
"3xl:grid-cols-5",
|
||||
)}
|
||||
@ -858,6 +894,30 @@ export default function GeneralMetrics({
|
||||
) : (
|
||||
<Skeleton className="aspect-video w-full" />
|
||||
)}
|
||||
{statsHistory.length != 0 ? (
|
||||
<>
|
||||
{gpuComputeSeries && gpuComputeSeries?.length != 0 && (
|
||||
<div className="rounded-lg bg-background_alt p-2.5 md:rounded-2xl">
|
||||
<div className="mb-5">
|
||||
{t("general.hardwareInfo.gpuCompute")}
|
||||
</div>
|
||||
{gpuComputeSeries.map((series) => (
|
||||
<ThresholdBarGraph
|
||||
key={series.name}
|
||||
graphId={`${series.name}-compute`}
|
||||
unit="%"
|
||||
name={series.name}
|
||||
threshold={GPUMemThreshold}
|
||||
updateTimes={updateTimes}
|
||||
data={[series]}
|
||||
/>
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
</>
|
||||
) : (
|
||||
<Skeleton className="aspect-video w-full" />
|
||||
)}
|
||||
{statsHistory.length != 0 ? (
|
||||
<>
|
||||
{gpuDecSeries && gpuDecSeries?.length != 0 && (
|
||||
|
||||
Loading…
Reference in New Issue
Block a user