Add option to treat GPU as SR-IOV in order for stats to work correctly

This commit is contained in:
Nicolas Mowen 2025-01-03 07:34:32 -07:00
parent 54bbfae433
commit d3afa61a07
4 changed files with 14 additions and 6 deletions

View File

@ -813,11 +813,13 @@ telemetry:
- lo - lo
# Optional: Configure system stats # Optional: Configure system stats
stats: stats:
# Enable AMD GPU stats (default: shown below) # Optional: Enable AMD GPU stats (default: shown below)
amd_gpu_stats: True amd_gpu_stats: True
# Enable Intel GPU stats (default: shown below) # Optional: Enable Intel GPU stats (default: shown below)
intel_gpu_stats: True intel_gpu_stats: True
# Enable network bandwidth stats monitoring for camera ffmpeg processes, go2rtc, and object detectors. (default: shown below) # Optional: Treat GPU as SR-IOV to fix GPU stats (default: shown below)
sriov: False
# Optional: Enable network bandwidth stats monitoring for camera ffmpeg processes, go2rtc, and object detectors. (default: shown below)
# NOTE: The container must either be privileged or have cap_net_admin, cap_net_raw capabilities enabled. # NOTE: The container must either be privileged or have cap_net_admin, cap_net_raw capabilities enabled.
network_bandwidth: False network_bandwidth: False
# Optional: Enable the latest version outbound check (default: shown below) # Optional: Enable the latest version outbound check (default: shown below)

View File

@ -11,6 +11,9 @@ class StatsConfig(FrigateBaseModel):
network_bandwidth: bool = Field( network_bandwidth: bool = Field(
default=False, title="Enable network bandwidth for ffmpeg processes." default=False, title="Enable network bandwidth for ffmpeg processes."
) )
sriov: bool = Field(
default=False, title="Treat device as SR-IOV to support GPU stats."
)
class TelemetryConfig(FrigateBaseModel): class TelemetryConfig(FrigateBaseModel):

View File

@ -195,7 +195,7 @@ async def set_gpu_stats(
continue continue
# intel QSV GPU # intel QSV GPU
intel_usage = get_intel_gpu_stats() intel_usage = get_intel_gpu_stats(config.telemetry.stats.sriov)
if intel_usage is not None: if intel_usage is not None:
stats["intel-qsv"] = intel_usage or {"gpu": "", "mem": ""} stats["intel-qsv"] = intel_usage or {"gpu": "", "mem": ""}
@ -220,7 +220,7 @@ async def set_gpu_stats(
continue continue
# intel VAAPI GPU # intel VAAPI GPU
intel_usage = get_intel_gpu_stats() intel_usage = get_intel_gpu_stats(config.telemetry.stats.sriov)
if intel_usage is not None: if intel_usage is not None:
stats["intel-vaapi"] = intel_usage or {"gpu": "", "mem": ""} stats["intel-vaapi"] = intel_usage or {"gpu": "", "mem": ""}

View File

@ -255,7 +255,7 @@ def get_amd_gpu_stats() -> dict[str, str]:
return results return results
def get_intel_gpu_stats() -> dict[str, str]: def get_intel_gpu_stats(sriov: bool) -> dict[str, str]:
"""Get stats using intel_gpu_top.""" """Get stats using intel_gpu_top."""
def get_stats_manually(output: str) -> dict[str, str]: def get_stats_manually(output: str) -> dict[str, str]:
@ -302,6 +302,9 @@ def get_intel_gpu_stats() -> dict[str, str]:
"1", "1",
] ]
if sriov:
intel_gpu_top_command += ["-d", "drm:/dev/dri/card0"]
p = sp.run( p = sp.run(
intel_gpu_top_command, intel_gpu_top_command,
encoding="ascii", encoding="ascii",