Compare commits

...

5 Commits

Author SHA1 Message Date
Josh Hawkins
2c92a38124 add area 2026-05-27 10:13:05 -05:00
Nicolas Mowen
767853636c Add error logs for Intel GPU stats 2026-05-27 09:09:42 -06:00
Nicolas Mowen
26e25c159d Store hwaccel errors with timeout so it can retry 2026-05-27 09:09:42 -06:00
Josh Hawkins
fc5fbad047 tweak language 2026-05-27 10:08:57 -05:00
Josh Hawkins
c5bbf6db88 add more tips to object classification docs 2026-05-27 09:58:21 -05:00
4 changed files with 47 additions and 13 deletions

View File

@ -149,9 +149,16 @@ For more detail, see [Frigate Tip: Best Practices for Training Face and Custom C
- **The wizard is just the starting point**: You don't need to find and label every class upfront. Missing classes will naturally appear in Recent Classifications, and those images tend to be more valuable because they represent new conditions and edge cases.
- **Problem framing**: Keep classes visually distinct and relevant to the chosen object types.
- **Preprocessing**: Ensure examples reflect object crops similar to Frigate's boxes; keep the subject centered.
- **Labels**: Keep label names short and consistent; include a `none` class if you plan to ignore uncertain predictions for sub labels.
- **Crop size**: Aim for crops of at least 100×100 pixels (a 10,000-pixel area). Crops smaller than ~80×80 get stretched 3–7× by the model's 224×224 input resize and tend to collapse into a generic "blob" region of feature space where identity becomes unreliable. If most of your detections are small because the camera is far from the subject, consider repositioning the camera for closer crops.
- **Class balance**: Aim to keep your largest class within ~3× the count of your smallest. Beyond that, the model becomes biased toward the dominant class and tends to default borderline predictions to it (the "everything looks like Buddy" failure mode).
- **Threshold**: Tune `threshold` per model to reduce false assignments. Start at `0.8` and adjust based on validation.
:::tip `none` works differently from named classes
Named classes work best with visually uniform examples — every Buddy photo should look like Buddy. The `none` class needs the opposite: visual diversity across sizes, framings, and qualities, because at inference it has to absorb everything that isn't one of your named classes. Don't apply the same "only keep large, well-framed images" rule to `none` that you would to a named class. Mix in small crops, partial views, and false positives deliberately — otherwise the model has no signal for "small/ambiguous thing = not one of my known classes" and will force those crops into a named class by default.
:::
## Debugging Classification Models
To troubleshoot issues with object classification models, enable debug logging to see detailed information about classification attempts, scores, and consensus calculations.

View File

@ -32,7 +32,7 @@ class StatsEmitter(threading.Thread):
self.config = config
self.stats_tracking = stats_tracking
self.stop_event = stop_event
self.hwaccel_errors: list[str] = []
self.hwaccel_errors: dict[str, float] = {}
self.stats_history: list[dict[str, Any]] = []
# create communication for stats

View File

@ -1,6 +1,7 @@
"""Utilities for stats."""
import asyncio
import logging
import os
import shutil
import time
@ -34,6 +35,10 @@ from frigate.util.services import (
)
from frigate.version import VERSION
logger = logging.getLogger(__name__)
HWACCEL_ERROR_COOLDOWN_SECONDS = 3600
def get_latest_version(config: FrigateConfig) -> str:
if not config.telemetry.version_check:
@ -167,7 +172,9 @@ def get_detector_stats(
def get_processing_stats(
config: FrigateConfig, stats: dict[str, str], hwaccel_errors: list[str]
config: FrigateConfig,
stats: dict[str, str],
hwaccel_errors: dict[str, float],
) -> None:
"""Get stats for cpu / gpu."""
@ -206,7 +213,9 @@ async def set_bandwidth_stats(config: FrigateConfig, all_stats: dict[str, Any])
async def set_gpu_stats(
config: FrigateConfig, all_stats: dict[str, Any], hwaccel_errors: list[str]
config: FrigateConfig,
all_stats: dict[str, Any],
hwaccel_errors: dict[str, float],
) -> None:
"""Parse GPUs from hwaccel args and use for stats."""
hwaccel_args = []
@ -231,12 +240,16 @@ async def set_gpu_stats(
stats: dict[str, dict] = {}
intel_gpu_collected = False
now = time.monotonic()
for args in hwaccel_args:
if args in hwaccel_errors:
# known erroring args should automatically return as error
stats["error-gpu"] = {"gpu": "", "mem": ""}
elif "cuvid" in args or "nvidia" in args:
last_error = hwaccel_errors.get(args)
if last_error is not None:
if now - last_error < HWACCEL_ERROR_COOLDOWN_SECONDS:
continue
hwaccel_errors.pop(args, None)
if "cuvid" in args or "nvidia" in args:
# nvidia GPU
nvidia_usage = get_nvidia_gpu_stats()
@ -253,7 +266,7 @@ async def set_gpu_stats(
else:
stats["nvidia-gpu"] = {"vendor": "nvidia", "gpu": "", "mem": ""}
hwaccel_errors.append(args)
hwaccel_errors[args] = time.monotonic()
elif "nvmpi" in args or "jetson" in args:
# nvidia Jetson
jetson_usage = get_jetson_stats()
@ -262,7 +275,7 @@ async def set_gpu_stats(
stats["jetson-gpu"] = {"vendor": "nvidia", **jetson_usage}
else:
stats["jetson-gpu"] = {"vendor": "nvidia", "gpu": "", "mem": ""}
hwaccel_errors.append(args)
hwaccel_errors[args] = time.monotonic()
elif "qsv" in args or ("vaapi" in args and not is_vaapi_amd_driver()):
if not config.telemetry.stats.intel_gpu_stats:
continue
@ -280,7 +293,7 @@ async def set_gpu_stats(
stats[name] = entry
else:
stats["intel-gpu"] = {"vendor": "intel", "gpu": "", "mem": ""}
hwaccel_errors.append(args)
hwaccel_errors[args] = time.monotonic()
elif "vaapi" in args:
if not config.telemetry.stats.amd_gpu_stats:
continue
@ -292,7 +305,7 @@ async def set_gpu_stats(
stats["amd-vaapi"] = {"vendor": "amd", **amd_usage}
else:
stats["amd-vaapi"] = {"vendor": "amd", "gpu": "", "mem": ""}
hwaccel_errors.append(args)
hwaccel_errors[args] = time.monotonic()
elif "preset-rk" in args:
rga_usage = get_rockchip_gpu_stats()
@ -328,7 +341,9 @@ async def set_npu_usages(config: FrigateConfig, all_stats: dict[str, Any]) -> No
def stats_snapshot(
config: FrigateConfig, stats_tracking: StatsTrackingTypes, hwaccel_errors: list[str]
config: FrigateConfig,
stats_tracking: StatsTrackingTypes,
hwaccel_errors: dict[str, float],
) -> dict[str, Any]:
"""Get a snapshot of the current stats that are being tracked."""
camera_metrics = stats_tracking["camera_metrics"]

View File

@ -416,6 +416,11 @@ def get_intel_gpu_stats(
snapshot_a = _read_intel_drm_fdinfo(target_pdev)
if not snapshot_a:
logger.warning(
"Unable to collect Intel GPU stats: no DRM fdinfo entries found"
"%s. Check that /proc is readable and the i915/xe driver is loaded",
f" for pdev {target_pdev}" if target_pdev else "",
)
return None
start = time.monotonic()
@ -424,6 +429,9 @@ def get_intel_gpu_stats(
snapshot_b = _read_intel_drm_fdinfo(target_pdev)
if not snapshot_b or elapsed_ns <= 0:
logger.warning(
"Unable to collect Intel GPU stats: second DRM fdinfo sample was empty"
)
return None
def _new_engine_pct() -> dict[str, float]:
@ -464,6 +472,10 @@ def get_intel_gpu_stats(
pid_pct[data_b["pid"]] = pid_pct.get(data_b["pid"], 0.0) + client_total
if not per_pdev_engine_pct:
logger.warning(
"Unable to collect Intel GPU stats: no per-engine counters available "
"(i915 requires kernel >= 5.19)"
)
return None
names = intel_gpu_name_resolver.get_names()