@app.on_event("startup")
async def startup():
    """Log application startup and launch the debug replay auto-stop watchdog.

    The watchdog coroutine is scheduled as a background task on the running
    event loop; this handler does not await it.
    """
    logger.info("FastAPI started")
    # The event loop only keeps weak references to tasks, so a task whose
    # result is discarded can be garbage-collected mid-execution. Keep a
    # strong reference on the application state for the lifetime of the app.
    app.state.debug_replay_watchdog_task = asyncio.create_task(
        debug_replay_auto_stop_watchdog(
            replay_manager, frigate_config, config_publisher
        )
    )
async def debug_replay_auto_stop_watchdog(
    manager: DebugReplayManager,
    frigate_config: FrigateConfig,
    config_publisher: CameraConfigUpdatePublisher,
) -> None:
    """Poll forever and stop any debug replay session that outlives its cap.

    Every AUTO_STOP_CHECK_INTERVAL_SECONDS the coroutine wakes up and compares
    the wall-clock age of the current session (``manager.session_started_at``)
    against MAX_SESSION_DURATION_SECONDS. A session at or past the cap is
    stopped via ``manager.stop``. The cap is a generous backstop against a
    session left running for days, not a limit meant to interrupt ordinary
    tuning or overnight soak runs.

    Args:
        manager: Replay manager whose ``active``, ``session_started_at`` and
            ``replay_camera_name`` attributes are read and whose ``stop`` is
            invoked.
        frigate_config: Forwarded to ``manager.stop`` as ``frigate_config``.
        config_publisher: Forwarded to ``manager.stop`` as ``config_publisher``.

    Raises:
        asyncio.CancelledError: Propagated when the task is cancelled. Any
            other exception is logged and the loop continues.
    """
    while True:
        try:
            await asyncio.sleep(AUTO_STOP_CHECK_INTERVAL_SECONDS)

            session_start = manager.session_started_at
            over_limit = (
                manager.active
                and session_start is not None
                and time.time() - session_start >= MAX_SESSION_DURATION_SECONDS
            )
            if not over_limit:
                continue

            # Read the camera name before stopping so the log line below can
            # still report it afterwards.
            camera_name = manager.replay_camera_name

            # Run stop() in a worker thread (presumably it does blocking work
            # -- confirm against DebugReplayManager.stop) so the event loop
            # stays responsive.
            await asyncio.to_thread(
                manager.stop,
                frigate_config=frigate_config,
                config_publisher=config_publisher,
            )

            cap_hours = MAX_SESSION_DURATION_SECONDS // 3600
            logger.info(
                "Debug replay auto-stopped after exceeding max session duration of %d hours: %s",
                cap_hours,
                camera_name,
            )
        except asyncio.CancelledError:
            # Cancellation must end the watchdog rather than be swallowed.
            raise
        except Exception:
            # Keep the watchdog alive; the next tick re-evaluates the session.
            logger.exception("Error in debug replay auto-stop watchdog")