frigate/frigate/video/ffmpeg.py

695 lines
27 KiB
Python
Raw Normal View History

2026-03-25 17:44:12 +03:00
"""Manages ffmpeg processes for camera frame capture."""
2020-11-04 06:26:39 +03:00
import logging
2020-11-04 15:31:25 +03:00
import queue
2021-10-31 19:12:44 +03:00
import subprocess as sp
2020-11-04 15:31:25 +03:00
import threading
import time
from collections import deque
from datetime import datetime, timedelta, timezone
from multiprocessing import Queue, Value
from multiprocessing.synchronize import Event as MpEvent
from typing import Any
2020-11-04 15:31:25 +03:00
2026-03-25 17:44:12 +03:00
from frigate.camera import CameraMetrics
from frigate.comms.inter_process import InterProcessRequestor
from frigate.comms.recordings_updater import (
RecordingsDataSubscriber,
RecordingsDataTypeEnum,
)
2026-03-25 17:44:12 +03:00
from frigate.config import CameraConfig, LoggerConfig
from frigate.config.camera.updater import (
CameraConfigUpdateEnum,
CameraConfigUpdateSubscriber,
)
2026-03-25 17:44:12 +03:00
from frigate.const import PROCESS_PRIORITY_HIGH
2020-12-04 15:59:03 +03:00
from frigate.log import LogPipe
from frigate.util.builtin import EventsPerSecond, get_ffmpeg_arg_list
2026-03-25 17:44:12 +03:00
from frigate.util.ffmpeg import start_or_restart_ffmpeg, stop_ffmpeg
from frigate.util.image import (
2021-02-17 16:23:32 +03:00
FrameManager,
SharedMemoryFrameManager,
)
from frigate.util.process import FrigateProcess
2019-02-26 05:27:02 +03:00
2020-11-04 06:26:39 +03:00
logger = logging.getLogger(__name__)
# all built-in record presets use this segment_time
DEFAULT_RECORD_SEGMENT_TIME = 10
def _get_record_segment_time(config: CameraConfig) -> int:
"""Extract -segment_time from the camera's record output args."""
record_args = get_ffmpeg_arg_list(config.ffmpeg.output_args.record)
if record_args and record_args[0].startswith("preset"):
return DEFAULT_RECORD_SEGMENT_TIME
try:
idx = record_args.index("-segment_time")
return int(record_args[idx + 1])
except (ValueError, IndexError):
return DEFAULT_RECORD_SEGMENT_TIME
2021-02-17 16:23:32 +03:00
def capture_frames(
ffmpeg_process: sp.Popen[Any],
config: CameraConfig,
shm_frame_count: int,
frame_index: int,
frame_shape: tuple[int, int],
2021-02-17 16:23:32 +03:00
frame_manager: FrameManager,
frame_queue,
fps: Value,
skipped_fps: Value,
current_frame: Value,
stop_event: MpEvent,
) -> None:
2020-11-03 17:15:58 +03:00
frame_size = frame_shape[0] * frame_shape[1]
2020-10-25 18:05:21 +03:00
frame_rate = EventsPerSecond()
2020-10-26 15:59:22 +03:00
frame_rate.start()
2020-10-25 18:05:21 +03:00
skipped_eps = EventsPerSecond()
skipped_eps.start()
config_subscriber = CameraConfigUpdateSubscriber(
None, {config.name: config}, [CameraConfigUpdateEnum.enabled]
)
def get_enabled_state():
"""Fetch the latest enabled state from ZMQ."""
config_subscriber.check_for_updates()
return config.enabled
try:
while not stop_event.is_set():
if not get_enabled_state():
logger.debug(f"Stopping capture thread for disabled {config.name}")
break
fps.value = frame_rate.eps()
skipped_fps.value = skipped_eps.eps()
current_frame.value = datetime.now().timestamp()
frame_name = f"{config.name}_frame{frame_index}"
frame_buffer = frame_manager.write(frame_name)
try:
frame_buffer[:] = ffmpeg_process.stdout.read(frame_size)
except Exception:
# shutdown has been initiated
if stop_event.is_set():
break
2020-12-12 18:12:15 +03:00
2022-02-06 17:46:41 +03:00
logger.error(
f"{config.name}: Unable to read frames from ffmpeg process."
2021-02-17 16:23:32 +03:00
)
if ffmpeg_process.poll() is not None:
logger.error(
f"{config.name}: ffmpeg process is not running. exiting capture thread..."
)
break
continue
frame_rate.update()
# don't lock the queue to check, just try since it should rarely be full
try:
# add to the queue
frame_queue.put((frame_name, current_frame.value), False)
frame_manager.close(frame_name)
except queue.Full:
# if the queue is full, skip this frame
skipped_eps.update()
frame_index = 0 if frame_index == shm_frame_count - 1 else frame_index + 1
finally:
config_subscriber.stop()
2021-02-17 16:23:32 +03:00
2020-10-25 18:05:21 +03:00
class CameraWatchdog(threading.Thread):
2021-02-17 16:23:32 +03:00
def __init__(
self,
config: CameraConfig,
shm_frame_count: int,
frame_queue: Queue,
camera_fps,
skipped_fps,
ffmpeg_pid,
stalls,
reconnects,
detection_frame,
stop_event,
2021-02-17 16:23:32 +03:00
):
2020-10-25 18:05:21 +03:00
threading.Thread.__init__(self)
self.logger = logging.getLogger(f"watchdog.{config.name}")
2020-10-25 18:05:21 +03:00
self.config = config
self.shm_frame_count = shm_frame_count
2020-10-25 18:05:21 +03:00
self.capture_thread = None
2020-11-30 00:55:53 +03:00
self.ffmpeg_detect_process = None
self.logpipe = LogPipe(f"ffmpeg.{self.config.name}.detect")
self.ffmpeg_other_processes: list[dict[str, Any]] = []
2020-10-25 18:05:21 +03:00
self.camera_fps = camera_fps
self.skipped_fps = skipped_fps
self.ffmpeg_pid = ffmpeg_pid
2020-10-25 18:05:21 +03:00
self.frame_queue = frame_queue
2020-11-03 17:15:58 +03:00
self.frame_shape = self.config.frame_shape_yuv
self.frame_size = self.frame_shape[0] * self.frame_shape[1]
self.fps_overflow_count = 0
self.frame_index = 0
2020-11-30 01:19:59 +03:00
self.stop_event = stop_event
self.sleeptime = self.config.ffmpeg.retry_interval
self.reconnect_timestamps = deque()
self.stalls = stalls
self.reconnects = reconnects
self.detection_frame = detection_frame
2020-10-25 18:05:21 +03:00
self.config_subscriber = CameraConfigUpdateSubscriber(
None,
{config.name: config},
[
CameraConfigUpdateEnum.enabled,
CameraConfigUpdateEnum.ffmpeg,
CameraConfigUpdateEnum.record,
],
)
self.requestor = InterProcessRequestor()
self.was_enabled = self.config.enabled
UI fixes (#23127) * hide camera overrides badge from system sections * show empty card on camera metrics page when no cameras are defined * fix enabled camera state switch after adding via wizard Cameras added mid-session have no WS state until the dispatcher publishes camera_activity (which only happens on a fresh onConnect). Fall back to the config's enabled value so the switch reflects reality immediately after the wizard closes. * guard camera enabled access console would throw errors after adding via camera wizard * fix useOptimisticState dropping debounced setState under StrictMode * use openvino on cpu as default model - faster than tflite on cpu - add to default generated config * use an enum for model_size the frontend will then render this as a select dropdown because of the changes in the json schema * i18n * sync object filter entries with tracked labels in camera config form Filter sub-collapsibles in the camera Objects section are driven by `filters` dict keys, but profile merges and live track-switch edits don't add matching entries, so newly tracked labels (like from a profile override) had no collapsible. Synthesize default filter entries from `track` in the form data so every tracked label renders a collapsible; baseline data also gets the synthesized entries, so save payloads are unchanged. * revalidate raw paths cache after config save so CameraPathWidget shows fresh credentials * fix test * restore masked ffmpeg credentials when persisting camera config * formatting * rebuild ffmpeg commands when enabling recording for the first time Toggling record.enabled from the config UI updated the in-memory config but left ffmpeg running with its original command, so the record output args were never wired in and nothing landed in the cache for the maintainer to move. The record config update now rebuilds ffmpeg_cmds when enabled_in_config transitions, and the camera watchdog restarts ffmpeg on a false to true transition so the record output gets wired in. MQTT toggles, which only flip record.enabled at runtime, are unaffected and continue to work via the maintainer's drop/keep gate. * keep record toggle switch in single camera view disabled until enabled in config * fix override detection for sections unset in the global config Override badges and the blue dot now compare against schema defaults for sections like motion that the API serializes as null when omitted from the global YAML, instead of treating any populated camera config as an override * add support for config-aware patterns in section hiddenFields Section configs can now declare dynamic hidden-field entries as functions of the loaded config; objects.ts uses this to hide auto-populated attribute filters (DHL, face, license_plate, etc.) from the form, save flow, and override popover when those labels aren't user-settable * siimplify object filters handling live updating was getting very messy. users will just need to save once they enable a new object in order to see filters for that object * tweaks * update docs for new detector default * make genai provider required and add special case for UI prevent validation errors from appearing on initial creation of genai provider by setting the first option in the select dropdown as default
2026-05-07 16:53:07 +03:00
self.was_record_enabled_in_config = self.config.record.enabled_in_config
2020-11-30 00:55:53 +03:00
self.segment_subscriber = RecordingsDataSubscriber(RecordingsDataTypeEnum.all)
self.latest_valid_segment_time: float = 0
self.latest_invalid_segment_time: float = 0
self.latest_cache_segment_time: float = 0
self.record_enable_time: datetime | None = None
# `valid` segments are published with the segment's start time, so the
# gap between consecutive publishes can reach 2 * segment_time. Pad the
# staleness threshold so it's never tighter than that worst case.
segment_time = _get_record_segment_time(self.config)
self.record_stale_threshold = max(120, 2 * segment_time + 30)
# Stall tracking (based on last processed frame)
self._stall_timestamps: deque[float] = deque()
self._stall_active: bool = False
# Status caching to reduce message volume
self._last_detect_status: str | None = None
self._last_record_status: str | None = None
self._last_status_update_time: float = 0.0
def _send_detect_status(self, status: str, now: float) -> None:
"""Send detect status only if changed or retry_interval has elapsed."""
if (
status != self._last_detect_status
or (now - self._last_status_update_time) >= self.sleeptime
):
self.requestor.send_data(f"{self.config.name}/status/detect", status)
self._last_detect_status = status
self._last_status_update_time = now
def _send_record_status(self, status: str, now: float) -> None:
"""Send record status only if changed or retry_interval has elapsed."""
if (
status != self._last_record_status
or (now - self._last_status_update_time) >= self.sleeptime
):
self.requestor.send_data(f"{self.config.name}/status/record", status)
self._last_record_status = status
self._last_status_update_time = now
def _check_config_updates(self) -> dict[str, list[str]]:
"""Check for config updates and return the update dict."""
return self.config_subscriber.check_for_updates()
def _update_enabled_state(self) -> bool:
"""Fetch the latest config and update enabled state."""
self._check_config_updates()
return self.config.enabled
def reset_capture_thread(
self, terminate: bool = True, drain_output: bool = True
) -> None:
if terminate:
self.ffmpeg_detect_process.terminate()
try:
self.logger.info("Waiting for ffmpeg to exit gracefully...")
if drain_output:
self.ffmpeg_detect_process.communicate(timeout=30)
else:
self.ffmpeg_detect_process.wait(timeout=30)
except sp.TimeoutExpired:
self.logger.info("FFmpeg did not exit. Force killing...")
self.ffmpeg_detect_process.kill()
if drain_output:
self.ffmpeg_detect_process.communicate()
else:
self.ffmpeg_detect_process.wait()
# Update reconnects
now = datetime.now().timestamp()
self.reconnect_timestamps.append(now)
while self.reconnect_timestamps and self.reconnect_timestamps[0] < now - 3600:
self.reconnect_timestamps.popleft()
if self.reconnects:
self.reconnects.value = len(self.reconnect_timestamps)
# Wait for old capture thread to fully exit before starting a new one
if self.capture_thread is not None and self.capture_thread.is_alive():
self.logger.info("Waiting for capture thread to exit...")
self.capture_thread.join(timeout=5)
if self.capture_thread.is_alive():
self.logger.warning(
f"Capture thread for {self.config.name} did not exit in time"
)
self.logger.error(
"The following ffmpeg logs include the last 100 lines prior to exit."
)
self.logpipe.dump()
self.logger.info("Restarting ffmpeg...")
self.start_ffmpeg_detect()
def run(self) -> None:
if self._update_enabled_state():
self.start_all_ffmpeg()
# If recording is enabled at startup, set the grace period timer
if self.config.record.enabled:
self.record_enable_time = datetime.now().astimezone(timezone.utc)
2021-02-17 16:23:32 +03:00
time.sleep(self.sleeptime)
last_restart_time = datetime.now().timestamp()
# 1 second watchdog loop
while not self.stop_event.wait(1):
updates = self._check_config_updates()
# Handle ffmpeg config changes by restarting all ffmpeg processes
if "ffmpeg" in updates and self.config.enabled:
self.logger.debug(
"FFmpeg config updated for %s, restarting ffmpeg processes",
self.config.name,
)
self.stop_all_ffmpeg()
self.start_all_ffmpeg()
self.latest_valid_segment_time = 0
self.latest_invalid_segment_time = 0
self.latest_cache_segment_time = 0
self.record_enable_time = datetime.now().astimezone(timezone.utc)
last_restart_time = datetime.now().timestamp()
continue
enabled = self.config.enabled
if enabled != self.was_enabled:
if enabled:
self.logger.debug(f"Enabling camera {self.config.name}")
self.start_all_ffmpeg()
# reset all timestamps and record the enable time for grace period
self.latest_valid_segment_time = 0
self.latest_invalid_segment_time = 0
self.latest_cache_segment_time = 0
self.record_enable_time = datetime.now().astimezone(timezone.utc)
else:
self.logger.debug(f"Disabling camera {self.config.name}")
self.stop_all_ffmpeg()
self.record_enable_time = None
# update camera status
now = datetime.now().timestamp()
self._send_detect_status("disabled", now)
self._send_record_status("disabled", now)
self.was_enabled = enabled
continue
UI fixes (#23127) * hide camera overrides badge from system sections * show empty card on camera metrics page when no cameras are defined * fix enabled camera state switch after adding via wizard Cameras added mid-session have no WS state until the dispatcher publishes camera_activity (which only happens on a fresh onConnect). Fall back to the config's enabled value so the switch reflects reality immediately after the wizard closes. * guard camera enabled access console would throw errors after adding via camera wizard * fix useOptimisticState dropping debounced setState under StrictMode * use openvino on cpu as default model - faster than tflite on cpu - add to default generated config * use an enum for model_size the frontend will then render this as a select dropdown because of the changes in the json schema * i18n * sync object filter entries with tracked labels in camera config form Filter sub-collapsibles in the camera Objects section are driven by `filters` dict keys, but profile merges and live track-switch edits don't add matching entries, so newly tracked labels (like from a profile override) had no collapsible. Synthesize default filter entries from `track` in the form data so every tracked label renders a collapsible; baseline data also gets the synthesized entries, so save payloads are unchanged. * revalidate raw paths cache after config save so CameraPathWidget shows fresh credentials * fix test * restore masked ffmpeg credentials when persisting camera config * formatting * rebuild ffmpeg commands when enabling recording for the first time Toggling record.enabled from the config UI updated the in-memory config but left ffmpeg running with its original command, so the record output args were never wired in and nothing landed in the cache for the maintainer to move. The record config update now rebuilds ffmpeg_cmds when enabled_in_config transitions, and the camera watchdog restarts ffmpeg on a false to true transition so the record output gets wired in. MQTT toggles, which only flip record.enabled at runtime, are unaffected and continue to work via the maintainer's drop/keep gate. * keep record toggle switch in single camera view disabled until enabled in config * fix override detection for sections unset in the global config Override badges and the blue dot now compare against schema defaults for sections like motion that the API serializes as null when omitted from the global YAML, instead of treating any populated camera config as an override * add support for config-aware patterns in section hiddenFields Section configs can now declare dynamic hidden-field entries as functions of the loaded config; objects.ts uses this to hide auto-populated attribute filters (DHL, face, license_plate, etc.) from the form, save flow, and override popover when those labels aren't user-settable * siimplify object filters handling live updating was getting very messy. users will just need to save once they enable a new object in order to see filters for that object * tweaks * update docs for new detector default * make genai provider required and add special case for UI prevent validation errors from appearing on initial creation of genai provider by setting the first option in the select dropdown as default
2026-05-07 16:53:07 +03:00
record_enabled_in_config = self.config.record.enabled_in_config
if record_enabled_in_config != self.was_record_enabled_in_config:
if record_enabled_in_config and enabled:
self.logger.debug(
f"Record enabled in config for {self.config.name}, restarting ffmpeg"
)
self.stop_all_ffmpeg()
self.start_all_ffmpeg()
self.latest_valid_segment_time = 0
self.latest_invalid_segment_time = 0
self.latest_cache_segment_time = 0
self.record_enable_time = datetime.now().astimezone(timezone.utc)
last_restart_time = datetime.now().timestamp()
self.was_record_enabled_in_config = record_enabled_in_config
continue
if not enabled:
continue
while True:
update = self.segment_subscriber.check_for_update(timeout=0)
if update == (None, None):
break
raw_topic, payload = update
if raw_topic and payload:
topic = str(raw_topic)
camera, segment_time, _ = payload
if camera != self.config.name:
continue
if topic.endswith(RecordingsDataTypeEnum.invalid.value):
self.logger.warning(
f"Invalid recording segment detected for {camera} at {segment_time}"
)
self.latest_invalid_segment_time = segment_time
elif topic.endswith(RecordingsDataTypeEnum.valid.value):
self.logger.debug(
f"Latest valid recording segment time on {camera}: {segment_time}"
)
self.latest_valid_segment_time = segment_time
elif topic.endswith(RecordingsDataTypeEnum.latest.value):
if segment_time is not None:
self.latest_cache_segment_time = segment_time
else:
self.latest_cache_segment_time = 0
now = datetime.now().timestamp()
2020-10-25 18:05:21 +03:00
# Check if enough time has passed to allow ffmpeg restart (backoff pacing)
time_since_last_restart = now - last_restart_time
can_restart = time_since_last_restart >= self.sleeptime
2020-10-25 18:05:21 +03:00
if not self.capture_thread.is_alive():
self._send_detect_status("offline", now)
self.camera_fps.value = 0
2021-08-14 22:04:00 +03:00
self.logger.error(
f"Ffmpeg process crashed unexpectedly for {self.config.name}."
2021-08-14 22:04:00 +03:00
)
if can_restart:
self.reset_capture_thread(terminate=False)
last_restart_time = now
elif self.camera_fps.value >= (self.config.detect.fps + 10):
self.fps_overflow_count += 1
if self.fps_overflow_count == 3:
self._send_detect_status("offline", now)
self.fps_overflow_count = 0
self.camera_fps.value = 0
self.logger.info(
f"{self.config.name} exceeded fps limit. Exiting ffmpeg..."
)
if can_restart:
self.reset_capture_thread(drain_output=False)
last_restart_time = now
elif now - self.capture_thread.current_frame.value > 20:
self._send_detect_status("offline", now)
self.camera_fps.value = 0
self.logger.info(
f"No frames received from {self.config.name} in 20 seconds. Exiting ffmpeg..."
)
if can_restart:
self.reset_capture_thread()
last_restart_time = now
else:
# process is running normally
self._send_detect_status("online", now)
self.fps_overflow_count = 0
2021-02-17 16:23:32 +03:00
2020-11-30 00:55:53 +03:00
for p in self.ffmpeg_other_processes:
2021-02-17 16:23:32 +03:00
poll = p["process"].poll()
if self.config.record.enabled and "record" in p["roles"]:
now_utc = datetime.now().astimezone(timezone.utc)
# Check if we're within the grace period after enabling recording
# Grace period: 90 seconds allows time for ffmpeg to start and create first segment
in_grace_period = self.record_enable_time is not None and (
now_utc - self.record_enable_time
) < timedelta(seconds=90)
latest_cache_dt = (
datetime.fromtimestamp(
self.latest_cache_segment_time, tz=timezone.utc
)
if self.latest_cache_segment_time > 0
else now_utc - timedelta(seconds=1)
)
latest_valid_dt = (
datetime.fromtimestamp(
self.latest_valid_segment_time, tz=timezone.utc
)
if self.latest_valid_segment_time > 0
else now_utc - timedelta(seconds=1)
)
latest_invalid_dt = (
datetime.fromtimestamp(
self.latest_invalid_segment_time, tz=timezone.utc
)
if self.latest_invalid_segment_time > 0
else now_utc - timedelta(seconds=1)
)
# ensure segments are still being created and that they have valid video data
# Skip checks during grace period to allow segments to start being created
stale_window = timedelta(seconds=self.record_stale_threshold)
cache_stale = not in_grace_period and now_utc > (
latest_cache_dt + stale_window
)
valid_stale = not in_grace_period and now_utc > (
latest_valid_dt + stale_window
)
invalid_stale_condition = (
self.latest_invalid_segment_time > 0
and not in_grace_period
and now_utc > (latest_invalid_dt + stale_window)
and self.latest_valid_segment_time
<= self.latest_invalid_segment_time
)
invalid_stale = invalid_stale_condition
if cache_stale or valid_stale or invalid_stale:
if cache_stale:
reason = "No new recording segments were created"
elif valid_stale:
reason = "No new valid recording segments were created"
else: # invalid_stale
reason = (
"No valid segments created since last invalid segment"
)
self.logger.error(
f"{reason} for {self.config.name} in the last {self.record_stale_threshold}s. Restarting the ffmpeg record process..."
)
p["process"] = start_or_restart_ffmpeg(
p["cmd"],
self.logger,
p["logpipe"],
ffmpeg_process=p["process"],
)
for role in p["roles"]:
self.requestor.send_data(
f"{self.config.name}/status/{role.value}", "offline"
)
continue
else:
self._send_record_status("online", now)
p["latest_segment_time"] = self.latest_cache_segment_time
if poll is None:
2020-11-30 00:55:53 +03:00
continue
for role in p["roles"]:
self.requestor.send_data(
f"{self.config.name}/status/{role.value}", "offline"
)
2021-02-17 16:23:32 +03:00
p["logpipe"].dump()
p["process"] = start_or_restart_ffmpeg(
p["cmd"], self.logger, p["logpipe"], ffmpeg_process=p["process"]
)
# Prune expired reconnect timestamps
now = datetime.now().timestamp()
while (
self.reconnect_timestamps and self.reconnect_timestamps[0] < now - 3600
):
self.reconnect_timestamps.popleft()
if self.reconnects:
self.reconnects.value = len(self.reconnect_timestamps)
# Update stall metrics based on last processed frame timestamp
processed_ts = (
float(self.detection_frame.value) if self.detection_frame else 0.0
)
if processed_ts > 0:
delta = now - processed_ts
observed_fps = (
self.camera_fps.value
if self.camera_fps.value > 0
else self.config.detect.fps
)
interval = 1.0 / max(observed_fps, 0.1)
stall_threshold = max(2.0 * interval, 2.0)
if delta > stall_threshold:
if not self._stall_active:
self._stall_timestamps.append(now)
self._stall_active = True
else:
self._stall_active = False
while self._stall_timestamps and self._stall_timestamps[0] < now - 3600:
self._stall_timestamps.popleft()
if self.stalls:
self.stalls.value = len(self._stall_timestamps)
self.stop_all_ffmpeg()
self.logpipe.close()
self.config_subscriber.stop()
self.segment_subscriber.stop()
2021-02-17 16:23:32 +03:00
2020-11-30 00:55:53 +03:00
def start_ffmpeg_detect(self):
2021-02-17 16:23:32 +03:00
ffmpeg_cmd = [
c["cmd"] for c in self.config.ffmpeg_cmds if "detect" in c["roles"]
][0]
self.ffmpeg_detect_process = start_or_restart_ffmpeg(
ffmpeg_cmd, self.logger, self.logpipe, self.frame_size
)
2020-11-30 00:55:53 +03:00
self.ffmpeg_pid.value = self.ffmpeg_detect_process.pid
Use Fork-Server As Spawn Method (#18682) * Set runtime * Use count correctly * Don't assume camera sizes * Use separate zmq proxy for object detection * Correct order * Use forkserver * Only store PID instead of entire process reference * Cleanup * Catch correct errors * Fix typing * Remove before_run from process util The before_run never actually ran because: You're right to suspect an issue with before_run not being called and a potential deadlock. The way you've implemented the run_wrapper using __getattribute__ for the run method of BaseProcess is a common pitfall in Python's multiprocessing, especially when combined with how multiprocessing.Process works internally. Here's a breakdown of why before_run isn't being called and why you might be experiencing a deadlock: The Problem: __getattribute__ and Process Serialization When you create a multiprocessing.Process object and call start(), the multiprocessing module needs to serialize the process object (or at least enough of it to re-create the process in the new interpreter). It then pickles this serialized object and sends it to the newly spawned process. The issue with your __getattribute__ implementation for run is that: run is retrieved during serialization: When multiprocessing tries to pickle your Process object to send to the new process, it will likely access the run attribute. This triggers your __getattribute__ wrapper, which then tries to bind run_wrapper to self. run_wrapper is bound to the parent process's self: The run_wrapper closure, when created in the parent process, captures the self (the Process instance) from the parent's memory space. Deserialization creates a new object: In the child process, a new Process object is created by deserializing the pickled data. However, the run_wrapper method that was pickled still holds a reference to the self from the parent process. This is a subtle but critical distinction. The child's run is not your wrapped run: When the child process starts, it internally calls its own run method. Because of the serialization and deserialization process, the run method that's ultimately executed in the child process is the original multiprocessing.Process.run or the Process.run if you had directly overridden it. Your __getattribute__ magic, which wraps run, isn't correctly applied to the Process object within the child's context. * Cleanup * Logging bugfix (#18465) * use mp Manager to handle logging queues A Python bug (https://github.com/python/cpython/issues/91555) was preventing logs from the embeddings maintainer process from printing. The bug is fixed in Python 3.14, but a viable workaround is to use the multiprocessing Manager, which better manages mp queues and causes the logging to work correctly. * consolidate * fix typing * Fix typing * Use global log queue * Move to using process for logging * Convert camera tracking to process * Add more processes * Finalize process * Cleanup * Cleanup typing * Formatting * Remove daemon --------- Co-authored-by: Josh Hawkins <32435876+hawkeye217@users.noreply.github.com>
2025-06-12 21:12:34 +03:00
self.capture_thread = CameraCaptureRunner(
self.config,
self.shm_frame_count,
self.frame_index,
2021-02-17 16:23:32 +03:00
self.ffmpeg_detect_process,
self.frame_shape,
self.frame_queue,
self.camera_fps,
self.skipped_fps,
self.stop_event,
2021-02-17 16:23:32 +03:00
)
2020-11-01 19:55:11 +03:00
self.capture_thread.start()
2020-10-25 18:05:21 +03:00
def start_all_ffmpeg(self):
"""Start all ffmpeg processes (detection and others)."""
logger.debug(f"Starting all ffmpeg processes for {self.config.name}")
self.start_ffmpeg_detect()
for c in self.config.ffmpeg_cmds:
if "detect" in c["roles"]:
continue
logpipe = LogPipe(
f"ffmpeg.{self.config.name}.{'_'.join(sorted(c['roles']))}"
)
self.ffmpeg_other_processes.append(
{
"cmd": c["cmd"],
"roles": c["roles"],
"logpipe": logpipe,
"process": start_or_restart_ffmpeg(c["cmd"], self.logger, logpipe),
}
)
def stop_all_ffmpeg(self):
"""Stop all ffmpeg processes (detection and others)."""
logger.debug(f"Stopping all ffmpeg processes for {self.config.name}")
if self.capture_thread is not None and self.capture_thread.is_alive():
self.capture_thread.join(timeout=5)
if self.capture_thread.is_alive():
self.logger.warning(
f"Capture thread for {self.config.name} did not stop gracefully."
)
if self.ffmpeg_detect_process is not None:
stop_ffmpeg(self.ffmpeg_detect_process, self.logger)
self.ffmpeg_detect_process = None
for p in self.ffmpeg_other_processes[:]:
if p["process"] is not None:
stop_ffmpeg(p["process"], self.logger)
p["logpipe"].close()
self.ffmpeg_other_processes.clear()
2021-02-17 16:23:32 +03:00
Use Fork-Server As Spawn Method (#18682) * Set runtime * Use count correctly * Don't assume camera sizes * Use separate zmq proxy for object detection * Correct order * Use forkserver * Only store PID instead of entire process reference * Cleanup * Catch correct errors * Fix typing * Remove before_run from process util The before_run never actually ran because: You're right to suspect an issue with before_run not being called and a potential deadlock. The way you've implemented the run_wrapper using __getattribute__ for the run method of BaseProcess is a common pitfall in Python's multiprocessing, especially when combined with how multiprocessing.Process works internally. Here's a breakdown of why before_run isn't being called and why you might be experiencing a deadlock: The Problem: __getattribute__ and Process Serialization When you create a multiprocessing.Process object and call start(), the multiprocessing module needs to serialize the process object (or at least enough of it to re-create the process in the new interpreter). It then pickles this serialized object and sends it to the newly spawned process. The issue with your __getattribute__ implementation for run is that: run is retrieved during serialization: When multiprocessing tries to pickle your Process object to send to the new process, it will likely access the run attribute. This triggers your __getattribute__ wrapper, which then tries to bind run_wrapper to self. run_wrapper is bound to the parent process's self: The run_wrapper closure, when created in the parent process, captures the self (the Process instance) from the parent's memory space. Deserialization creates a new object: In the child process, a new Process object is created by deserializing the pickled data. However, the run_wrapper method that was pickled still holds a reference to the self from the parent process. This is a subtle but critical distinction. The child's run is not your wrapped run: When the child process starts, it internally calls its own run method. Because of the serialization and deserialization process, the run method that's ultimately executed in the child process is the original multiprocessing.Process.run or the Process.run if you had directly overridden it. Your __getattribute__ magic, which wraps run, isn't correctly applied to the Process object within the child's context. * Cleanup * Logging bugfix (#18465) * use mp Manager to handle logging queues A Python bug (https://github.com/python/cpython/issues/91555) was preventing logs from the embeddings maintainer process from printing. The bug is fixed in Python 3.14, but a viable workaround is to use the multiprocessing Manager, which better manages mp queues and causes the logging to work correctly. * consolidate * fix typing * Fix typing * Use global log queue * Move to using process for logging * Convert camera tracking to process * Add more processes * Finalize process * Cleanup * Cleanup typing * Formatting * Remove daemon --------- Co-authored-by: Josh Hawkins <32435876+hawkeye217@users.noreply.github.com>
2025-06-12 21:12:34 +03:00
class CameraCaptureRunner(threading.Thread):
def __init__(
self,
config: CameraConfig,
shm_frame_count: int,
frame_index: int,
ffmpeg_process,
frame_shape: tuple[int, int],
frame_queue: Queue,
fps: Value,
skipped_fps: Value,
stop_event: MpEvent,
):
threading.Thread.__init__(self)
self.name = f"capture:{config.name}"
self.config = config
self.shm_frame_count = shm_frame_count
self.frame_index = frame_index
self.frame_shape = frame_shape
self.frame_queue = frame_queue
self.fps = fps
self.stop_event = stop_event
self.skipped_fps = skipped_fps
self.frame_manager = SharedMemoryFrameManager()
self.ffmpeg_process = ffmpeg_process
self.current_frame = Value("d", 0.0)
self.last_frame = 0
def run(self):
2021-02-17 16:23:32 +03:00
capture_frames(
self.ffmpeg_process,
self.config,
self.shm_frame_count,
self.frame_index,
2021-02-17 16:23:32 +03:00
self.frame_shape,
self.frame_manager,
self.frame_queue,
self.fps,
self.skipped_fps,
self.current_frame,
self.stop_event,
2021-02-17 16:23:32 +03:00
)
class CameraCapture(FrigateProcess):
Use Fork-Server As Spawn Method (#18682) * Set runtime * Use count correctly * Don't assume camera sizes * Use separate zmq proxy for object detection * Correct order * Use forkserver * Only store PID instead of entire process reference * Cleanup * Catch correct errors * Fix typing * Remove before_run from process util The before_run never actually ran because: You're right to suspect an issue with before_run not being called and a potential deadlock. The way you've implemented the run_wrapper using __getattribute__ for the run method of BaseProcess is a common pitfall in Python's multiprocessing, especially when combined with how multiprocessing.Process works internally. Here's a breakdown of why before_run isn't being called and why you might be experiencing a deadlock: The Problem: __getattribute__ and Process Serialization When you create a multiprocessing.Process object and call start(), the multiprocessing module needs to serialize the process object (or at least enough of it to re-create the process in the new interpreter). It then pickles this serialized object and sends it to the newly spawned process. The issue with your __getattribute__ implementation for run is that: run is retrieved during serialization: When multiprocessing tries to pickle your Process object to send to the new process, it will likely access the run attribute. This triggers your __getattribute__ wrapper, which then tries to bind run_wrapper to self. run_wrapper is bound to the parent process's self: The run_wrapper closure, when created in the parent process, captures the self (the Process instance) from the parent's memory space. Deserialization creates a new object: In the child process, a new Process object is created by deserializing the pickled data. However, the run_wrapper method that was pickled still holds a reference to the self from the parent process. This is a subtle but critical distinction. The child's run is not your wrapped run: When the child process starts, it internally calls its own run method. Because of the serialization and deserialization process, the run method that's ultimately executed in the child process is the original multiprocessing.Process.run or the Process.run if you had directly overridden it. Your __getattribute__ magic, which wraps run, isn't correctly applied to the Process object within the child's context. * Cleanup * Logging bugfix (#18465) * use mp Manager to handle logging queues A Python bug (https://github.com/python/cpython/issues/91555) was preventing logs from the embeddings maintainer process from printing. The bug is fixed in Python 3.14, but a viable workaround is to use the multiprocessing Manager, which better manages mp queues and causes the logging to work correctly. * consolidate * fix typing * Fix typing * Use global log queue * Move to using process for logging * Convert camera tracking to process * Add more processes * Finalize process * Cleanup * Cleanup typing * Formatting * Remove daemon --------- Co-authored-by: Josh Hawkins <32435876+hawkeye217@users.noreply.github.com>
2025-06-12 21:12:34 +03:00
def __init__(
self,
config: CameraConfig,
shm_frame_count: int,
camera_metrics: CameraMetrics,
stop_event: MpEvent,
log_config: LoggerConfig | None = None,
Use Fork-Server As Spawn Method (#18682) * Set runtime * Use count correctly * Don't assume camera sizes * Use separate zmq proxy for object detection * Correct order * Use forkserver * Only store PID instead of entire process reference * Cleanup * Catch correct errors * Fix typing * Remove before_run from process util The before_run never actually ran because: You're right to suspect an issue with before_run not being called and a potential deadlock. The way you've implemented the run_wrapper using __getattribute__ for the run method of BaseProcess is a common pitfall in Python's multiprocessing, especially when combined with how multiprocessing.Process works internally. Here's a breakdown of why before_run isn't being called and why you might be experiencing a deadlock: The Problem: __getattribute__ and Process Serialization When you create a multiprocessing.Process object and call start(), the multiprocessing module needs to serialize the process object (or at least enough of it to re-create the process in the new interpreter). It then pickles this serialized object and sends it to the newly spawned process. The issue with your __getattribute__ implementation for run is that: run is retrieved during serialization: When multiprocessing tries to pickle your Process object to send to the new process, it will likely access the run attribute. This triggers your __getattribute__ wrapper, which then tries to bind run_wrapper to self. run_wrapper is bound to the parent process's self: The run_wrapper closure, when created in the parent process, captures the self (the Process instance) from the parent's memory space. Deserialization creates a new object: In the child process, a new Process object is created by deserializing the pickled data. However, the run_wrapper method that was pickled still holds a reference to the self from the parent process. This is a subtle but critical distinction. The child's run is not your wrapped run: When the child process starts, it internally calls its own run method. Because of the serialization and deserialization process, the run method that's ultimately executed in the child process is the original multiprocessing.Process.run or the Process.run if you had directly overridden it. Your __getattribute__ magic, which wraps run, isn't correctly applied to the Process object within the child's context. * Cleanup * Logging bugfix (#18465) * use mp Manager to handle logging queues A Python bug (https://github.com/python/cpython/issues/91555) was preventing logs from the embeddings maintainer process from printing. The bug is fixed in Python 3.14, but a viable workaround is to use the multiprocessing Manager, which better manages mp queues and causes the logging to work correctly. * consolidate * fix typing * Fix typing * Use global log queue * Move to using process for logging * Convert camera tracking to process * Add more processes * Finalize process * Cleanup * Cleanup typing * Formatting * Remove daemon --------- Co-authored-by: Josh Hawkins <32435876+hawkeye217@users.noreply.github.com>
2025-06-12 21:12:34 +03:00
) -> None:
super().__init__(
stop_event,
PROCESS_PRIORITY_HIGH,
name=f"frigate.capture:{config.name}",
daemon=True,
)
Use Fork-Server As Spawn Method (#18682) * Set runtime * Use count correctly * Don't assume camera sizes * Use separate zmq proxy for object detection * Correct order * Use forkserver * Only store PID instead of entire process reference * Cleanup * Catch correct errors * Fix typing * Remove before_run from process util The before_run never actually ran because: You're right to suspect an issue with before_run not being called and a potential deadlock. The way you've implemented the run_wrapper using __getattribute__ for the run method of BaseProcess is a common pitfall in Python's multiprocessing, especially when combined with how multiprocessing.Process works internally. Here's a breakdown of why before_run isn't being called and why you might be experiencing a deadlock: The Problem: __getattribute__ and Process Serialization When you create a multiprocessing.Process object and call start(), the multiprocessing module needs to serialize the process object (or at least enough of it to re-create the process in the new interpreter). It then pickles this serialized object and sends it to the newly spawned process. The issue with your __getattribute__ implementation for run is that: run is retrieved during serialization: When multiprocessing tries to pickle your Process object to send to the new process, it will likely access the run attribute. This triggers your __getattribute__ wrapper, which then tries to bind run_wrapper to self. run_wrapper is bound to the parent process's self: The run_wrapper closure, when created in the parent process, captures the self (the Process instance) from the parent's memory space. Deserialization creates a new object: In the child process, a new Process object is created by deserializing the pickled data. However, the run_wrapper method that was pickled still holds a reference to the self from the parent process. This is a subtle but critical distinction. The child's run is not your wrapped run: When the child process starts, it internally calls its own run method. Because of the serialization and deserialization process, the run method that's ultimately executed in the child process is the original multiprocessing.Process.run or the Process.run if you had directly overridden it. Your __getattribute__ magic, which wraps run, isn't correctly applied to the Process object within the child's context. * Cleanup * Logging bugfix (#18465) * use mp Manager to handle logging queues A Python bug (https://github.com/python/cpython/issues/91555) was preventing logs from the embeddings maintainer process from printing. The bug is fixed in Python 3.14, but a viable workaround is to use the multiprocessing Manager, which better manages mp queues and causes the logging to work correctly. * consolidate * fix typing * Fix typing * Use global log queue * Move to using process for logging * Convert camera tracking to process * Add more processes * Finalize process * Cleanup * Cleanup typing * Formatting * Remove daemon --------- Co-authored-by: Josh Hawkins <32435876+hawkeye217@users.noreply.github.com>
2025-06-12 21:12:34 +03:00
self.config = config
self.shm_frame_count = shm_frame_count
self.camera_metrics = camera_metrics
self.log_config = log_config
2020-11-30 01:19:59 +03:00
Use Fork-Server As Spawn Method (#18682) * Set runtime * Use count correctly * Don't assume camera sizes * Use separate zmq proxy for object detection * Correct order * Use forkserver * Only store PID instead of entire process reference * Cleanup * Catch correct errors * Fix typing * Remove before_run from process util The before_run never actually ran because: You're right to suspect an issue with before_run not being called and a potential deadlock. The way you've implemented the run_wrapper using __getattribute__ for the run method of BaseProcess is a common pitfall in Python's multiprocessing, especially when combined with how multiprocessing.Process works internally. Here's a breakdown of why before_run isn't being called and why you might be experiencing a deadlock: The Problem: __getattribute__ and Process Serialization When you create a multiprocessing.Process object and call start(), the multiprocessing module needs to serialize the process object (or at least enough of it to re-create the process in the new interpreter). It then pickles this serialized object and sends it to the newly spawned process. The issue with your __getattribute__ implementation for run is that: run is retrieved during serialization: When multiprocessing tries to pickle your Process object to send to the new process, it will likely access the run attribute. This triggers your __getattribute__ wrapper, which then tries to bind run_wrapper to self. run_wrapper is bound to the parent process's self: The run_wrapper closure, when created in the parent process, captures the self (the Process instance) from the parent's memory space. Deserialization creates a new object: In the child process, a new Process object is created by deserializing the pickled data. However, the run_wrapper method that was pickled still holds a reference to the self from the parent process. This is a subtle but critical distinction. The child's run is not your wrapped run: When the child process starts, it internally calls its own run method. Because of the serialization and deserialization process, the run method that's ultimately executed in the child process is the original multiprocessing.Process.run or the Process.run if you had directly overridden it. Your __getattribute__ magic, which wraps run, isn't correctly applied to the Process object within the child's context. * Cleanup * Logging bugfix (#18465) * use mp Manager to handle logging queues A Python bug (https://github.com/python/cpython/issues/91555) was preventing logs from the embeddings maintainer process from printing. The bug is fixed in Python 3.14, but a viable workaround is to use the multiprocessing Manager, which better manages mp queues and causes the logging to work correctly. * consolidate * fix typing * Fix typing * Use global log queue * Move to using process for logging * Convert camera tracking to process * Add more processes * Finalize process * Cleanup * Cleanup typing * Formatting * Remove daemon --------- Co-authored-by: Josh Hawkins <32435876+hawkeye217@users.noreply.github.com>
2025-06-12 21:12:34 +03:00
def run(self) -> None:
self.pre_run_setup(self.log_config)
Use Fork-Server As Spawn Method (#18682) * Set runtime * Use count correctly * Don't assume camera sizes * Use separate zmq proxy for object detection * Correct order * Use forkserver * Only store PID instead of entire process reference * Cleanup * Catch correct errors * Fix typing * Remove before_run from process util The before_run never actually ran because: You're right to suspect an issue with before_run not being called and a potential deadlock. The way you've implemented the run_wrapper using __getattribute__ for the run method of BaseProcess is a common pitfall in Python's multiprocessing, especially when combined with how multiprocessing.Process works internally. Here's a breakdown of why before_run isn't being called and why you might be experiencing a deadlock: The Problem: __getattribute__ and Process Serialization When you create a multiprocessing.Process object and call start(), the multiprocessing module needs to serialize the process object (or at least enough of it to re-create the process in the new interpreter). It then pickles this serialized object and sends it to the newly spawned process. The issue with your __getattribute__ implementation for run is that: run is retrieved during serialization: When multiprocessing tries to pickle your Process object to send to the new process, it will likely access the run attribute. This triggers your __getattribute__ wrapper, which then tries to bind run_wrapper to self. run_wrapper is bound to the parent process's self: The run_wrapper closure, when created in the parent process, captures the self (the Process instance) from the parent's memory space. Deserialization creates a new object: In the child process, a new Process object is created by deserializing the pickled data. However, the run_wrapper method that was pickled still holds a reference to the self from the parent process. This is a subtle but critical distinction. The child's run is not your wrapped run: When the child process starts, it internally calls its own run method. Because of the serialization and deserialization process, the run method that's ultimately executed in the child process is the original multiprocessing.Process.run or the Process.run if you had directly overridden it. Your __getattribute__ magic, which wraps run, isn't correctly applied to the Process object within the child's context. * Cleanup * Logging bugfix (#18465) * use mp Manager to handle logging queues A Python bug (https://github.com/python/cpython/issues/91555) was preventing logs from the embeddings maintainer process from printing. The bug is fixed in Python 3.14, but a viable workaround is to use the multiprocessing Manager, which better manages mp queues and causes the logging to work correctly. * consolidate * fix typing * Fix typing * Use global log queue * Move to using process for logging * Convert camera tracking to process * Add more processes * Finalize process * Cleanup * Cleanup typing * Formatting * Remove daemon --------- Co-authored-by: Josh Hawkins <32435876+hawkeye217@users.noreply.github.com>
2025-06-12 21:12:34 +03:00
camera_watchdog = CameraWatchdog(
self.config,
self.shm_frame_count,
self.camera_metrics.frame_queue,
self.camera_metrics.camera_fps,
self.camera_metrics.skipped_fps,
self.camera_metrics.ffmpeg_pid,
self.camera_metrics.stalls_last_hour,
self.camera_metrics.reconnects_last_hour,
self.camera_metrics.detection_frame,
Use Fork-Server As Spawn Method (#18682) * Set runtime * Use count correctly * Don't assume camera sizes * Use separate zmq proxy for object detection * Correct order * Use forkserver * Only store PID instead of entire process reference * Cleanup * Catch correct errors * Fix typing * Remove before_run from process util The before_run never actually ran because: You're right to suspect an issue with before_run not being called and a potential deadlock. The way you've implemented the run_wrapper using __getattribute__ for the run method of BaseProcess is a common pitfall in Python's multiprocessing, especially when combined with how multiprocessing.Process works internally. Here's a breakdown of why before_run isn't being called and why you might be experiencing a deadlock: The Problem: __getattribute__ and Process Serialization When you create a multiprocessing.Process object and call start(), the multiprocessing module needs to serialize the process object (or at least enough of it to re-create the process in the new interpreter). It then pickles this serialized object and sends it to the newly spawned process. The issue with your __getattribute__ implementation for run is that: run is retrieved during serialization: When multiprocessing tries to pickle your Process object to send to the new process, it will likely access the run attribute. This triggers your __getattribute__ wrapper, which then tries to bind run_wrapper to self. run_wrapper is bound to the parent process's self: The run_wrapper closure, when created in the parent process, captures the self (the Process instance) from the parent's memory space. Deserialization creates a new object: In the child process, a new Process object is created by deserializing the pickled data. However, the run_wrapper method that was pickled still holds a reference to the self from the parent process. This is a subtle but critical distinction. The child's run is not your wrapped run: When the child process starts, it internally calls its own run method. Because of the serialization and deserialization process, the run method that's ultimately executed in the child process is the original multiprocessing.Process.run or the Process.run if you had directly overridden it. Your __getattribute__ magic, which wraps run, isn't correctly applied to the Process object within the child's context. * Cleanup * Logging bugfix (#18465) * use mp Manager to handle logging queues A Python bug (https://github.com/python/cpython/issues/91555) was preventing logs from the embeddings maintainer process from printing. The bug is fixed in Python 3.14, but a viable workaround is to use the multiprocessing Manager, which better manages mp queues and causes the logging to work correctly. * consolidate * fix typing * Fix typing * Use global log queue * Move to using process for logging * Convert camera tracking to process * Add more processes * Finalize process * Cleanup * Cleanup typing * Formatting * Remove daemon --------- Co-authored-by: Josh Hawkins <32435876+hawkeye217@users.noreply.github.com>
2025-06-12 21:12:34 +03:00
self.stop_event,
)
camera_watchdog.start()
camera_watchdog.join()