diff --git a/docker/main/requirements-wheels.txt b/docker/main/requirements-wheels.txt index b28de5e6b..2d216d386 100644 --- a/docker/main/requirements-wheels.txt +++ b/docker/main/requirements-wheels.txt @@ -81,3 +81,5 @@ librosa==0.11.* soundfile==0.13.* # DeGirum detector degirum == 0.16.* +# Memory profiling +memray == 1.15.* diff --git a/docs/docs/troubleshooting/memory.md b/docs/docs/troubleshooting/memory.md new file mode 100644 index 000000000..b8ef5367d --- /dev/null +++ b/docs/docs/troubleshooting/memory.md @@ -0,0 +1,129 @@ +--- +id: memory +title: Memory Troubleshooting +--- + +Frigate includes built-in memory profiling using [memray](https://bloomberg.github.io/memray/) to help diagnose memory issues. This feature allows you to profile specific Frigate modules to identify memory leaks, excessive allocations, or other memory-related problems. + +## Enabling Memory Profiling + +Memory profiling is controlled via the `FRIGATE_MEMRAY_MODULES` environment variable. Set it to a comma-separated list of module names you want to profile: + +```bash +export FRIGATE_MEMRAY_MODULES="frigate.review_segment_manager,frigate.capture" +``` + +### Module Names + +Frigate processes are named using a module-based naming scheme. 
Common module names include: + +- `frigate.review_segment_manager` - Review segment processing +- `frigate.recording_manager` - Recording management +- `frigate.capture` - Camera capture processes (all cameras with this module name) +- `frigate.process` - Camera processing/tracking (all cameras with this module name) +- `frigate.output` - Output processing +- `frigate.audio_manager` - Audio processing +- `frigate.embeddings` - Embeddings processing + +You can also specify the full process name (including camera-specific identifiers) if you want to profile a specific camera: + +```bash +export FRIGATE_MEMRAY_MODULES="frigate.capture:front_door" +``` + +When you specify a module name (e.g., `frigate.capture`), all processes with that module prefix will be profiled. For example, `frigate.capture` will profile all camera capture processes. + +## How It Works + +1. **Binary File Creation**: When profiling is enabled, memray creates a binary file (`.bin`) in `/config/memray_reports/` that is updated continuously in real-time as the process runs. + +2. **Automatic HTML Generation**: On normal process exit, Frigate automatically: + + - Stops memray tracking + - Generates an HTML flamegraph report + - Saves it to `/config/memray_reports/<process_name>.html` (the process name with `:` replaced by `_`) + +3. **Crash Recovery**: If a process crashes (SIGKILL, segfault, etc.), the binary file is preserved with all data up to the crash point. You can manually generate the HTML report from the binary file. + +## Viewing Reports + +### Automatic Reports + +After a process exits normally, you'll find HTML reports in `/config/memray_reports/`. Open these files in a web browser to view interactive flamegraphs showing memory usage patterns. + +### Manual Report Generation + +If a process crashes or you want to generate a report from an existing binary file, you can manually create the HTML report: + +```bash +memray flamegraph /config/memray_reports/<process_name>.bin +``` + +This will generate an HTML file that you can open in your browser. 
+ +## Understanding the Reports + +Memray flamegraphs show: + +- **Memory allocations over time**: See where memory is being allocated in your code +- **Call stacks**: Understand the full call chain leading to allocations +- **Memory hotspots**: Identify functions or code paths that allocate the most memory +- **Memory leaks**: Spot patterns where memory is allocated but not freed + +The interactive HTML reports allow you to: + +- Zoom into specific time ranges +- Filter by function names +- View detailed allocation information +- Export data for further analysis + +## Best Practices + +1. **Profile During Issues**: Enable profiling when you're experiencing memory issues, not all the time, as it adds some overhead. + +2. **Profile Specific Modules**: Instead of profiling everything, focus on the modules you suspect are causing issues. + +3. **Let Processes Run**: Allow processes to run for a meaningful duration to capture representative memory usage patterns. + +4. **Check Binary Files**: If HTML reports aren't generated automatically (e.g., after a crash), check for `.bin` files in `/config/memray_reports/` and generate reports manually. + +5. **Compare Reports**: Generate reports at different times to compare memory usage patterns and identify trends. 
+ +## Troubleshooting + +### No Reports Generated + +- Check that the environment variable is set correctly +- Verify the module name matches exactly (case-sensitive) +- Check logs for memray-related errors +- Ensure `/config/memray_reports/` is writable (Frigate creates the directory if it is missing) + +### Process Crashed Before Report Generation + +- Look for `.bin` files in `/config/memray_reports/` +- Manually generate HTML reports using: `memray flamegraph <process_name>.bin` +- The binary file contains all data up to the crash point + +### Reports Show No Data + +- Ensure the process ran long enough to generate meaningful data +- Check that memray is properly installed (included by default in Frigate) +- Verify the process actually started and ran (check process logs) + +## Example Usage + +```bash +# Enable profiling for review and capture modules +export FRIGATE_MEMRAY_MODULES="frigate.review_segment_manager,frigate.capture" + +# Start Frigate +# ... let it run for a while ... + +# Check for reports +ls -lh /config/memray_reports/ + +# If a process crashed, manually generate report +memray flamegraph /config/memray_reports/frigate.capture_front_door.bin +``` + +For more information about memray and interpreting reports, see the [official memray documentation](https://bloomberg.github.io/memray/). 
diff --git a/docs/sidebars.ts b/docs/sidebars.ts index 09b639aa1..1f5d0572f 100644 --- a/docs/sidebars.ts +++ b/docs/sidebars.ts @@ -131,6 +131,7 @@ const sidebars: SidebarsConfig = { "troubleshooting/recordings", "troubleshooting/gpu", "troubleshooting/edgetpu", + "troubleshooting/memory", ], Development: [ "development/contributing", diff --git a/frigate/util/process.py b/frigate/util/process.py index b9fede44e..aa9fde2a8 100644 --- a/frigate/util/process.py +++ b/frigate/util/process.py @@ -1,7 +1,10 @@ +import atexit import faulthandler import logging import multiprocessing as mp import os +import pathlib +import subprocess import threading from logging.handlers import QueueHandler from multiprocessing.synchronize import Event as MpEvent @@ -48,6 +51,7 @@ class FrigateProcess(BaseProcess): def before_start(self) -> None: self.__log_queue = frigate.log.log_listener.queue + self.__memray_tracker = None def pre_run_setup(self, logConfig: LoggerConfig | None = None) -> None: os.nice(self.priority) @@ -64,3 +68,86 @@ class FrigateProcess(BaseProcess): frigate.log.apply_log_levels( logConfig.default.value.upper(), logConfig.logs ) + + self._setup_memray() + + def _setup_memray(self) -> None: + """Setup memray profiling if enabled via environment variable.""" + memray_modules = os.environ.get("FRIGATE_MEMRAY_MODULES", "") + + if not memray_modules: + return + + # Extract module name from process name (e.g., "frigate.capture:camera" -> "frigate.capture") + process_name = self.name + module_name = ( + process_name.split(":")[0] if ":" in process_name else process_name + ) + + enabled_modules = [m.strip() for m in memray_modules.split(",")] + + if module_name not in enabled_modules and process_name not in enabled_modules: + return + + try: + import memray + + reports_dir = pathlib.Path("/config/memray_reports") + reports_dir.mkdir(parents=True, exist_ok=True) + safe_name = ( + process_name.replace(":", "_").replace("/", "_").replace("\\", "_") + ) + + binary_file = 
reports_dir / f"{safe_name}.bin" + + self.__memray_tracker = memray.Tracker(str(binary_file)) + self.__memray_tracker.__enter__() + + # Register cleanup handler to stop tracking and generate HTML report + # atexit runs on normal exits and most signal-based terminations (SIGTERM, SIGINT) + # For hard kills (SIGKILL) or segfaults, the binary file is preserved for manual generation + atexit.register(self._cleanup_memray, safe_name, binary_file) + + self.logger.info( + f"Memray profiling enabled for module {module_name} (process: {self.name}). " + f"Binary file (updated continuously): {binary_file}. " + f"HTML report will be generated on exit: {reports_dir}/{safe_name}.html. " + f"If process crashes, manually generate with: memray flamegraph {binary_file}" + ) + except Exception as e: + self.logger.error(f"Failed to setup memray profiling: {e}", exc_info=True) + + def _cleanup_memray(self, safe_name: str, binary_file: pathlib.Path) -> None: + """Stop memray tracking and generate HTML report.""" + if self.__memray_tracker is None: + return + + try: + self.__memray_tracker.__exit__(None, None, None) + self.__memray_tracker = None + + reports_dir = pathlib.Path("/config/memray_reports") + html_file = reports_dir / f"{safe_name}.html" + + result = subprocess.run( + ["memray", "flamegraph", "--output", str(html_file), str(binary_file)], + capture_output=True, + text=True, + timeout=10, + ) + + if result.returncode == 0: + self.logger.info(f"Memray report generated: {html_file}") + else: + self.logger.error( + f"Failed to generate memray report: {result.stderr}. " + f"Binary file preserved at {binary_file} for manual generation." + ) + + # Keep the binary file for manual report generation if needed + # Users can run: memray flamegraph {binary_file} + + except subprocess.TimeoutExpired: + self.logger.error("Memray report generation timed out") + except Exception as e: + self.logger.error(f"Failed to cleanup memray profiling: {e}", exc_info=True)