Add ability to change source of images for review descriptions (#20676)

* Add ability to change source of images for review descriptions * Undo
2025-12-06 05:24:11 +03:00 · 2025-10-26 07:40:38 -06:00 · 2025-10-26 07:40:38 -06:00 · 094a0a6e05
commit 094a0a6e05
parent 840d567d22
4 changed files with 223 additions and 39 deletions
--- a/docs/docs/configuration/genai/review_summaries.md
+++ b/docs/docs/configuration/genai/review_summaries.md
@ -39,6 +39,26 @@ Each installation and even camera can have different parameters for what is cons
 - Brief movement with legitimate items (bags, packages, tools, equipment) in appropriate zones is routine.
 ```
 ### Image Source
 By default, review summaries use preview images (cached preview frames), which have a lower resolution but use fewer tokens per image. For better image quality and more detailed analysis, you can configure Frigate to extract frames directly from recordings at a higher resolution:
 ```yaml
 review:
  genai:
    enabled: true
    image_source: recordings # Options: "preview" (default) or "recordings"
 ```
 When using `recordings`, frames are extracted at 480p resolution (480px height), providing better detail for the LLM while being mindful of context window size. This is particularly useful for scenarios where fine details matter, such as identifying license plates, reading text, or analyzing distant objects. Note that using recordings will:
 - Provide higher quality images to the LLM (480p vs 180p preview images)
 - Use more tokens per image (~200-300 tokens vs ~100 tokens for preview)
 - Result in fewer frames being sent to stay within context limits (typically 6-12 frames vs 8-20 frames)
 - Require that recordings are enabled for the camera
 If recordings are not available for a given time period, the system will automatically fall back to using preview frames.
 ### Additional Concerns
 Along with the concern of suspicious activity or immediate threat, you may have concerns such as animals in your garden or a gate being left open. These concerns can be configured so that the review summaries will make note of them if the activity requires additional review. For example:
--- a/docs/docs/configuration/reference.md
+++ b/docs/docs/configuration/reference.md
@ -429,6 +429,10 @@ review:
    alerts: True
    # Optional: Enable GenAI review summaries for detections (default: shown below)
    detections: False
    # Optional: Image source for GenAI (default: preview)
    # Options: "preview" (uses cached preview frames at 180p) or "recordings" (extracts frames from recordings at 480p)
    # Using "recordings" provides better image quality but uses ~2-3x more tokens per image (~200-300 vs ~100 tokens)
    image_source: preview
    # Optional: Additional concerns that the GenAI should make note of (default: None)
    additional_concerns:
      - Animals in the garden
--- a/frigate/config/camera/review.py
+++ b/frigate/config/camera/review.py
@ -1,10 +1,18 @@
 from enum import Enum
 from typing import Optional, Union
 from pydantic import Field, field_validator
 from ..base import FrigateBaseModel
-__all__ = ["ReviewConfig", "DetectionsConfig", "AlertsConfig"]
+__all__ = ["ReviewConfig", "DetectionsConfig", "AlertsConfig", "ImageSourceEnum"]
 class ImageSourceEnum(str, Enum):
    """Image source options for GenAI Review.

    Selects where the frames sent to the GenAI provider for review
    descriptions come from. Members are plain strings so they can be used
    directly as config values.
    """
    # Use the cached preview frames; this is the config default.
    preview = "preview"
    # Extract frames directly from the camera's recordings.
    recordings = "recordings"
 DEFAULT_ALERT_OBJECTS = ["person", "car"]
@ -77,6 +85,10 @@ class GenAIReviewConfig(FrigateBaseModel):
    )
    alerts: bool = Field(default=True, title="Enable GenAI for alerts.")
    detections: bool = Field(default=False, title="Enable GenAI for detections.")
    image_source: ImageSourceEnum = Field(
        default=ImageSourceEnum.preview,
        title="Image source for review descriptions.",
    )
    additional_concerns: list[str] = Field(
        default=[],
        title="Additional concerns that GenAI should make note of on this camera.",
--- a/frigate/data_processing/post/review_descriptions.py
+++ b/frigate/data_processing/post/review_descriptions.py
@ -3,6 +3,7 @@
 import copy
 import datetime
 import logging
 import math
 import os
 import shutil
 import threading
@ -10,16 +11,18 @@ from pathlib import Path
 from typing import Any
 import cv2
 from peewee import DoesNotExist
 from frigate.comms.embeddings_updater import EmbeddingsRequestEnum
 from frigate.comms.inter_process import InterProcessRequestor
 from frigate.config import FrigateConfig
-from frigate.config.camera.review import GenAIReviewConfig
+from frigate.config.camera.review import GenAIReviewConfig, ImageSourceEnum
 from frigate.const import CACHE_DIR, CLIPS_DIR, UPDATE_REVIEW_DESCRIPTION
 from frigate.data_processing.types import PostProcessDataEnum
 from frigate.genai import GenAIClient
-from frigate.models import ReviewSegment
+from frigate.models import Recordings, ReviewSegment
 from frigate.util.builtin import EventsPerSecond, InferenceSpeed
 from frigate.util.image import get_image_from_recording
 from ..post.api import PostProcessorApi
 from ..types import DataProcessorMetrics
@ -43,20 +46,35 @@ class ReviewDescriptionProcessor(PostProcessorApi):
        self.review_descs_dps = EventsPerSecond()
        self.review_descs_dps.start()
-    def calculate_frame_count(self) -> int:
+    def calculate_frame_count(
-        """Calculate optimal number of frames based on context size."""
+        self, image_source: ImageSourceEnum = ImageSourceEnum.preview
-        # With our preview images (height of 180px) each image should be ~100 tokens per image
+    ) -> int:
-        # We want to be conservative to not have too long of query times with too many images
+        """Calculate optimal number of frames based on context size and image source."""
        context_size = self.genai_client.get_context_size()
-        if context_size > 10000:
+        if image_source == ImageSourceEnum.recordings:
-            return 20
+            # With recordings at 480p resolution (480px height), each image uses ~200-300 tokens
-        elif context_size > 6000:
+            # This is ~2-3x more than preview images, so we reduce frame count accordingly
-            return 16
+            # to avoid exceeding context limits and maintain reasonable inference times
-        elif context_size > 4000:
+            if context_size > 10000:
-            return 12
+                return 12
            elif context_size > 6000:
                return 10
            elif context_size > 4000:
                return 8
            else:
                return 6
        else:
-            return 8
+            # With preview images (180px height), each image uses ~100 tokens
            # We can send more frames since they're lower resolution
            if context_size > 10000:
                return 20
            elif context_size > 6000:
                return 16
            elif context_size > 4000:
                return 12
            else:
                return 8
    def process_data(self, data, data_type):
        self.metrics.review_desc_dps.value = self.review_descs_dps.eps()
@ -88,36 +106,50 @@ class ReviewDescriptionProcessor(PostProcessorApi):
            ):
                return
-            frames = self.get_cache_frames(
+            image_source = camera_config.review.genai.image_source
                camera, final_data["start_time"], final_data["end_time"]
            )
-            if not frames:
+            if image_source == ImageSourceEnum.recordings:
-                frames = [final_data["thumb_path"]]
+                thumbs = self.get_recording_frames(
-
+                    camera,
-            thumbs = []
+                    final_data["start_time"],
-
+                    final_data["end_time"],
-            for idx, thumb_path in enumerate(frames):
+                    height=480,  # Use 480p for good balance between quality and token usage
                thumb_data = cv2.imread(thumb_path)
                ret, jpg = cv2.imencode(
                    ".jpg", thumb_data, [int(cv2.IMWRITE_JPEG_QUALITY), 100]
                )
-                if ret:
+                if not thumbs:
-                    thumbs.append(jpg.tobytes())
+                    # Fallback to preview frames if no recordings available
-
+                    logger.warning(
-                if camera_config.review.genai.debug_save_thumbnails:
+                        f"No recording frames found for {camera}, falling back to preview frames"
-                    id = data["after"]["id"]
+                    )
-                    Path(os.path.join(CLIPS_DIR, "genai-requests", f"{id}")).mkdir(
+                    thumbs = self.get_preview_frames_as_bytes(
                        camera,
                        final_data["start_time"],
                        final_data["end_time"],
                        final_data["thumb_path"],
                        id,
                        camera_config.review.genai.debug_save_thumbnails,
                    )
                elif camera_config.review.genai.debug_save_thumbnails:
                    # Save debug thumbnails for recordings
                    Path(os.path.join(CLIPS_DIR, "genai-requests", id)).mkdir(
                        parents=True, exist_ok=True
                    )
-                    shutil.copy(
+                    for idx, frame_bytes in enumerate(thumbs):
-                        thumb_path,
+                        with open(
-                        os.path.join(
+                            os.path.join(CLIPS_DIR, f"genai-requests/{id}/{idx}.jpg"),
-                            CLIPS_DIR,
+                            "wb",
-                            f"genai-requests/{id}/{idx}.webp",
+                        ) as f:
-                        ),
+                            f.write(frame_bytes)
-                    )
+            else:
                # Use preview frames
                thumbs = self.get_preview_frames_as_bytes(
                    camera,
                    final_data["start_time"],
                    final_data["end_time"],
                    final_data["thumb_path"],
                    id,
                    camera_config.review.genai.debug_save_thumbnails,
                )
            # kickoff analysis
            self.review_descs_dps.update()
@ -231,6 +263,122 @@ class ReviewDescriptionProcessor(PostProcessorApi):
        return selected_frames
    def get_recording_frames(
        self,
        camera: str,
        start_time: float,
        end_time: float,
        height: int = 480,
    ) -> list[bytes]:
        """Sample evenly spaced frames for a camera from its recordings.

        The number of samples comes from ``calculate_frame_count`` for the
        recordings image source. Each sample is looked up in the recording
        segment that covers its timestamp; samples with no covering segment
        (or that fail to extract) are logged and left out.

        Args:
            camera: Camera name used to filter recording segments.
            start_time: Timestamp of the first sample.
            end_time: Timestamp of the last sample.
            height: Frame height passed through to the extraction helper.

        Returns:
            The extracted frames, in timestamp order. May be empty.
        """
        span = end_time - start_time
        frame_total = self.calculate_frame_count(ImageSourceEnum.recordings)

        # A lone sample is taken from the midpoint; otherwise the first and
        # last samples land exactly on start_time and end_time.
        if frame_total == 1:
            sample_times = [start_time + span / 2]
        else:
            interval = span / (frame_total - 1)
            sample_times = [start_time + (n * interval) for n in range(frame_total)]

        def grab_frame(moment: float) -> bytes | None:
            """Return the frame at ``moment``, or None if no segment covers it."""
            try:
                covering = (moment >= Recordings.start_time) & (
                    moment <= Recordings.end_time
                )
                segment = (
                    Recordings.select(Recordings.path, Recordings.start_time)
                    .where(covering)
                    .where(Recordings.camera == camera)
                    .order_by(Recordings.start_time.desc())
                    .limit(1)
                    .get()
                )
                # The helper wants an offset relative to the segment start.
                offset = moment - segment.start_time
                return get_image_from_recording(
                    self.config.ffmpeg,
                    segment.path,
                    offset,
                    "mjpeg",
                    height=height,
                )
            except DoesNotExist:
                return None

        collected = []
        for timestamp in sample_times:
            try:
                # Exact timestamp first, then the next whole second as a
                # fallback when nothing was returned.
                jpeg = grab_frame(timestamp) or grab_frame(math.ceil(timestamp))

                if not jpeg:
                    logger.warning(
                        f"No recording found for {camera} at timestamp {timestamp}"
                    )
                    continue

                collected.append(jpeg)
            except Exception as e:
                # One bad sample must not discard the remaining frames.
                logger.error(
                    f"Error extracting frame from recording for {camera} at {timestamp}: {e}"
                )

        return collected
    def get_preview_frames_as_bytes(
        self,
        camera: str,
        start_time: float,
        end_time: float,
        thumb_path_fallback: str,
        review_id: str,
        save_debug: bool,
    ) -> list[bytes]:
        """Get preview frames and convert them to JPEG bytes.

        Frames that cannot be read from disk are skipped with a warning
        instead of aborting the whole request.

        Args:
            camera: Camera name
            start_time: Start timestamp
            end_time: End timestamp
            thumb_path_fallback: Fallback thumbnail path if no preview frames found
            review_id: Review item ID for debug saving
            save_debug: Whether to save debug thumbnails

        Returns:
            List of JPEG image bytes
        """
        frame_paths = self.get_cache_frames(camera, start_time, end_time)
        if not frame_paths:
            frame_paths = [thumb_path_fallback]

        # Create the debug directory once instead of on every iteration.
        debug_dir = os.path.join(CLIPS_DIR, "genai-requests", review_id)
        if save_debug:
            Path(debug_dir).mkdir(parents=True, exist_ok=True)

        thumbs = []
        for idx, thumb_path in enumerate(frame_paths):
            thumb_data = cv2.imread(thumb_path)

            if thumb_data is None:
                # cv2.imread signals a missing or undecodable file by
                # returning None; passing that to imencode would raise and
                # lose every other frame of this review item.
                logger.warning(
                    f"Unable to read preview frame {thumb_path} for {camera}, skipping"
                )
                continue

            ret, jpg = cv2.imencode(
                ".jpg", thumb_data, [int(cv2.IMWRITE_JPEG_QUALITY), 100]
            )
            if ret:
                thumbs.append(jpg.tobytes())

            if save_debug:
                # idx is the position in frame_paths, so debug file names
                # stay stable even when earlier frames were skipped.
                shutil.copy(thumb_path, os.path.join(debug_dir, f"{idx}.webp"))

        return thumbs
@staticmethod
 def run_analysis(