Add ability to change source of images for review descriptions (#20676)

* Add ability to change source of images for review descriptions

* Undo
This commit is contained in:
Nicolas Mowen 2025-10-26 07:40:38 -06:00 committed by GitHub
parent 840d567d22
commit 094a0a6e05
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 223 additions and 39 deletions

View File

@ -39,6 +39,26 @@ Each installation and even camera can have different parameters for what is cons
- Brief movement with legitimate items (bags, packages, tools, equipment) in appropriate zones is routine. - Brief movement with legitimate items (bags, packages, tools, equipment) in appropriate zones is routine.
``` ```
### Image Source
By default, review summaries use preview images (cached preview frames) which have a lower resolution but use fewer tokens per image. For better image quality and more detailed analysis, you can configure Frigate to extract frames directly from recordings at a higher resolution:
```yaml
review:
genai:
enabled: true
image_source: recordings # Options: "preview" (default) or "recordings"
```
When using `recordings`, frames are extracted at 480p resolution (480px height), providing better detail for the LLM while being mindful of context window size. This is particularly useful for scenarios where fine details matter, such as identifying license plates, reading text, or analyzing distant objects. Note that using recordings will:
- Provide higher quality images to the LLM (480p vs 180p preview images)
- Use more tokens per image (~200-300 tokens vs ~100 tokens for preview)
- Result in fewer frames being sent to stay within context limits (typically 6-12 frames vs 8-20 frames)
- Require that recordings are enabled for the camera
If no frames can be extracted from recordings for a review item's time range, the system will automatically fall back to using preview frames.
### Additional Concerns ### Additional Concerns
Along with the concern of suspicious activity or immediate threat, you may have concerns such as animals in your garden or a gate being left open. These concerns can be configured so that the review summaries will make note of them if the activity requires additional review. For example: Along with the concern of suspicious activity or immediate threat, you may have concerns such as animals in your garden or a gate being left open. These concerns can be configured so that the review summaries will make note of them if the activity requires additional review. For example:

View File

@ -429,6 +429,10 @@ review:
alerts: True alerts: True
# Optional: Enable GenAI review summaries for detections (default: shown below) # Optional: Enable GenAI review summaries for detections (default: shown below)
detections: False detections: False
# Optional: Image source for GenAI (default: preview)
# Options: "preview" (uses cached preview frames at 180p) or "recordings" (extracts frames from recordings at 480p)
# Using "recordings" provides better image quality but uses ~2-3x more tokens per image (~200-300 vs ~100 tokens)
image_source: preview
# Optional: Additional concerns that the GenAI should make note of (default: None) # Optional: Additional concerns that the GenAI should make note of (default: None)
additional_concerns: additional_concerns:
- Animals in the garden - Animals in the garden

View File

@ -1,10 +1,18 @@
from enum import Enum
from typing import Optional, Union from typing import Optional, Union
from pydantic import Field, field_validator from pydantic import Field, field_validator
from ..base import FrigateBaseModel from ..base import FrigateBaseModel
__all__ = ["ReviewConfig", "DetectionsConfig", "AlertsConfig"] __all__ = ["ReviewConfig", "DetectionsConfig", "AlertsConfig", "ImageSourceEnum"]
class ImageSourceEnum(str, Enum):
    """Selects where GenAI review descriptions obtain their images."""

    # Cached preview frames.
    preview = "preview"
    # Frames extracted directly from recordings.
    recordings = "recordings"
DEFAULT_ALERT_OBJECTS = ["person", "car"] DEFAULT_ALERT_OBJECTS = ["person", "car"]
@ -77,6 +85,10 @@ class GenAIReviewConfig(FrigateBaseModel):
) )
alerts: bool = Field(default=True, title="Enable GenAI for alerts.") alerts: bool = Field(default=True, title="Enable GenAI for alerts.")
detections: bool = Field(default=False, title="Enable GenAI for detections.") detections: bool = Field(default=False, title="Enable GenAI for detections.")
image_source: ImageSourceEnum = Field(
default=ImageSourceEnum.preview,
title="Image source for review descriptions.",
)
additional_concerns: list[str] = Field( additional_concerns: list[str] = Field(
default=[], default=[],
title="Additional concerns that GenAI should make note of on this camera.", title="Additional concerns that GenAI should make note of on this camera.",

View File

@ -3,6 +3,7 @@
import copy import copy
import datetime import datetime
import logging import logging
import math
import os import os
import shutil import shutil
import threading import threading
@ -10,16 +11,18 @@ from pathlib import Path
from typing import Any from typing import Any
import cv2 import cv2
from peewee import DoesNotExist
from frigate.comms.embeddings_updater import EmbeddingsRequestEnum from frigate.comms.embeddings_updater import EmbeddingsRequestEnum
from frigate.comms.inter_process import InterProcessRequestor from frigate.comms.inter_process import InterProcessRequestor
from frigate.config import FrigateConfig from frigate.config import FrigateConfig
from frigate.config.camera.review import GenAIReviewConfig from frigate.config.camera.review import GenAIReviewConfig, ImageSourceEnum
from frigate.const import CACHE_DIR, CLIPS_DIR, UPDATE_REVIEW_DESCRIPTION from frigate.const import CACHE_DIR, CLIPS_DIR, UPDATE_REVIEW_DESCRIPTION
from frigate.data_processing.types import PostProcessDataEnum from frigate.data_processing.types import PostProcessDataEnum
from frigate.genai import GenAIClient from frigate.genai import GenAIClient
from frigate.models import ReviewSegment from frigate.models import Recordings, ReviewSegment
from frigate.util.builtin import EventsPerSecond, InferenceSpeed from frigate.util.builtin import EventsPerSecond, InferenceSpeed
from frigate.util.image import get_image_from_recording
from ..post.api import PostProcessorApi from ..post.api import PostProcessorApi
from ..types import DataProcessorMetrics from ..types import DataProcessorMetrics
@ -43,20 +46,35 @@ class ReviewDescriptionProcessor(PostProcessorApi):
self.review_descs_dps = EventsPerSecond() self.review_descs_dps = EventsPerSecond()
self.review_descs_dps.start() self.review_descs_dps.start()
def calculate_frame_count(
    self, image_source: ImageSourceEnum = ImageSourceEnum.preview
) -> int:
    """Pick how many frames to send based on context size and image source.

    Args:
        image_source: Where the frames come from. Recording frames are
            budgeted more conservatively than preview frames.

    Returns:
        Number of frames to request for a single review description.
    """
    context_size = self.genai_client.get_context_size()

    if image_source == ImageSourceEnum.recordings:
        # Recordings at 480p (480px height) cost ~200-300 tokens per image,
        # roughly 2-3x a preview image, so fewer frames are sent to stay
        # within context limits and keep inference times reasonable.
        tiers = (12, 10, 8, 6)
    else:
        # Preview images (180px height) cost ~100 tokens per image, so more
        # of them fit in the same context window.
        tiers = (20, 16, 12, 8)

    # Walk the context-size thresholds from largest to smallest; the final
    # tier entry is the floor used for small context windows.
    for threshold, frame_count in zip((10000, 6000, 4000), tiers):
        if context_size > threshold:
            return frame_count

    return tiers[-1]
def process_data(self, data, data_type): def process_data(self, data, data_type):
self.metrics.review_desc_dps.value = self.review_descs_dps.eps() self.metrics.review_desc_dps.value = self.review_descs_dps.eps()
@ -88,36 +106,50 @@ class ReviewDescriptionProcessor(PostProcessorApi):
): ):
return return
frames = self.get_cache_frames( image_source = camera_config.review.genai.image_source
camera, final_data["start_time"], final_data["end_time"]
)
if not frames: if image_source == ImageSourceEnum.recordings:
frames = [final_data["thumb_path"]] thumbs = self.get_recording_frames(
camera,
thumbs = [] final_data["start_time"],
final_data["end_time"],
for idx, thumb_path in enumerate(frames): height=480, # Use 480p for good balance between quality and token usage
thumb_data = cv2.imread(thumb_path)
ret, jpg = cv2.imencode(
".jpg", thumb_data, [int(cv2.IMWRITE_JPEG_QUALITY), 100]
) )
if ret: if not thumbs:
thumbs.append(jpg.tobytes()) # Fallback to preview frames if no recordings available
logger.warning(
if camera_config.review.genai.debug_save_thumbnails: f"No recording frames found for {camera}, falling back to preview frames"
id = data["after"]["id"] )
Path(os.path.join(CLIPS_DIR, "genai-requests", f"{id}")).mkdir( thumbs = self.get_preview_frames_as_bytes(
camera,
final_data["start_time"],
final_data["end_time"],
final_data["thumb_path"],
id,
camera_config.review.genai.debug_save_thumbnails,
)
elif camera_config.review.genai.debug_save_thumbnails:
# Save debug thumbnails for recordings
Path(os.path.join(CLIPS_DIR, "genai-requests", id)).mkdir(
parents=True, exist_ok=True parents=True, exist_ok=True
) )
shutil.copy( for idx, frame_bytes in enumerate(thumbs):
thumb_path, with open(
os.path.join( os.path.join(CLIPS_DIR, f"genai-requests/{id}/{idx}.jpg"),
CLIPS_DIR, "wb",
f"genai-requests/{id}/{idx}.webp", ) as f:
), f.write(frame_bytes)
) else:
# Use preview frames
thumbs = self.get_preview_frames_as_bytes(
camera,
final_data["start_time"],
final_data["end_time"],
final_data["thumb_path"],
id,
camera_config.review.genai.debug_save_thumbnails,
)
# kickoff analysis # kickoff analysis
self.review_descs_dps.update() self.review_descs_dps.update()
@ -231,6 +263,122 @@ class ReviewDescriptionProcessor(PostProcessorApi):
return selected_frames return selected_frames
def get_recording_frames(
    self,
    camera: str,
    start_time: float,
    end_time: float,
    height: int = 480,
) -> list[bytes]:
    """Collect evenly spaced frames from a camera's recordings.

    Args:
        camera: Camera name
        start_time: Start timestamp
        end_time: End timestamp
        height: Height passed to the frame extraction helper

    Returns:
        List of image bytes, one per timestamp that could be extracted.
        Timestamps without a matching recording are skipped.
    """
    span = end_time - start_time
    frame_total = self.calculate_frame_count(ImageSourceEnum.recordings)

    # Spread the sample points across the whole range, endpoints included;
    # a single sample is taken from the middle instead.
    if frame_total != 1:
        interval = span / (frame_total - 1)
        timestamps = [start_time + (n * interval) for n in range(frame_total)]
    else:
        timestamps = [start_time + span / 2]

    def frame_at(ts: float) -> bytes | None:
        """Return the frame at ``ts``, or None when no segment covers it."""
        covering_segment = (
            Recordings.select(
                Recordings.path,
                Recordings.start_time,
            )
            .where(
                (Recordings.start_time <= ts) & (Recordings.end_time >= ts),
                Recordings.camera == camera,
            )
            .order_by(Recordings.start_time.desc())
            .limit(1)
        )

        try:
            recording = covering_segment.get()
            # The extraction helper expects an offset relative to the
            # start of the segment rather than an absolute timestamp.
            return get_image_from_recording(
                self.config.ffmpeg,
                recording.path,
                ts - recording.start_time,
                "mjpeg",
                height=height,
            )
        except DoesNotExist:
            return None

    frames = []

    for timestamp in timestamps:
        try:
            # Exact timestamp first, then the next whole second as fallback.
            image_data = frame_at(timestamp) or frame_at(math.ceil(timestamp))

            if not image_data:
                logger.warning(
                    f"No recording found for {camera} at timestamp {timestamp}"
                )
                continue

            frames.append(image_data)
        except Exception as e:
            # One bad frame should not discard the frames already gathered.
            logger.error(
                f"Error extracting frame from recording for {camera} at {timestamp}: {e}"
            )

    return frames
def get_preview_frames_as_bytes(
    self,
    camera: str,
    start_time: float,
    end_time: float,
    thumb_path_fallback: str,
    review_id: str,
    save_debug: bool,
) -> list[bytes]:
    """Get preview frames and convert them to JPEG bytes.

    Frames that cannot be read from disk are skipped with a warning instead
    of aborting the whole request.

    Args:
        camera: Camera name
        start_time: Start timestamp
        end_time: End timestamp
        thumb_path_fallback: Fallback thumbnail path if no preview frames found
        review_id: Review item ID for debug saving
        save_debug: Whether to save debug thumbnails

    Returns:
        List of JPEG image bytes
    """
    frame_paths = self.get_cache_frames(camera, start_time, end_time)

    if not frame_paths:
        frame_paths = [thumb_path_fallback]

    debug_dir = os.path.join(CLIPS_DIR, "genai-requests", review_id)

    if save_debug:
        # Create the debug directory once rather than on every iteration.
        Path(debug_dir).mkdir(parents=True, exist_ok=True)

    thumbs: list[bytes] = []

    for idx, thumb_path in enumerate(frame_paths):
        thumb_data = cv2.imread(thumb_path)

        if thumb_data is None:
            # cv2.imread reports a missing or undecodable file by returning
            # None; passing that on to cv2.imencode would raise.
            logger.warning(
                f"Unable to read preview frame {thumb_path} for {camera}, skipping"
            )
            continue

        ret, jpg = cv2.imencode(
            ".jpg", thumb_data, [int(cv2.IMWRITE_JPEG_QUALITY), 100]
        )

        if ret:
            thumbs.append(jpg.tobytes())

        if save_debug:
            # Keep a copy of the source file, named by frame index.
            shutil.copy(thumb_path, os.path.join(debug_dir, f"{idx}.webp"))

    return thumbs
@staticmethod @staticmethod
def run_analysis( def run_analysis(