mirror of
https://github.com/blakeblackshear/frigate.git
synced 2025-12-06 05:24:11 +03:00
Add ability to change source of images for review descriptions (#20676)
* Add ability to change source of images for review descriptions * Undo
This commit is contained in:
parent
840d567d22
commit
094a0a6e05
@ -39,6 +39,26 @@ Each installation and even camera can have different parameters for what is cons
|
|||||||
- Brief movement with legitimate items (bags, packages, tools, equipment) in appropriate zones is routine.
|
- Brief movement with legitimate items (bags, packages, tools, equipment) in appropriate zones is routine.
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Image Source
|
||||||
|
|
||||||
|
By default, review summaries use preview images (cached preview frames), which have a lower resolution but use fewer tokens per image. For better image quality and more detailed analysis, you can configure Frigate to extract frames directly from recordings at a higher resolution:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
review:
|
||||||
|
genai:
|
||||||
|
enabled: true
|
||||||
|
image_source: recordings # Options: "preview" (default) or "recordings"
|
||||||
|
```
|
||||||
|
|
||||||
|
When using `recordings`, frames are extracted at 480p resolution (480px height), which gives the LLM more detail while keeping context window usage in check. This is particularly useful for scenarios where fine details matter, such as identifying license plates, reading text, or analyzing distant objects. Note that using recordings will:
|
||||||
|
|
||||||
|
- Provide higher quality images to the LLM (480p vs 180p preview images)
|
||||||
|
- Use more tokens per image (~200-300 tokens vs ~100 tokens for preview)
|
||||||
|
- Result in fewer frames being sent to stay within context limits (typically 6-12 frames vs 8-20 frames)
|
||||||
|
- Require that recordings are enabled for the camera
|
||||||
|
|
||||||
|
If recordings are not available for a given time period, the system will automatically fall back to using preview frames.
|
||||||
|
|
||||||
### Additional Concerns
|
### Additional Concerns
|
||||||
|
|
||||||
Along with the concern of suspicious activity or immediate threat, you may have concerns such as animals in your garden or a gate being left open. These concerns can be configured so that the review summaries will make note of them if the activity requires additional review. For example:
|
Along with the concern of suspicious activity or immediate threat, you may have concerns such as animals in your garden or a gate being left open. These concerns can be configured so that the review summaries will make note of them if the activity requires additional review. For example:
|
||||||
|
|||||||
@ -429,6 +429,10 @@ review:
|
|||||||
alerts: True
|
alerts: True
|
||||||
# Optional: Enable GenAI review summaries for detections (default: shown below)
|
# Optional: Enable GenAI review summaries for detections (default: shown below)
|
||||||
detections: False
|
detections: False
|
||||||
|
# Optional: Image source for GenAI (default: preview)
|
||||||
|
# Options: "preview" (uses cached preview frames at 180p) or "recordings" (extracts frames from recordings at 480p)
|
||||||
|
# Using "recordings" provides better image quality but uses ~2-3x more tokens per image (~200-300 vs ~100 tokens)
|
||||||
|
image_source: preview
|
||||||
# Optional: Additional concerns that the GenAI should make note of (default: None)
|
# Optional: Additional concerns that the GenAI should make note of (default: None)
|
||||||
additional_concerns:
|
additional_concerns:
|
||||||
- Animals in the garden
|
- Animals in the garden
|
||||||
|
|||||||
@ -1,10 +1,18 @@
|
|||||||
|
from enum import Enum
|
||||||
from typing import Optional, Union
|
from typing import Optional, Union
|
||||||
|
|
||||||
from pydantic import Field, field_validator
|
from pydantic import Field, field_validator
|
||||||
|
|
||||||
from ..base import FrigateBaseModel
|
from ..base import FrigateBaseModel
|
||||||
|
|
||||||
__all__ = ["ReviewConfig", "DetectionsConfig", "AlertsConfig"]
|
__all__ = ["ReviewConfig", "DetectionsConfig", "AlertsConfig", "ImageSourceEnum"]
|
||||||
|
|
||||||
|
|
||||||
|
class ImageSourceEnum(str, Enum):
    """Image source options for GenAI Review.

    The ``str`` mixin makes every member compare equal to its plain string
    value, so a value such as ``"recordings"`` can be compared against a
    member directly.
    """

    # Use the cached preview frames.
    preview = "preview"
    # Extract frames from the recordings.
    recordings = "recordings"
|
||||||
|
|
||||||
|
|
||||||
DEFAULT_ALERT_OBJECTS = ["person", "car"]
|
DEFAULT_ALERT_OBJECTS = ["person", "car"]
|
||||||
@ -77,6 +85,10 @@ class GenAIReviewConfig(FrigateBaseModel):
|
|||||||
)
|
)
|
||||||
alerts: bool = Field(default=True, title="Enable GenAI for alerts.")
|
alerts: bool = Field(default=True, title="Enable GenAI for alerts.")
|
||||||
detections: bool = Field(default=False, title="Enable GenAI for detections.")
|
detections: bool = Field(default=False, title="Enable GenAI for detections.")
|
||||||
|
image_source: ImageSourceEnum = Field(
|
||||||
|
default=ImageSourceEnum.preview,
|
||||||
|
title="Image source for review descriptions.",
|
||||||
|
)
|
||||||
additional_concerns: list[str] = Field(
|
additional_concerns: list[str] = Field(
|
||||||
default=[],
|
default=[],
|
||||||
title="Additional concerns that GenAI should make note of on this camera.",
|
title="Additional concerns that GenAI should make note of on this camera.",
|
||||||
|
|||||||
@ -3,6 +3,7 @@
|
|||||||
import copy
|
import copy
|
||||||
import datetime
|
import datetime
|
||||||
import logging
|
import logging
|
||||||
|
import math
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
import threading
|
import threading
|
||||||
@ -10,16 +11,18 @@ from pathlib import Path
|
|||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
import cv2
|
import cv2
|
||||||
|
from peewee import DoesNotExist
|
||||||
|
|
||||||
from frigate.comms.embeddings_updater import EmbeddingsRequestEnum
|
from frigate.comms.embeddings_updater import EmbeddingsRequestEnum
|
||||||
from frigate.comms.inter_process import InterProcessRequestor
|
from frigate.comms.inter_process import InterProcessRequestor
|
||||||
from frigate.config import FrigateConfig
|
from frigate.config import FrigateConfig
|
||||||
from frigate.config.camera.review import GenAIReviewConfig
|
from frigate.config.camera.review import GenAIReviewConfig, ImageSourceEnum
|
||||||
from frigate.const import CACHE_DIR, CLIPS_DIR, UPDATE_REVIEW_DESCRIPTION
|
from frigate.const import CACHE_DIR, CLIPS_DIR, UPDATE_REVIEW_DESCRIPTION
|
||||||
from frigate.data_processing.types import PostProcessDataEnum
|
from frigate.data_processing.types import PostProcessDataEnum
|
||||||
from frigate.genai import GenAIClient
|
from frigate.genai import GenAIClient
|
||||||
from frigate.models import ReviewSegment
|
from frigate.models import Recordings, ReviewSegment
|
||||||
from frigate.util.builtin import EventsPerSecond, InferenceSpeed
|
from frigate.util.builtin import EventsPerSecond, InferenceSpeed
|
||||||
|
from frigate.util.image import get_image_from_recording
|
||||||
|
|
||||||
from ..post.api import PostProcessorApi
|
from ..post.api import PostProcessorApi
|
||||||
from ..types import DataProcessorMetrics
|
from ..types import DataProcessorMetrics
|
||||||
@ -43,20 +46,35 @@ class ReviewDescriptionProcessor(PostProcessorApi):
|
|||||||
self.review_descs_dps = EventsPerSecond()
|
self.review_descs_dps = EventsPerSecond()
|
||||||
self.review_descs_dps.start()
|
self.review_descs_dps.start()
|
||||||
|
|
||||||
def calculate_frame_count(self) -> int:
|
def calculate_frame_count(
|
||||||
"""Calculate optimal number of frames based on context size."""
|
self, image_source: ImageSourceEnum = ImageSourceEnum.preview
|
||||||
# With our preview images (height of 180px) each image should be ~100 tokens per image
|
) -> int:
|
||||||
# We want to be conservative to not have too long of query times with too many images
|
"""Calculate optimal number of frames based on context size and image source."""
|
||||||
context_size = self.genai_client.get_context_size()
|
context_size = self.genai_client.get_context_size()
|
||||||
|
|
||||||
if context_size > 10000:
|
if image_source == ImageSourceEnum.recordings:
|
||||||
return 20
|
# With recordings at 480p resolution (480px height), each image uses ~200-300 tokens
|
||||||
elif context_size > 6000:
|
# This is ~2-3x more than preview images, so we reduce frame count accordingly
|
||||||
return 16
|
# to avoid exceeding context limits and maintain reasonable inference times
|
||||||
elif context_size > 4000:
|
if context_size > 10000:
|
||||||
return 12
|
return 12
|
||||||
|
elif context_size > 6000:
|
||||||
|
return 10
|
||||||
|
elif context_size > 4000:
|
||||||
|
return 8
|
||||||
|
else:
|
||||||
|
return 6
|
||||||
else:
|
else:
|
||||||
return 8
|
# With preview images (180px height), each image uses ~100 tokens
|
||||||
|
# We can send more frames since they're lower resolution
|
||||||
|
if context_size > 10000:
|
||||||
|
return 20
|
||||||
|
elif context_size > 6000:
|
||||||
|
return 16
|
||||||
|
elif context_size > 4000:
|
||||||
|
return 12
|
||||||
|
else:
|
||||||
|
return 8
|
||||||
|
|
||||||
def process_data(self, data, data_type):
|
def process_data(self, data, data_type):
|
||||||
self.metrics.review_desc_dps.value = self.review_descs_dps.eps()
|
self.metrics.review_desc_dps.value = self.review_descs_dps.eps()
|
||||||
@ -88,36 +106,50 @@ class ReviewDescriptionProcessor(PostProcessorApi):
|
|||||||
):
|
):
|
||||||
return
|
return
|
||||||
|
|
||||||
frames = self.get_cache_frames(
|
image_source = camera_config.review.genai.image_source
|
||||||
camera, final_data["start_time"], final_data["end_time"]
|
|
||||||
)
|
|
||||||
|
|
||||||
if not frames:
|
if image_source == ImageSourceEnum.recordings:
|
||||||
frames = [final_data["thumb_path"]]
|
thumbs = self.get_recording_frames(
|
||||||
|
camera,
|
||||||
thumbs = []
|
final_data["start_time"],
|
||||||
|
final_data["end_time"],
|
||||||
for idx, thumb_path in enumerate(frames):
|
height=480, # Use 480p for good balance between quality and token usage
|
||||||
thumb_data = cv2.imread(thumb_path)
|
|
||||||
ret, jpg = cv2.imencode(
|
|
||||||
".jpg", thumb_data, [int(cv2.IMWRITE_JPEG_QUALITY), 100]
|
|
||||||
)
|
)
|
||||||
|
|
||||||
if ret:
|
if not thumbs:
|
||||||
thumbs.append(jpg.tobytes())
|
# Fallback to preview frames if no recordings available
|
||||||
|
logger.warning(
|
||||||
if camera_config.review.genai.debug_save_thumbnails:
|
f"No recording frames found for {camera}, falling back to preview frames"
|
||||||
id = data["after"]["id"]
|
)
|
||||||
Path(os.path.join(CLIPS_DIR, "genai-requests", f"{id}")).mkdir(
|
thumbs = self.get_preview_frames_as_bytes(
|
||||||
|
camera,
|
||||||
|
final_data["start_time"],
|
||||||
|
final_data["end_time"],
|
||||||
|
final_data["thumb_path"],
|
||||||
|
id,
|
||||||
|
camera_config.review.genai.debug_save_thumbnails,
|
||||||
|
)
|
||||||
|
elif camera_config.review.genai.debug_save_thumbnails:
|
||||||
|
# Save debug thumbnails for recordings
|
||||||
|
Path(os.path.join(CLIPS_DIR, "genai-requests", id)).mkdir(
|
||||||
parents=True, exist_ok=True
|
parents=True, exist_ok=True
|
||||||
)
|
)
|
||||||
shutil.copy(
|
for idx, frame_bytes in enumerate(thumbs):
|
||||||
thumb_path,
|
with open(
|
||||||
os.path.join(
|
os.path.join(CLIPS_DIR, f"genai-requests/{id}/{idx}.jpg"),
|
||||||
CLIPS_DIR,
|
"wb",
|
||||||
f"genai-requests/{id}/{idx}.webp",
|
) as f:
|
||||||
),
|
f.write(frame_bytes)
|
||||||
)
|
else:
|
||||||
|
# Use preview frames
|
||||||
|
thumbs = self.get_preview_frames_as_bytes(
|
||||||
|
camera,
|
||||||
|
final_data["start_time"],
|
||||||
|
final_data["end_time"],
|
||||||
|
final_data["thumb_path"],
|
||||||
|
id,
|
||||||
|
camera_config.review.genai.debug_save_thumbnails,
|
||||||
|
)
|
||||||
|
|
||||||
# kickoff analysis
|
# kickoff analysis
|
||||||
self.review_descs_dps.update()
|
self.review_descs_dps.update()
|
||||||
@ -231,6 +263,122 @@ class ReviewDescriptionProcessor(PostProcessorApi):
|
|||||||
|
|
||||||
return selected_frames
|
return selected_frames
|
||||||
|
|
||||||
|
def get_recording_frames(
    self,
    camera: str,
    start_time: float,
    end_time: float,
    height: int = 480,
) -> list[bytes]:
    """Get frames from recordings at evenly spaced timestamps.

    The number of frames is taken from ``calculate_frame_count`` for the
    recordings image source. Frame extraction is best effort: a timestamp
    with no matching recording, or one whose extraction fails, is logged and
    skipped, so the result may hold fewer frames than requested or be empty.

    Args:
        camera: Camera name used to filter the ``Recordings`` rows.
        start_time: Start timestamp of the window to sample.
        end_time: End timestamp of the window to sample.
        height: Height passed to ``get_image_from_recording``.

    Returns:
        The extracted frames, in timestamp order.
    """
    duration = end_time - start_time
    desired_frame_count = self.calculate_frame_count(ImageSourceEnum.recordings)

    # Calculate evenly spaced timestamps throughout the duration
    if duration <= 0:
        # A zero-length (or inverted) window would otherwise yield the same
        # timestamp N times and extract the identical frame N times.
        timestamps = [start_time]
    elif desired_frame_count == 1:
        timestamps = [start_time + duration / 2]
    else:
        step = duration / (desired_frame_count - 1)
        timestamps = [start_time + (i * step) for i in range(desired_frame_count)]

    def extract_frame_from_recording(ts: float) -> bytes | None:
        """Extract a single frame from the recording segment covering ``ts``.

        Returns None when no recording segment of this camera covers ``ts``.
        """
        try:
            recording = (
                Recordings.select(
                    Recordings.path,
                    Recordings.start_time,
                )
                .where((ts >= Recordings.start_time) & (ts <= Recordings.end_time))
                .where(Recordings.camera == camera)
                .order_by(Recordings.start_time.desc())
                .limit(1)
                .get()
            )
        except DoesNotExist:
            return None

        # Offset of the requested timestamp inside the matched segment.
        time_in_segment = ts - recording.start_time
        return get_image_from_recording(
            self.config.ffmpeg,
            recording.path,
            time_in_segment,
            "mjpeg",
            height=height,
        )

    frames = []

    for timestamp in timestamps:
        try:
            # Try to extract frame at exact timestamp
            image_data = extract_frame_from_recording(timestamp)

            if not image_data:
                # Try with rounded timestamp as fallback
                rounded_timestamp = math.ceil(timestamp)
                image_data = extract_frame_from_recording(rounded_timestamp)

            if image_data:
                frames.append(image_data)
            else:
                logger.warning(
                    f"No recording found for {camera} at timestamp {timestamp}"
                )
        except Exception as e:
            # Deliberately broad: one failed extraction must not discard the
            # frames already collected for this review item.
            logger.error(
                f"Error extracting frame from recording for {camera} at {timestamp}: {e}"
            )

    return frames
|
||||||
|
|
||||||
|
def get_preview_frames_as_bytes(
    self,
    camera: str,
    start_time: float,
    end_time: float,
    thumb_path_fallback: str,
    review_id: str,
    save_debug: bool,
) -> list[bytes]:
    """Get preview frames and convert them to JPEG bytes.

    Frames that cannot be read from disk are skipped with a warning instead
    of aborting the whole request.

    Args:
        camera: Camera name
        start_time: Start timestamp
        end_time: End timestamp
        thumb_path_fallback: Fallback thumbnail path if no preview frames found
        review_id: Review item ID for debug saving
        save_debug: Whether to save debug thumbnails

    Returns:
        List of JPEG image bytes
    """
    frame_paths = self.get_cache_frames(camera, start_time, end_time)
    if not frame_paths:
        frame_paths = [thumb_path_fallback]

    if save_debug:
        # Create the debug directory once rather than on every iteration.
        Path(os.path.join(CLIPS_DIR, "genai-requests", review_id)).mkdir(
            parents=True, exist_ok=True
        )

    thumbs = []
    for idx, thumb_path in enumerate(frame_paths):
        thumb_data = cv2.imread(thumb_path)

        if thumb_data is None:
            # cv2.imread signals a missing or undecodable file by returning
            # None; handing that to cv2.imencode would raise.
            logger.warning(
                f"Unable to read preview frame {thumb_path} for {camera}, skipping"
            )
            continue

        ret, jpg = cv2.imencode(
            ".jpg", thumb_data, [int(cv2.IMWRITE_JPEG_QUALITY), 100]
        )
        if ret:
            thumbs.append(jpg.tobytes())

        if save_debug:
            # Keep the source file (not the re-encoded JPEG) for inspection.
            shutil.copy(
                thumb_path,
                os.path.join(CLIPS_DIR, f"genai-requests/{review_id}/{idx}.webp"),
            )

    return thumbs
|
||||||
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def run_analysis(
|
def run_analysis(
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user