Compare commits

...

3 Commits

Author SHA1 Message Date
Nicolas Mowen
190925375b
Classification fixes (#20677)
* Don't run classification on stationary objects and set a maximum number of classifications

* Fix layout of classification selection
2025-10-26 08:41:18 -05:00
Nicolas Mowen
094a0a6e05
Add ability to change source of images for review descriptions (#20676)
* Add ability to change source of images for review descriptions

* Undo
2025-10-26 08:40:38 -05:00
Josh Hawkins
840d567d22
UI tweaks (#20675)
* spacing tweaks and add link to explore for plate

* clear selected objects when changing cameras

* plate link and spacing in object lifecycle

* set tabindex to prevent tooltip from showing on reopen

* show month and day in object lifecycle timestamp
2025-10-26 07:27:07 -05:00
9 changed files with 300 additions and 76 deletions

View File

@@ -39,6 +39,26 @@ Each installation and even camera can have different parameters for what is cons
- Brief movement with legitimate items (bags, packages, tools, equipment) in appropriate zones is routine.
```
### Image Source
By default, review summaries use preview images (cached preview frames), which have a lower resolution but consume fewer tokens per image. For better image quality and more detailed analysis, you can configure Frigate to extract frames directly from recordings at a higher resolution:
```yaml
review:
genai:
enabled: true
image_source: recordings # Options: "preview" (default) or "recordings"
```
When using `recordings`, frames are extracted at 480p resolution (480px height), providing better detail for the LLM while being mindful of context window size. This is particularly useful for scenarios where fine details matter, such as identifying license plates, reading text, or analyzing distant objects. Note that using recordings will:
- Provide higher quality images to the LLM (480p vs 180p preview images)
- Use more tokens per image (~200-300 tokens vs ~100 tokens for preview)
- Result in fewer frames being sent to stay within context limits (typically 6-12 frames vs 8-20 frames)
- Require that recordings are enabled for the camera
If recordings are not available for a given time period, the system will automatically fall back to using preview frames.
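For example, a minimal camera-level sketch (assuming the standard `record` section is used to enable recordings; the camera name is illustrative) might look like:
```yaml
cameras:
  front_door:          # illustrative camera name
    record:
      enabled: true    # recordings must exist for image_source: recordings
    review:
      genai:
        enabled: true
        image_source: recordings
```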
### Additional Concerns
Along with concerns about suspicious activity or immediate threats, you may have additional concerns such as animals in your garden or a gate being left open. These concerns can be configured so that review summaries will make note of them when the activity requires additional review. For example:
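One possible configuration (a minimal sketch; the concern wording is illustrative):
```yaml
review:
  genai:
    enabled: true
    additional_concerns:
      - Animals in the garden
      - A gate being left open
```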

View File

@@ -429,6 +429,10 @@ review:
alerts: True
# Optional: Enable GenAI review summaries for detections (default: shown below)
detections: False
# Optional: Image source for GenAI (default: preview)
# Options: "preview" (uses cached preview frames at 180p) or "recordings" (extracts frames from recordings at 480p)
# Using "recordings" provides better image quality but uses ~2-3x more tokens per image (~200-300 vs ~100 tokens)
image_source: preview
# Optional: Additional concerns that the GenAI should make note of (default: None)
additional_concerns:
- Animals in the garden

View File

@@ -1,10 +1,18 @@
from enum import Enum
from typing import Optional, Union
from pydantic import Field, field_validator
from ..base import FrigateBaseModel
__all__ = ["ReviewConfig", "DetectionsConfig", "AlertsConfig"]
__all__ = ["ReviewConfig", "DetectionsConfig", "AlertsConfig", "ImageSourceEnum"]
class ImageSourceEnum(str, Enum):
"""Image source options for GenAI Review."""
preview = "preview"
recordings = "recordings"
DEFAULT_ALERT_OBJECTS = ["person", "car"]
@@ -77,6 +85,10 @@ class GenAIReviewConfig(FrigateBaseModel):
)
alerts: bool = Field(default=True, title="Enable GenAI for alerts.")
detections: bool = Field(default=False, title="Enable GenAI for detections.")
image_source: ImageSourceEnum = Field(
default=ImageSourceEnum.preview,
title="Image source for review descriptions.",
)
additional_concerns: list[str] = Field(
default=[],
title="Additional concerns that GenAI should make note of on this camera.",

View File

@@ -3,6 +3,7 @@
import copy
import datetime
import logging
import math
import os
import shutil
import threading
@@ -10,16 +11,18 @@ from pathlib import Path
from typing import Any
import cv2
from peewee import DoesNotExist
from frigate.comms.embeddings_updater import EmbeddingsRequestEnum
from frigate.comms.inter_process import InterProcessRequestor
from frigate.config import FrigateConfig
from frigate.config.camera.review import GenAIReviewConfig
from frigate.config.camera.review import GenAIReviewConfig, ImageSourceEnum
from frigate.const import CACHE_DIR, CLIPS_DIR, UPDATE_REVIEW_DESCRIPTION
from frigate.data_processing.types import PostProcessDataEnum
from frigate.genai import GenAIClient
from frigate.models import ReviewSegment
from frigate.models import Recordings, ReviewSegment
from frigate.util.builtin import EventsPerSecond, InferenceSpeed
from frigate.util.image import get_image_from_recording
from ..post.api import PostProcessorApi
from ..types import DataProcessorMetrics
@@ -43,20 +46,35 @@ class ReviewDescriptionProcessor(PostProcessorApi):
self.review_descs_dps = EventsPerSecond()
self.review_descs_dps.start()
def calculate_frame_count(self) -> int:
"""Calculate optimal number of frames based on context size."""
# With our preview images (height of 180px) each image should be ~100 tokens per image
# We want to be conservative to not have too long of query times with too many images
def calculate_frame_count(
self, image_source: ImageSourceEnum = ImageSourceEnum.preview
) -> int:
"""Calculate optimal number of frames based on context size and image source."""
context_size = self.genai_client.get_context_size()
if context_size > 10000:
return 20
elif context_size > 6000:
return 16
elif context_size > 4000:
return 12
if image_source == ImageSourceEnum.recordings:
# With recordings at 480p resolution (480px height), each image uses ~200-300 tokens
# This is ~2-3x more than preview images, so we reduce frame count accordingly
# to avoid exceeding context limits and maintain reasonable inference times
if context_size > 10000:
return 12
elif context_size > 6000:
return 10
elif context_size > 4000:
return 8
else:
return 6
else:
return 8
# With preview images (180px height), each image uses ~100 tokens
# We can send more frames since they're lower resolution
if context_size > 10000:
return 20
elif context_size > 6000:
return 16
elif context_size > 4000:
return 12
else:
return 8
def process_data(self, data, data_type):
self.metrics.review_desc_dps.value = self.review_descs_dps.eps()
@@ -88,36 +106,50 @@ class ReviewDescriptionProcessor(PostProcessorApi):
):
return
frames = self.get_cache_frames(
camera, final_data["start_time"], final_data["end_time"]
)
image_source = camera_config.review.genai.image_source
if not frames:
frames = [final_data["thumb_path"]]
thumbs = []
for idx, thumb_path in enumerate(frames):
thumb_data = cv2.imread(thumb_path)
ret, jpg = cv2.imencode(
".jpg", thumb_data, [int(cv2.IMWRITE_JPEG_QUALITY), 100]
if image_source == ImageSourceEnum.recordings:
thumbs = self.get_recording_frames(
camera,
final_data["start_time"],
final_data["end_time"],
height=480, # Use 480p for good balance between quality and token usage
)
if ret:
thumbs.append(jpg.tobytes())
if camera_config.review.genai.debug_save_thumbnails:
id = data["after"]["id"]
Path(os.path.join(CLIPS_DIR, "genai-requests", f"{id}")).mkdir(
if not thumbs:
# Fallback to preview frames if no recordings available
logger.warning(
f"No recording frames found for {camera}, falling back to preview frames"
)
thumbs = self.get_preview_frames_as_bytes(
camera,
final_data["start_time"],
final_data["end_time"],
final_data["thumb_path"],
id,
camera_config.review.genai.debug_save_thumbnails,
)
elif camera_config.review.genai.debug_save_thumbnails:
# Save debug thumbnails for recordings
Path(os.path.join(CLIPS_DIR, "genai-requests", id)).mkdir(
parents=True, exist_ok=True
)
shutil.copy(
thumb_path,
os.path.join(
CLIPS_DIR,
f"genai-requests/{id}/{idx}.webp",
),
)
for idx, frame_bytes in enumerate(thumbs):
with open(
os.path.join(CLIPS_DIR, f"genai-requests/{id}/{idx}.jpg"),
"wb",
) as f:
f.write(frame_bytes)
else:
# Use preview frames
thumbs = self.get_preview_frames_as_bytes(
camera,
final_data["start_time"],
final_data["end_time"],
final_data["thumb_path"],
id,
camera_config.review.genai.debug_save_thumbnails,
)
# kickoff analysis
self.review_descs_dps.update()
@@ -231,6 +263,122 @@ class ReviewDescriptionProcessor(PostProcessorApi):
return selected_frames
def get_recording_frames(
self,
camera: str,
start_time: float,
end_time: float,
height: int = 480,
) -> list[bytes]:
"""Get frames from recordings at specified timestamps."""
duration = end_time - start_time
desired_frame_count = self.calculate_frame_count(ImageSourceEnum.recordings)
# Calculate evenly spaced timestamps throughout the duration
if desired_frame_count == 1:
timestamps = [start_time + duration / 2]
else:
step = duration / (desired_frame_count - 1)
timestamps = [start_time + (i * step) for i in range(desired_frame_count)]
def extract_frame_from_recording(ts: float) -> bytes | None:
"""Extract a single frame from recording at given timestamp."""
try:
recording = (
Recordings.select(
Recordings.path,
Recordings.start_time,
)
.where((ts >= Recordings.start_time) & (ts <= Recordings.end_time))
.where(Recordings.camera == camera)
.order_by(Recordings.start_time.desc())
.limit(1)
.get()
)
time_in_segment = ts - recording.start_time
return get_image_from_recording(
self.config.ffmpeg,
recording.path,
time_in_segment,
"mjpeg",
height=height,
)
except DoesNotExist:
return None
frames = []
for timestamp in timestamps:
try:
# Try to extract frame at exact timestamp
image_data = extract_frame_from_recording(timestamp)
if not image_data:
# Try with rounded timestamp as fallback
rounded_timestamp = math.ceil(timestamp)
image_data = extract_frame_from_recording(rounded_timestamp)
if image_data:
frames.append(image_data)
else:
logger.warning(
f"No recording found for {camera} at timestamp {timestamp}"
)
except Exception as e:
logger.error(
f"Error extracting frame from recording for {camera} at {timestamp}: {e}"
)
continue
return frames
def get_preview_frames_as_bytes(
self,
camera: str,
start_time: float,
end_time: float,
thumb_path_fallback: str,
review_id: str,
save_debug: bool,
) -> list[bytes]:
"""Get preview frames and convert them to JPEG bytes.
Args:
camera: Camera name
start_time: Start timestamp
end_time: End timestamp
thumb_path_fallback: Fallback thumbnail path if no preview frames found
review_id: Review item ID for debug saving
save_debug: Whether to save debug thumbnails
Returns:
List of JPEG image bytes
"""
frame_paths = self.get_cache_frames(camera, start_time, end_time)
if not frame_paths:
frame_paths = [thumb_path_fallback]
thumbs = []
for idx, thumb_path in enumerate(frame_paths):
thumb_data = cv2.imread(thumb_path)
ret, jpg = cv2.imencode(
".jpg", thumb_data, [int(cv2.IMWRITE_JPEG_QUALITY), 100]
)
if ret:
thumbs.append(jpg.tobytes())
if save_debug:
Path(os.path.join(CLIPS_DIR, "genai-requests", review_id)).mkdir(
parents=True, exist_ok=True
)
shutil.copy(
thumb_path,
os.path.join(CLIPS_DIR, f"genai-requests/{review_id}/{idx}.webp"),
)
return thumbs
@staticmethod
def run_analysis(

View File

@@ -34,6 +34,8 @@ except ModuleNotFoundError:
logger = logging.getLogger(__name__)
MAX_OBJECT_CLASSIFICATIONS = 16
class CustomStateClassificationProcessor(RealTimeProcessorApi):
def __init__(
@@ -396,6 +398,18 @@ class CustomObjectClassificationProcessor(RealTimeProcessorApi):
if obj_data.get("end_time") is not None:
return
if obj_data.get("stationary"):
return
object_id = obj_data["id"]
if (
object_id in self.classification_history
and len(self.classification_history[object_id])
>= MAX_OBJECT_CLASSIFICATIONS
):
return
now = datetime.datetime.now().timestamp()
x, y, x2, y2 = calculate_region(
frame.shape,
@@ -427,7 +441,7 @@ class CustomObjectClassificationProcessor(RealTimeProcessorApi):
write_classification_attempt(
self.train_dir,
cv2.cvtColor(crop, cv2.COLOR_RGB2BGR),
obj_data["id"],
object_id,
now,
"unknown",
0.0,
@@ -448,7 +462,7 @@ class CustomObjectClassificationProcessor(RealTimeProcessorApi):
write_classification_attempt(
self.train_dir,
cv2.cvtColor(crop, cv2.COLOR_RGB2BGR),
obj_data["id"],
object_id,
now,
self.labelmap[best_id],
score,
@@ -461,7 +475,7 @@ class CustomObjectClassificationProcessor(RealTimeProcessorApi):
sub_label = self.labelmap[best_id]
consensus_label, consensus_score = self.get_weighted_score(
obj_data["id"], sub_label, score, now
object_id, sub_label, score, now
)
if consensus_label is not None:
@@ -470,7 +484,7 @@ class CustomObjectClassificationProcessor(RealTimeProcessorApi):
== ObjectClassificationType.sub_label
):
self.sub_label_publisher.publish(
(obj_data["id"], consensus_label, consensus_score),
(object_id, consensus_label, consensus_score),
EventMetadataTypeEnum.sub_label,
)
elif (
@@ -479,7 +493,7 @@ class CustomObjectClassificationProcessor(RealTimeProcessorApi):
):
self.sub_label_publisher.publish(
(
obj_data["id"],
object_id,
self.model_config.name,
consensus_label,
consensus_score,

View File

@@ -41,7 +41,7 @@ import {
ContextMenuItem,
ContextMenuTrigger,
} from "@/components/ui/context-menu";
import { useNavigate } from "react-router-dom";
import { Link, useNavigate } from "react-router-dom";
import { ObjectPath } from "./ObjectPath";
import { getLifecycleItemDescription } from "@/utils/lifecycleUtil";
import { IoPlayCircleOutline } from "react-icons/io5";
@@ -289,10 +289,10 @@ export default function ObjectLifecycle({
timezone: config.ui.timezone,
date_format:
config.ui.time_format == "24hour"
? t("time.formattedTimestampHourMinuteSecond.24hour", {
? t("time.formattedTimestamp.24hour", {
ns: "common",
})
: t("time.formattedTimestampHourMinuteSecond.12hour", {
: t("time.formattedTimestamp.12hour", {
ns: "common",
}),
time_style: "medium",
@@ -305,10 +305,10 @@ export default function ObjectLifecycle({
timezone: config.ui.timezone,
date_format:
config.ui.time_format == "24hour"
? t("time.formattedTimestampHourMinuteSecond.24hour", {
? t("time.formattedTimestamp.24hour", {
ns: "common",
})
: t("time.formattedTimestampHourMinuteSecond.12hour", {
: t("time.formattedTimestamp.12hour", {
ns: "common",
}),
time_style: "medium",
@@ -412,6 +412,7 @@ export default function ObjectLifecycle({
return (
<div className={className}>
<span tabIndex={0} className="sr-only" />
{!fullscreen && (
<div className={cn("flex items-center gap-2")}>
<Button
@@ -649,10 +650,15 @@ export default function ObjectLifecycle({
</span>
{event.data?.recognized_license_plate && (
<>
·{" "}
<span className="text-sm text-secondary-foreground">
{event.data.recognized_license_plate}
</span>
<span className="text-secondary-foreground">·</span>
<div className="text-sm text-secondary-foreground">
<Link
to={`/explore?recognized_license_plate=${event.data.recognized_license_plate}`}
className="text-sm"
>
{event.data.recognized_license_plate}
</Link>
</div>
</>
)}
</div>
@@ -832,10 +838,12 @@ function LifecycleIconRow({
/>
</div>
<div className="flex w-full flex-row justify-between">
<div className="ml-2 flex w-full min-w-0 flex-1">
<div className="flex flex-col">
<div>{getLifecycleItemDescription(item)}</div>
<div className="mt-1 flex flex-wrap items-center gap-2 text-sm text-secondary-foreground md:gap-5">
<div className="text-md flex items-start break-words text-left">
{getLifecycleItemDescription(item)}
</div>
<div className="mt-1 flex flex-wrap items-center gap-2 text-xs text-secondary-foreground md:gap-5">
<div className="flex items-center gap-1">
<span className="text-primary-variant">
{t("objectLifecycle.lifecycleItemDesc.header.ratio")}
@@ -893,8 +901,9 @@ function LifecycleIconRow({
)}
</div>
</div>
<div className={cn("p-1 text-sm")}>{formattedEventTimestamp}</div>
</div>
<div className="ml-3 flex-shrink-0 px-1 text-right text-xs text-primary-variant">
<div className="whitespace-nowrap">{formattedEventTimestamp}</div>
</div>
</div>
</div>

View File

@@ -22,6 +22,7 @@ import EventMenu from "@/components/timeline/EventMenu";
import { FrigatePlusDialog } from "@/components/overlay/dialog/FrigatePlusDialog";
import { cn } from "@/lib/utils";
import { Tooltip, TooltipContent, TooltipTrigger } from "../ui/tooltip";
import { Link } from "react-router-dom";
type DetailStreamProps = {
reviewItems?: ReviewSegment[];
@@ -499,15 +500,22 @@ function EventList({
}}
role="button"
>
<span className="capitalize">{label}</span>
{event.data?.recognized_license_plate && (
<>
·{" "}
<span className="text-sm text-secondary-foreground">
{event.data.recognized_license_plate}
</span>
</>
)}
<div className="flex gap-2">
<span className="capitalize">{label}</span>
{event.data?.recognized_license_plate && (
<>
<span className="text-secondary-foreground">·</span>
<div className="text-sm text-secondary-foreground">
<Link
to={`/explore?recognized_license_plate=${event.data.recognized_license_plate}`}
className="text-sm"
>
{event.data.recognized_license_plate}
</Link>
</div>
</>
)}
</div>
</div>
</div>
<div className="mr-2 flex flex-row justify-end">
@@ -615,10 +623,11 @@ function LifecycleItem({
)}
/>
</div>
<div className="flex w-full flex-row justify-between">
<div className="ml-0.5 flex min-w-0 flex-1">
<Tooltip>
<TooltipTrigger>
<div className="flex items-start text-left">
<div className="flex items-start break-words text-left">
{getLifecycleItemDescription(item)}
</div>
</TooltipTrigger>
@@ -638,7 +647,9 @@ function LifecycleItem({
</span>
{areaPx !== undefined && areaPct !== undefined ? (
<span className="font-medium text-foreground">
{areaPx} {t("pixels", { ns: "common" })} · {areaPct}%
{areaPx} {t("pixels", { ns: "common" })}{" "}
<span className="text-secondary-foreground">·</span>{" "}
{areaPct}%
</span>
) : (
<span>N/A</span>
@@ -648,7 +659,10 @@ function LifecycleItem({
</div>
</TooltipContent>
</Tooltip>
<div className={cn("p-1 text-xs")}>{formattedEventTimestamp}</div>
</div>
<div className="ml-3 flex-shrink-0 px-1 text-right text-xs text-primary-variant">
<div className="whitespace-nowrap">{formattedEventTimestamp}</div>
</div>
</div>
);

View File

@@ -58,6 +58,11 @@ export function DetailStreamProvider({
setAnnotationOffset(cfgOffset);
}, [config, camera]);
// Clear selected objects when exiting detail mode or changing cameras
useEffect(() => {
setSelectedObjectIds([]);
}, [isDetailMode, camera]);
const value: DetailStreamContextType = {
selectedObjectIds,
currentTime,

View File

@@ -11,7 +11,6 @@ import {
FrigateConfig,
} from "@/types/frigateConfig";
import { useEffect, useMemo, useState } from "react";
import { isMobile } from "react-device-detect";
import { useTranslation } from "react-i18next";
import { FaFolderPlus } from "react-icons/fa";
import { MdModelTraining } from "react-icons/md";
@@ -131,7 +130,7 @@ export default function ModelSelectionView({
</Button>
</div>
</div>
<div className="flex size-full gap-2 p-2">
<div className="grid auto-rows-max grid-cols-2 gap-2 overflow-y-auto p-2 md:grid-cols-4 lg:grid-cols-5 xl:grid-cols-6 2xl:grid-cols-8 3xl:grid-cols-10">
{selectedClassificationConfigs.length === 0 ? (
<NoModelsView
onCreateModel={() => setNewModel(true)}
@@ -208,14 +207,13 @@ function ModelCard({ config, onClick }: ModelCardProps) {
<div
key={config.name}
className={cn(
"relative size-60 cursor-pointer overflow-hidden rounded-lg",
"relative aspect-square w-full cursor-pointer overflow-hidden rounded-lg",
"outline-transparent duration-500",
isMobile && "w-full",
)}
onClick={() => onClick()}
>
<img
className={cn("size-full", isMobile && "w-full")}
className="size-full"
src={`${baseUrl}clips/${config.name}/dataset/${coverImage?.name}/${coverImage?.img}`}
/>
<ImageShadowOverlay />