GenAI Refactor (#23253)

* Ensure runtime options are passed

* Add attribute info to prompt when configured

* Move GenAI plugins to dedicated directory

* Migrate prompts to dedicated folder

* Move chat prompts to prompts

* Implement reasoning traces in the UI

* Cleanup

* Make azure a subclass of openai

* Implement reasoning for other providers

* mypy

* Cleanup
This commit is contained in:
Nicolas Mowen 2026-05-19 12:03:57 -06:00 committed by GitHub
parent b1de5e2290
commit b0b00fe1d0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
16 changed files with 1108 additions and 930 deletions

View File

@ -35,9 +35,13 @@ from frigate.api.defs.response.chat_response import (
ToolCall,
)
from frigate.api.defs.tags import Tags
from frigate.api.event import events
from frigate.api.event import _build_attribute_filter_clause, events
from frigate.config import FrigateConfig
from frigate.config.ui import UnitSystemEnum
from frigate.genai.prompts import (
build_chat_system_prompt,
get_attribute_classifications,
get_tool_definitions,
)
from frigate.genai.utils import build_assistant_message_for_conversation
from frigate.jobs.vlm_watch import (
get_vlm_watch_job,
@ -68,390 +72,6 @@ class VLMMonitorRequest(BaseModel):
zones: List[str] = []
def get_tool_definitions(
semantic_search_enabled: bool = False,
) -> List[Dict[str, Any]]:
"""
Get OpenAI-compatible tool definitions for Frigate.
Returns a list of tool definitions that can be used with OpenAI-compatible
function calling APIs. When semantic search is enabled, the search_objects
tool exposes an additional `semantic_query` parameter for descriptive
queries (e.g. "person riding a lawn mower") and find_similar_objects is
included.
"""
search_objects_properties: Dict[str, Any] = {
"camera": {
"type": "string",
"description": "Camera name to filter by (optional).",
},
"label": {
"type": "string",
"description": (
"Generic object class to filter by — one of the tracked detector "
"labels such as 'person', 'package', 'car', 'dog', 'bird'. Use "
"this for broad queries like 'show me all cars today'. Combine "
"with semantic_query when the user also describes appearance or "
"behavior (e.g. label='person', semantic_query='riding a lawn "
"mower')."
),
},
"sub_label": {
"type": "string",
"description": (
"Filter by a DISCRETE NAMED entity recognized in the detection. "
"Use this for: a known person's name ('John'), a delivery "
"company ('Amazon', 'UPS'), a recognized animal species or "
"breed ('blue jay', 'cardinal', 'golden retriever'), or a "
"license plate string. When filtering by a specific name, set "
"only sub_label and leave label unset. Do NOT use sub_label "
"for descriptions of appearance, clothing, or actions — those "
"belong in semantic_query."
),
},
"after": {
"type": "string",
"description": "Start time in ISO 8601 format (e.g., '2024-01-01T00:00:00Z').",
},
"before": {
"type": "string",
"description": "End time in ISO 8601 format (e.g., '2024-01-01T23:59:59Z').",
},
"zones": {
"type": "array",
"items": {"type": "string"},
"description": "List of zone names to filter by.",
},
"limit": {
"type": "integer",
"description": "Maximum number of objects to return (default: 25).",
"default": 25,
},
}
if semantic_search_enabled:
search_objects_properties["semantic_query"] = {
"type": "string",
"description": (
"Optional natural-language description of a PHYSICAL "
"CHARACTERISTIC, APPEARANCE, or ACTIVITY the user mentioned, "
"used to semantically narrow results. Only set this when the "
"user describes something beyond what label and sub_label can "
"express on their own.\n"
"USE for descriptive phrases like: 'riding a lawn mower', "
"'wearing a red jacket', 'carrying a package', 'walking a "
"dog', 'on a bicycle', 'holding an umbrella'.\n"
"DO NOT USE for:\n"
"- specific named people, pets, or delivery companies → use sub_label\n"
"- animal species or breed names like 'blue jay', 'cardinal', "
"'golden retriever' → use sub_label\n"
"- license plate strings → use sub_label\n"
"- generic object queries like 'all cars today' or 'every "
"person' → use label alone with no semantic_query\n"
"When set, combine with label/time/camera/zone filters as "
"usual (e.g. label='person', semantic_query='riding a lawn "
"mower', after='2024-05-01T00:00:00Z')."
),
}
search_objects_description = (
"Search the historical record of detected objects in Frigate. "
"Use this ONLY for questions about the PAST — e.g. 'did anyone come by today?', "
"'when was the last car?', 'show me detections from yesterday'. "
"Do NOT use this for monitoring or alerting requests about future events — "
"use start_camera_watch instead for those. "
"An 'object' in Frigate represents a tracked detection (e.g., a person, package, car).\n\n"
"Choose filters based on what the user is asking for:\n"
"- Generic class query ('show me all cars today'): set `label` only.\n"
"- Specific NAMED entity (known person, delivery company, animal "
"species/breed like 'blue jay' or 'golden retriever', license "
"plate): set `sub_label` only and leave `label` unset.\n"
)
if semantic_search_enabled:
search_objects_description += (
"- Physical CHARACTERISTIC, APPEARANCE, or ACTIVITY that is not a "
"discrete name ('person riding a lawn mower', 'someone in a red "
"jacket', 'person carrying a package'): set `semantic_query` with "
"the descriptive phrase, optionally alongside `label` for the "
"object class. Do NOT put descriptive phrases in sub_label."
)
return [
{
"type": "function",
"function": {
"name": "search_objects",
"description": search_objects_description,
"parameters": {
"type": "object",
"properties": search_objects_properties,
},
"required": [],
},
},
{
"type": "function",
"function": {
"name": "find_similar_objects",
"description": (
"Find tracked objects that are visually and semantically similar "
"to a specific past event. Use this when the user references a "
"particular object they have seen and wants to find other "
"sightings of the same or similar one ('that green car', 'the "
"person in the red jacket', 'the package that was delivered'). "
"Prefer this over search_objects whenever the user's intent is "
"'find more like this specific one.' Use search_objects first "
"only if you need to locate the anchor event. Requires semantic "
"search to be enabled."
),
"parameters": {
"type": "object",
"properties": {
"event_id": {
"type": "string",
"description": "The id of the anchor event to find similar objects to.",
},
"after": {
"type": "string",
"description": "Start time in ISO 8601 format (e.g., '2024-01-01T00:00:00Z').",
},
"before": {
"type": "string",
"description": "End time in ISO 8601 format (e.g., '2024-01-01T23:59:59Z').",
},
"cameras": {
"type": "array",
"items": {"type": "string"},
"description": "Optional list of cameras to restrict to. Defaults to all.",
},
"labels": {
"type": "array",
"items": {"type": "string"},
"description": "Optional list of labels to restrict to. Defaults to the anchor event's label.",
},
"sub_labels": {
"type": "array",
"items": {"type": "string"},
"description": "Optional list of sub_labels (names) to restrict to.",
},
"zones": {
"type": "array",
"items": {"type": "string"},
"description": "Optional list of zones. An event matches if any of its zones overlap.",
},
"similarity_mode": {
"type": "string",
"enum": ["visual", "semantic", "fused"],
"description": "Which similarity signal(s) to use. 'fused' (default) combines visual and semantic.",
"default": "fused",
},
"min_score": {
"type": "number",
"description": "Drop matches with a similarity score below this threshold (0.0-1.0).",
},
"limit": {
"type": "integer",
"description": "Maximum number of matches to return (default: 10).",
"default": 10,
},
},
"required": ["event_id"],
},
},
},
{
"type": "function",
"function": {
"name": "set_camera_state",
"description": (
"Change a camera's feature state (e.g., turn detection on/off, enable/disable recordings). "
"Use camera='*' to apply to all cameras at once. "
"Only call this tool when the user explicitly asks to change a camera setting. "
"Requires admin privileges."
),
"parameters": {
"type": "object",
"properties": {
"camera": {
"type": "string",
"description": "Camera name to target, or '*' to target all cameras.",
},
"feature": {
"type": "string",
"enum": [
"detect",
"record",
"snapshots",
"audio",
"motion",
"enabled",
"birdseye",
"birdseye_mode",
"improve_contrast",
"ptz_autotracker",
"motion_contour_area",
"motion_threshold",
"notifications",
"audio_transcription",
"review_alerts",
"review_detections",
"object_descriptions",
"review_descriptions",
"profile",
],
"description": (
"The feature to change. Most features accept ON or OFF. "
"birdseye_mode accepts CONTINUOUS, MOTION, or OBJECTS. "
"motion_contour_area and motion_threshold accept a number. "
"profile accepts a profile name or 'none' to deactivate (requires camera='*')."
),
},
"value": {
"type": "string",
"description": "The value to set. ON or OFF for toggles, a number for thresholds, a profile name or 'none' for profile.",
},
},
"required": ["camera", "feature", "value"],
},
},
},
{
"type": "function",
"function": {
"name": "get_live_context",
"description": (
"Get the current live image and detection information for a camera: objects being tracked, "
"zones, timestamps. Use this to understand what is visible in the live view. "
"Call this when answering questions about what is happening right now on a specific camera."
),
"parameters": {
"type": "object",
"properties": {
"camera": {
"type": "string",
"description": "Camera name to get live context for.",
},
},
"required": ["camera"],
},
},
},
{
"type": "function",
"function": {
"name": "start_camera_watch",
"description": (
"Start a continuous VLM watch job that monitors a camera and sends a notification "
"when a specified condition is met. Use this when the user wants to be alerted about "
"a future event, e.g. 'tell me when guests arrive' or 'notify me when the package is picked up'. "
"Only one watch job can run at a time. Returns a job ID."
),
"parameters": {
"type": "object",
"properties": {
"camera": {
"type": "string",
"description": "Camera ID to monitor.",
},
"condition": {
"type": "string",
"description": (
"Natural-language description of the condition to watch for, "
"e.g. 'a person arrives at the front door'."
),
},
"max_duration_minutes": {
"type": "integer",
"description": "Maximum time to watch before giving up (minutes, default 60).",
"default": 60,
},
"labels": {
"type": "array",
"items": {"type": "string"},
"description": "Object labels that should trigger a VLM check (e.g. ['person', 'car']). If omitted, any detection on the camera triggers a check.",
},
"zones": {
"type": "array",
"items": {"type": "string"},
"description": "Zone names to filter by. If specified, only detections in these zones trigger a VLM check.",
},
},
"required": ["camera", "condition"],
},
},
},
{
"type": "function",
"function": {
"name": "stop_camera_watch",
"description": (
"Cancel the currently running VLM watch job. Use this when the user wants to "
"stop a previously started watch, e.g. 'stop watching the front door'."
),
"parameters": {
"type": "object",
"properties": {},
"required": [],
},
},
},
{
"type": "function",
"function": {
"name": "get_profile_status",
"description": (
"Get the current profile status including the active profile and "
"timestamps of when each profile was last activated. Use this to "
"determine time periods for recap requests — e.g. when the user asks "
"'what happened while I was away?', call this first to find the relevant "
"time window based on profile activation history."
),
"parameters": {
"type": "object",
"properties": {},
"required": [],
},
},
},
{
"type": "function",
"function": {
"name": "get_recap",
"description": (
"Get a recap of all activity (alerts and detections) for a given time period. "
"Use this after calling get_profile_status to retrieve what happened during "
"a specific window — e.g. 'what happened while I was away?'. Returns a "
"chronological list of activity with camera, objects, zones, and GenAI-generated "
"descriptions when available. Summarize the results for the user."
),
"parameters": {
"type": "object",
"properties": {
"after": {
"type": "string",
"description": "Start of the time period in ISO 8601 format (e.g. '2025-03-15T08:00:00').",
},
"before": {
"type": "string",
"description": "End of the time period in ISO 8601 format (e.g. '2025-03-15T17:00:00').",
},
"cameras": {
"type": "string",
"description": "Comma-separated camera IDs to include, or 'all' for all cameras. Default is 'all'.",
},
"severity": {
"type": "string",
"enum": ["alert", "detection"],
"description": "Filter by severity level. Omit to include both alerts and detections.",
},
},
"required": ["after", "before"],
},
},
},
]
@router.get(
"/chat/tools",
dependencies=[Depends(allow_any_authenticated())],
@ -460,10 +80,13 @@ def get_tool_definitions(
)
def get_tools(request: Request) -> JSONResponse:
"""Get list of available tools for LLM function calling."""
semantic_search_enabled = bool(
getattr(request.app.frigate_config.semantic_search, "enabled", False)
config = request.app.frigate_config
semantic_search_enabled = bool(getattr(config.semantic_search, "enabled", False))
attribute_classifications = get_attribute_classifications(config)
tools = get_tool_definitions(
semantic_search_enabled=semantic_search_enabled,
attribute_classifications=attribute_classifications,
)
tools = get_tool_definitions(semantic_search_enabled=semantic_search_enabled)
return JSONResponse(content={"tools": tools})
@ -554,11 +177,14 @@ async def _execute_search_objects(
elif zones is None:
zones = "all"
attribute = arguments.get("attribute")
# Build query parameters compatible with EventsQueryParams
query_params = EventsQueryParams(
cameras=arguments.get("camera", "all"),
labels=arguments.get("label", "all"),
sub_labels=arguments.get("sub_label", "all"), # case-insensitive on the backend
attributes=attribute if attribute else "all",
zones=zones,
zone=zones,
after=after,
@ -626,6 +252,7 @@ async def _execute_search_objects_semantic(
label = arguments.get("label")
sub_label = arguments.get("sub_label")
attribute = arguments.get("attribute")
zones = arguments.get("zones")
if isinstance(zones, list) and zones:
@ -668,6 +295,10 @@ async def _execute_search_objects_semantic(
if sub_label:
# case-insensitive match to mirror events() behavior
clauses.append(fn.LOWER(Event.sub_label.cast("text")) == sub_label.lower())
if attribute:
attribute_clause = _build_attribute_filter_clause(attribute)
if attribute_clause is not None:
clauses.append(attribute_clause)
if zones:
zone_clauses = [Event.zones.cast("text") % f'*"{zone}"*' for zone in zones]
clauses.append(reduce(operator.or_, zone_clauses))
@ -1481,72 +1112,19 @@ async def chat_completion(
config = request.app.frigate_config
semantic_search_enabled = bool(getattr(config.semantic_search, "enabled", False))
tools = get_tool_definitions(semantic_search_enabled=semantic_search_enabled)
attribute_classifications = get_attribute_classifications(config)
tools = get_tool_definitions(
semantic_search_enabled=semantic_search_enabled,
attribute_classifications=attribute_classifications,
)
conversation = []
current_datetime = datetime.now()
current_date_str = current_datetime.strftime("%Y-%m-%d")
current_time_str = current_datetime.strftime("%I:%M:%S %p")
cameras_info = []
has_speed_zone = False
for camera_id in allowed_cameras:
if camera_id not in config.cameras:
continue
camera_config = config.cameras[camera_id]
friendly_name = (
camera_config.friendly_name
if camera_config.friendly_name
else camera_id.replace("_", " ").title()
system_prompt = build_chat_system_prompt(
config=config,
allowed_cameras=allowed_cameras,
semantic_search_enabled=semantic_search_enabled,
attribute_classifications=attribute_classifications,
)
zone_names = list(camera_config.zones.keys())
if not has_speed_zone:
has_speed_zone = any(
zone.distances for zone in camera_config.zones.values()
)
if zone_names:
cameras_info.append(
f" - {friendly_name} (ID: {camera_id}, zones: {', '.join(zone_names)})"
)
else:
cameras_info.append(f" - {friendly_name} (ID: {camera_id})")
cameras_section = ""
if cameras_info:
cameras_section = (
"\n\nAvailable cameras:\n"
+ "\n".join(cameras_info)
+ "\n\nWhen users refer to cameras by their friendly name (e.g., 'Back Deck Camera'), use the corresponding camera ID (e.g., 'back_deck_cam') in tool calls."
)
speed_units_section = ""
if has_speed_zone:
speed_unit = (
"mph" if config.ui.unit_system == UnitSystemEnum.imperial else "km/h"
)
speed_units_section = f"\n\nReport object speeds to the user in {speed_unit}."
semantic_search_section = ""
if semantic_search_enabled:
semantic_search_section = (
"\n\nWhen routing a search_objects call, pick filters by the shape of the user's request:\n"
"- Generic class ('show me all cars today'): set `label` only.\n"
"- Specific named entity — a known person ('John'), delivery company ('Amazon'), animal species/breed ('blue jay', 'cardinal', 'golden retriever'), or license plate: set `sub_label` only and leave `label` unset.\n"
"- Physical characteristic, appearance, or activity that is NOT a discrete name ('find me people riding a lawn mower', 'someone in a red jacket', 'a person carrying a package'): set `semantic_query` with the descriptive phrase, optionally combined with `label` for the object class. Never put descriptive phrases in `sub_label`."
)
system_prompt = f"""You are a helpful assistant for Frigate, a security camera NVR system. You help users answer questions about their cameras, detected objects, and events.
Current server local date and time: {current_date_str} at {current_time_str}
Do not start your response with phrases like "I will check...", "Let me see...", or "Let me look...". Answer directly.
Always present times to the user in the server's local timezone. When tool results include start_time_local and end_time_local, use those exact strings when listing or describing detection times—do not convert or invent timestamps. Do not use UTC or ISO format with Z for the user-facing answer unless the tool result only provides Unix timestamps without local time fields.
When users ask about "today", "yesterday", "this week", etc., use the current date above as reference.
When searching for objects or events, use ISO 8601 format for dates (e.g., {current_date_str}T00:00:00Z for the start of today).
Always be accurate with time calculations based on the current date provided.
When a user refers to a specific object they have seen or describe with identifying details ("that green car", "the person in the red jacket", "a package left today"), prefer the find_similar_objects tool over search_objects. Use search_objects first only to locate the anchor event, then pass its id to find_similar_objects. For generic queries like "show me all cars today", keep using search_objects. If a user message begins with [attached_event:<id>], treat that event id as the anchor for any similarity or "tell me more" request in the same message and call find_similar_objects with that id.{semantic_search_section}{cameras_section}{speed_units_section}"""
conversation.append(
{
@ -1607,6 +1185,13 @@ When a user refers to a specific object they have seen or describe with identify
)
+ b"\n"
)
elif kind == "reasoning_delta":
yield (
json.dumps({"type": "reasoning", "delta": value}).encode(
"utf-8"
)
+ b"\n"
)
elif kind == "stats":
yield (
json.dumps({"type": "stats", **value}).encode("utf-8")
@ -1707,6 +1292,7 @@ When a user refers to a specific object they have seen or describe with identify
final_content = response.get("content") or ""
if body.stream:
final_reasoning = response.get("reasoning")
async def stream_body() -> Any:
if tool_calls:
@ -1721,6 +1307,15 @@ When a user refers to a specific object they have seen or describe with identify
).encode("utf-8")
+ b"\n"
)
# Emit the full reasoning trace up front when the
# underlying client did not stream it
if final_reasoning:
yield (
json.dumps(
{"type": "reasoning", "delta": final_reasoning}
).encode("utf-8")
+ b"\n"
)
# Stream content in word-sized chunks for smooth UX
for part in chunk_content(final_content):
yield (
@ -1741,6 +1336,7 @@ When a user refers to a specific object they have seen or describe with identify
message=ChatMessageResponse(
role="assistant",
content=final_content,
reasoning=response.get("reasoning"),
tool_calls=None,
),
finish_reason=response.get("finish_reason", "stop"),

View File

@ -20,6 +20,10 @@ class ChatMessageResponse(BaseModel):
content: Optional[str] = Field(
default=None, description="Message content (None if tool calls present)"
)
reasoning: Optional[str] = Field(
default=None,
description="Separated reasoning/thinking trace if the model emitted one",
)
tool_calls: Optional[list[ToolCallInvocation]] = Field(
default=None, description="Tool calls if LLM wants to call tools"
)

View File

@ -1,6 +1,5 @@
"""Generative AI module for Frigate."""
import datetime
import importlib
import json
import logging
@ -9,13 +8,18 @@ import re
from typing import Any, Callable, Optional
import numpy as np
from playhouse.shortcuts import model_to_dict
from pydantic import ValidationError
from frigate.config import CameraConfig, GenAIConfig, GenAIProviderEnum
from frigate.const import CLIPS_DIR
from frigate.data_processing.post.types import ReviewMetadata
from frigate.genai.manager import GenAIClientManager
from frigate.genai.prompts import (
build_object_description_prompt,
build_review_description_prompt,
build_review_description_response_format,
build_review_summary_prompt,
)
from frigate.models import Event
logger = logging.getLogger(__name__)
@ -61,75 +65,14 @@ class GenAIClient:
activity_context_prompt: str,
) -> ReviewMetadata | None:
"""Generate a description for the review item activity."""
context_prompt = build_review_description_prompt(
review_data,
thumbnails,
concerns,
preferred_language,
activity_context_prompt,
)
def get_concern_prompt() -> str:
if concerns:
concern_list = "\n - ".join(concerns)
return f"""- `other_concerns` (list of strings): Include a list of any of the following concerns that are occurring:
- {concern_list}"""
else:
return ""
def get_language_prompt() -> str:
if preferred_language:
return f"Provide your answer in {preferred_language}"
else:
return ""
def get_objects_list() -> str:
if review_data["unified_objects"]:
return "\n- " + "\n- ".join(review_data["unified_objects"])
else:
return "\n- (No objects detected)"
context_prompt = f"""
Your task is to analyze a sequence of images taken in chronological order from a security camera.
## Normal Activity Patterns for This Property
{activity_context_prompt}
## Task Instructions
Describe the scene based on observable actions and movements, evaluate the activity against the Activity Indicators above, and assign a potential_threat_level (0, 1, or 2) by applying the threat level indicators consistently.
## Analysis Guidelines
When forming your description:
- **CRITICAL: Only describe objects explicitly listed in "Objects in Scene" below.** Do not infer or mention additional people, vehicles, or objects not present in this list, even if visual patterns suggest them. If only a car is listed, do not describe a person interacting with it unless "person" is also in the objects list.
- **Only describe actions actually visible in the frames.** Do not assume or infer actions that you don't observe happening. If someone walks toward furniture but you never see them sit, do not say they sat. Stick to what you can see across the sequence.
- Describe what you observe: actions, movements, interactions with objects and the environment. Include any observable environmental changes (e.g., lighting changes triggered by activity).
- Note visible details such as clothing, items being carried or placed, tools or equipment present, and how they interact with the property or objects.
- Consider the full sequence chronologically: what happens from start to finish, how duration and actions relate to the location and objects involved.
- **Use the actual timestamp provided in "Activity started at"** below for time of day contextdo not infer time from image brightness or darkness. Unusual hours (late night/early morning) should increase suspicion when the observable behavior itself appears questionable. However, recognize that some legitimate activities can occur at any hour.
- **Consider duration as a primary factor**: Apply the duration thresholds defined in the activity patterns above. Brief sequences during normal hours with apparent purpose typically indicate normal activity unless explicit suspicious actions are visible.
- **Weigh all evidence holistically**: Match the activity against the normal and suspicious patterns defined above, then evaluate based on the complete context (zone, objects, time, actions, duration). Apply the threat level indicators consistently. Use your judgment for edge cases.
## Response Field Guidelines
Respond with a JSON object matching the provided schema. Field-specific guidance:
- `observations`: Include the very start of the activity for example, a vehicle entering the frame or pulling into the driveway even if it lasts only a few frames and the rest of the clip is dominated by a longer activity. Include each arrival, departure, object handled, and notable change in position or state. Each item is a single concrete fact written as a complete sentence.
- `scene`: Describe how the sequence begins, then the progression of events all significant movements and actions in order. For example, if a vehicle arrives and then a person exits, describe both sequentially. For named subjects (those with a `` separator in "Objects in Scene"), always use their name do not replace them with generic terms. For unnamed objects (e.g., "person", "car"), refer to them naturally with articles (e.g., "a person", "the car"). Your description should align with and support the threat level you assign.
- `title`: Name the primary activity across the observations, together with the location. An activity is what is being done with objects, tools, or surfaces; locomotion through the scene qualifies as the activity only when no other interaction is observed. For named subjects, always use their name. For unnamed objects, refer to them naturally with articles.
- `shortSummary`: Briefly summarize the primary activity across the observations.
- `potential_threat_level`: Must be consistent with your scene description and the activity patterns above.
## Sequence Details
- Camera: {review_data["camera"]}
- Total frames: {len(thumbnails)} (Frame 1 = earliest, Frame {len(thumbnails)} = latest)
- Activity started at {review_data["start"]} and lasted {review_data["duration"]} seconds
- Zones involved: {", ".join(review_data["zones"]) if review_data["zones"] else "None"}
## Objects in Scene
Each line represents a detection state, not necessarily unique individuals. The `` symbol separates a recognized subject's name from their object type — use only the name (before the `←`) in your response, not the type after it. The same subject may appear across multiple lines if detected multiple times.
**Note: Unidentified objects (without names) are NOT indicators of suspicious activitythey simply mean the system hasn't identified that object.**
{get_objects_list()}
{get_language_prompt()}
"""
logger.debug(
f"Sending {len(thumbnails)} images to create review description on {review_data['camera']}"
)
@ -143,25 +86,7 @@ Each line represents a detection state, not necessarily unique individuals. The
) as f:
f.write(context_prompt)
# Build JSON schema for structured output from ReviewMetadata model
schema = ReviewMetadata.model_json_schema()
schema.get("properties", {}).pop("time", None)
if "time" in schema.get("required", []):
schema["required"].remove("time")
if not concerns:
schema.get("properties", {}).pop("other_concerns", None)
if "other_concerns" in schema.get("required", []):
schema["required"].remove("other_concerns")
response_format = {
"type": "json_schema",
"json_schema": {
"name": "review_metadata",
"strict": True,
"schema": schema,
},
}
response_format = build_review_description_response_format(concerns)
response = self._send(context_prompt, thumbnails, response_format)
@ -240,61 +165,9 @@ Each line represents a detection state, not necessarily unique individuals. The
debug_save: bool,
) -> str | None:
"""Generate a summary of review item descriptions over a period of time."""
time_range = f"{datetime.datetime.fromtimestamp(start_ts).strftime('%B %d, %Y at %I:%M %p')} to {datetime.datetime.fromtimestamp(end_ts).strftime('%B %d, %Y at %I:%M %p')}"
timeline_summary_prompt = f"""
You are a security officer writing a concise security report.
Time range: {time_range}
Input format: Each event is a JSON object with:
- "title", "scene", "confidence", "potential_threat_level" (0-2), "other_concerns", "camera", "time", "start_time", "end_time"
- "context": array of related events from other cameras that occurred during overlapping time periods
**Note: Use the "scene" field for event descriptions in the report. Ignore any "shortSummary" field if present.**
Report Structure - Use this EXACT format:
# Security Summary - {time_range}
## Overview
[Write 1-2 sentences summarizing the overall activity pattern during this period.]
---
## Timeline
[Group events by time periods (e.g., "Morning (6:00 AM - 12:00 PM)", "Afternoon (12:00 PM - 5:00 PM)", "Evening (5:00 PM - 9:00 PM)", "Night (9:00 PM - 6:00 AM)"). Use appropriate time blocks based on when events occurred.]
### [Time Block Name]
**HH:MM AM/PM** | [Camera Name] | [Threat Level Indicator]
- [Event title]: [Clear description incorporating contextual information from the "context" array]
- Context: [If context array has items, mention them here, e.g., "Delivery truck present on Front Driveway Cam (HH:MM AM/PM)"]
- Assessment: [Brief assessment incorporating context - if context explains the event, note it here]
[Repeat for each event in chronological order within the time block]
---
## Summary
[One sentence summarizing the period. If all events are normal/explained: "Routine activity observed." If review needed: "Some activity requires review but no security concerns." If security concerns: "Security concerns requiring immediate attention."]
Guidelines:
- List ALL events in chronological order, grouped by time blocks
- Threat level indicators: Normal, Needs review, 🔴 Security concern
- Integrate contextual information naturally - use the "context" array to enrich each event's description
- If context explains the event (e.g., delivery truck explains person at door), describe it accordingly (e.g., "delivery person" not "unidentified person")
- Be concise but informative - focus on what happened and what it means
- If contextual information makes an event clearly normal, reflect that in your assessment
- Only create time blocks that have events - don't create empty sections
"""
timeline_summary_prompt += "\n\nEvents:\n"
for event in events:
timeline_summary_prompt += f"\n{event}\n"
if preferred_language:
timeline_summary_prompt += f"\nProvide your answer in {preferred_language}"
timeline_summary_prompt = build_review_summary_prompt(
start_ts, end_ts, events, preferred_language
)
if debug_save:
with open(
@ -326,10 +199,7 @@ Guidelines:
) -> Optional[str]:
"""Generate a description for the frame."""
try:
prompt = camera_config.objects.genai.object_prompts.get(
str(event.label),
camera_config.objects.genai.prompt,
).format(**model_to_dict(event))
prompt = build_object_description_prompt(camera_config, event)
except KeyError as e:
logger.error(f"Invalid key in GenAI prompt: {e}")
return None
@ -430,6 +300,10 @@ Guidelines:
Returns:
Dictionary with:
- 'content': Optional[str] - The text response from the LLM, None if tool calls
- 'reasoning': Optional[str] - The separated reasoning/thinking trace
if the model emitted one (e.g. via OpenAI-compatible
`reasoning_content`). None when the model does not surface a
trace or the provider does not parse it.
- 'tool_calls': Optional[List[Dict]] - List of tool calls if LLM wants to call tools.
Each tool call dict has:
- 'id': str - Unique identifier for this tool call
@ -441,6 +315,14 @@ Guidelines:
- 'length': Hit token limit
- 'error': An error occurred
Streaming counterpart `chat_with_tools_stream` yields
``(kind, value)`` tuples where ``kind`` is one of:
- 'content_delta': value is a string fragment of the answer
- 'reasoning_delta': value is a string fragment of the reasoning
trace (emitted before content for thinking models)
- 'stats': value is a usage stats dict
- 'message': value is the final dict shape described above
Raises:
NotImplementedError: If the provider doesn't implement this method.
"""
@ -451,14 +333,15 @@ Guidelines:
)
return {
"content": None,
"reasoning": None,
"tool_calls": None,
"finish_reason": "error",
}
def load_providers() -> None:
package_dir = os.path.dirname(__file__)
for filename in os.listdir(package_dir):
plugins_dir = os.path.join(os.path.dirname(__file__), "plugins")
for filename in os.listdir(plugins_dir):
if filename.endswith(".py") and filename != "__init__.py":
module_name = f"frigate.genai.{filename[:-3]}"
module_name = f"frigate.genai.plugins.{filename[:-3]}"
importlib.import_module(module_name)

View File

@ -1,315 +0,0 @@
"""Azure OpenAI Provider for Frigate AI."""
import base64
import json
import logging
from typing import Any, AsyncGenerator, Optional
from urllib.parse import parse_qs, urlparse
from openai import AzureOpenAI
from frigate.config import GenAIProviderEnum
from frigate.genai import GenAIClient, register_genai_provider
from frigate.genai.openai import _stats_from_openai_usage
logger = logging.getLogger(__name__)
@register_genai_provider(GenAIProviderEnum.azure_openai)
class OpenAIClient(GenAIClient):
"""Generative AI client for Frigate using Azure OpenAI."""
provider: AzureOpenAI
def _init_provider(self) -> AzureOpenAI | None:
"""Initialize the client."""
try:
parsed_url = urlparse(self.genai_config.base_url or "")
query_params = parse_qs(parsed_url.query)
api_version = query_params.get("api-version", [None])[0]
azure_endpoint = f"{parsed_url.scheme}://{parsed_url.netloc}/"
if not api_version:
logger.warning("Azure OpenAI url is missing API version.")
return None
except Exception as e:
logger.warning("Error parsing Azure OpenAI url: %s", str(e))
return None
return AzureOpenAI(
api_key=self.genai_config.api_key,
api_version=api_version,
azure_endpoint=azure_endpoint,
)
def _send(
self,
prompt: str,
images: list[bytes],
response_format: Optional[dict] = None,
) -> Optional[str]:
"""Submit a request to Azure OpenAI."""
encoded_images = [base64.b64encode(image).decode("utf-8") for image in images]
try:
request_params = {
"model": self.genai_config.model,
"messages": [
{
"role": "user",
"content": [{"type": "text", "text": prompt}]
+ [
{
"type": "image_url",
"image_url": {
"url": f"data:image/jpeg;base64,{image}",
"detail": "low",
},
}
for image in encoded_images
],
},
],
"timeout": self.timeout,
**self.genai_config.runtime_options,
}
if response_format:
request_params["response_format"] = response_format
result = self.provider.chat.completions.create(**request_params)
except Exception as e:
logger.warning("Azure OpenAI returned an error: %s", str(e))
return None
if len(result.choices) > 0:
return str(result.choices[0].message.content.strip())
return None
def list_models(self) -> list[str]:
"""Return available model IDs from Azure OpenAI."""
try:
return sorted(m.id for m in self.provider.models.list().data)
except Exception as e:
logger.warning("Failed to list Azure OpenAI models: %s", e)
return []
def get_context_size(self) -> int:
"""Get the context window size for Azure OpenAI."""
return 128000
def chat_with_tools(
self,
messages: list[dict[str, Any]],
tools: Optional[list[dict[str, Any]]] = None,
tool_choice: Optional[str] = "auto",
) -> dict[str, Any]:
try:
openai_tool_choice = None
if tool_choice:
if tool_choice == "none":
openai_tool_choice = "none"
elif tool_choice == "auto":
openai_tool_choice = "auto"
elif tool_choice == "required":
openai_tool_choice = "required"
request_params = {
"model": self.genai_config.model,
"messages": messages,
"timeout": self.timeout,
}
if tools:
request_params["tools"] = tools
if openai_tool_choice is not None:
request_params["tool_choice"] = openai_tool_choice
result = self.provider.chat.completions.create(**request_params) # type: ignore[call-overload]
if (
result is None
or not hasattr(result, "choices")
or len(result.choices) == 0
):
return {
"content": None,
"tool_calls": None,
"finish_reason": "error",
}
choice = result.choices[0]
message = choice.message
content = message.content.strip() if message.content else None
tool_calls = None
if message.tool_calls:
tool_calls = []
for tool_call in message.tool_calls:
try:
arguments = json.loads(tool_call.function.arguments)
except (json.JSONDecodeError, AttributeError) as e:
logger.warning(
f"Failed to parse tool call arguments: {e}, "
f"tool: {tool_call.function.name if hasattr(tool_call.function, 'name') else 'unknown'}"
)
arguments = {}
tool_calls.append(
{
"id": tool_call.id if hasattr(tool_call, "id") else "",
"name": tool_call.function.name
if hasattr(tool_call.function, "name")
else "",
"arguments": arguments,
}
)
finish_reason = "error"
if hasattr(choice, "finish_reason") and choice.finish_reason:
finish_reason = choice.finish_reason
elif tool_calls:
finish_reason = "tool_calls"
elif content:
finish_reason = "stop"
return {
"content": content,
"tool_calls": tool_calls,
"finish_reason": finish_reason,
}
except Exception as e:
logger.warning("Azure OpenAI returned an error: %s", str(e))
return {
"content": None,
"tool_calls": None,
"finish_reason": "error",
}
async def chat_with_tools_stream(
self,
messages: list[dict[str, Any]],
tools: Optional[list[dict[str, Any]]] = None,
tool_choice: Optional[str] = "auto",
) -> AsyncGenerator[tuple[str, Any], None]:
"""
Stream chat with tools; yields content deltas then final message.
Implements streaming function calling/tool usage for Azure OpenAI models.
"""
try:
openai_tool_choice = None
if tool_choice:
if tool_choice == "none":
openai_tool_choice = "none"
elif tool_choice == "auto":
openai_tool_choice = "auto"
elif tool_choice == "required":
openai_tool_choice = "required"
request_params = {
"model": self.genai_config.model,
"messages": messages,
"timeout": self.timeout,
"stream": True,
"stream_options": {"include_usage": True},
}
if tools:
request_params["tools"] = tools
if openai_tool_choice is not None:
request_params["tool_choice"] = openai_tool_choice
# Use streaming API
content_parts: list[str] = []
tool_calls_by_index: dict[int, dict[str, Any]] = {}
finish_reason = "stop"
usage_stats: Optional[dict[str, Any]] = None
stream = self.provider.chat.completions.create(**request_params) # type: ignore[call-overload]
for chunk in stream:
chunk_usage = getattr(chunk, "usage", None)
if chunk_usage is not None:
usage_stats = _stats_from_openai_usage(chunk_usage)
if not chunk or not chunk.choices:
continue
choice = chunk.choices[0]
delta = choice.delta
# Check for finish reason
if choice.finish_reason:
finish_reason = choice.finish_reason
# Extract content deltas
if delta.content:
content_parts.append(delta.content)
yield ("content_delta", delta.content)
# Extract tool calls
if delta.tool_calls:
for tc in delta.tool_calls:
idx = tc.index
fn = tc.function
if idx not in tool_calls_by_index:
tool_calls_by_index[idx] = {
"id": tc.id or "",
"name": fn.name if fn and fn.name else "",
"arguments": "",
}
t = tool_calls_by_index[idx]
if tc.id:
t["id"] = tc.id
if fn and fn.name:
t["name"] = fn.name
if fn and fn.arguments:
t["arguments"] += fn.arguments
# Build final message
full_content = "".join(content_parts).strip() or None
# Convert tool calls to list format
tool_calls_list = None
if tool_calls_by_index:
tool_calls_list = []
for tc in tool_calls_by_index.values():
try:
# Parse accumulated arguments as JSON
parsed_args = json.loads(tc["arguments"])
except (json.JSONDecodeError, Exception):
parsed_args = tc["arguments"]
tool_calls_list.append(
{
"id": tc["id"],
"name": tc["name"],
"arguments": parsed_args,
}
)
finish_reason = "tool_calls"
if usage_stats is not None:
yield ("stats", usage_stats)
yield (
"message",
{
"content": full_content,
"tool_calls": tool_calls_list,
"finish_reason": finish_reason,
},
)
except Exception as e:
logger.warning("Azure OpenAI streaming returned an error: %s", str(e))
yield (
"message",
{
"content": None,
"tool_calls": None,
"finish_reason": "error",
},
)

View File

@ -0,0 +1 @@
"""GenAI provider plugins."""

View File

@ -0,0 +1,53 @@
"""Azure OpenAI Provider for Frigate AI.
Azure OpenAI exposes the same chat completions API as OpenAI once the
client is constructed, so this provider inherits all transport, streaming,
reasoning, and tool-calling logic from :class:`OpenAIClient` and only
overrides what is genuinely Azure-specific:
- Client construction: parses ``api-version`` out of the configured
``base_url`` query string and instantiates :class:`openai.AzureOpenAI`
with ``azure_endpoint`` instead of ``base_url``. Raises if the URL is
malformed; :class:`GenAIClientManager` catches the exception and
disables the provider.
- Context size: Azure does not expose a per-model ``max_model_len`` field
reliably, so we keep the historical 128K default rather than the
model-name heuristic used by OpenAI.
"""
import logging
from urllib.parse import parse_qs, urlparse
from openai import AzureOpenAI
from frigate.config import GenAIProviderEnum
from frigate.genai import register_genai_provider
from frigate.genai.plugins.openai import OpenAIClient
logger = logging.getLogger(__name__)
@register_genai_provider(GenAIProviderEnum.azure_openai)
class AzureOpenAIClient(OpenAIClient):
"""Generative AI client for Frigate using Azure OpenAI."""
def _init_provider(self) -> AzureOpenAI:
"""Initialize the AzureOpenAI client from the configured base_url."""
parsed_url = urlparse(self.genai_config.base_url or "")
query_params = parse_qs(parsed_url.query)
api_version = query_params.get("api-version", [None])[0]
if not api_version:
raise ValueError("Azure OpenAI base_url is missing api-version.")
azure_endpoint = f"{parsed_url.scheme}://{parsed_url.netloc}/"
return AzureOpenAI(
api_key=self.genai_config.api_key,
api_version=api_version,
azure_endpoint=azure_endpoint,
)
def get_context_size(self) -> int:
"""Azure does not reliably surface per-model context size; use 128K."""
return 128000

View File

@ -248,6 +248,13 @@ class GeminiClient(GenAIClient):
if tool_config:
config_params["tool_config"] = tool_config
# Ask thinking-capable models (Gemini 2.5+) to include their
# reasoning trace as separate `thought` parts so we can surface
# it on the reasoning channel. Older models ignore this field.
config_params["thinking_config"] = types.ThinkingConfig(
include_thoughts=True
)
# Merge runtime_options
if isinstance(self.genai_config.runtime_options, dict):
config_params.update(self.genai_config.runtime_options)
@ -262,18 +269,23 @@ class GeminiClient(GenAIClient):
if not response or not response.candidates:
return {
"content": None,
"reasoning": None,
"tool_calls": None,
"finish_reason": "error",
}
candidate = response.candidates[0]
content = None
reasoning_parts: list[str] = []
tool_calls = None
# Extract content and tool calls from response
# Extract content, reasoning, and tool calls from response
if candidate.content and candidate.content.parts:
for part in candidate.content.parts:
if part.text:
if getattr(part, "thought", False):
reasoning_parts.append(part.text)
else:
content = part.text.strip()
elif part.function_call:
# Handle function call
@ -297,6 +309,8 @@ class GeminiClient(GenAIClient):
}
)
reasoning = "".join(reasoning_parts).strip() or None
# Determine finish reason
finish_reason = "error"
if hasattr(candidate, "finish_reason") and candidate.finish_reason:
@ -322,6 +336,7 @@ class GeminiClient(GenAIClient):
return {
"content": content,
"reasoning": reasoning,
"tool_calls": tool_calls,
"finish_reason": finish_reason,
}
@ -330,6 +345,7 @@ class GeminiClient(GenAIClient):
logger.warning("Gemini API error during chat_with_tools: %s", str(e))
return {
"content": None,
"reasoning": None,
"tool_calls": None,
"finish_reason": "error",
}
@ -339,6 +355,7 @@ class GeminiClient(GenAIClient):
)
return {
"content": None,
"reasoning": None,
"tool_calls": None,
"finish_reason": "error",
}
@ -477,12 +494,19 @@ class GeminiClient(GenAIClient):
if tool_config:
config_params["tool_config"] = tool_config
# Ask thinking-capable models to include their reasoning trace
# as separate `thought` parts (Gemini 2.5+; ignored elsewhere).
config_params["thinking_config"] = types.ThinkingConfig(
include_thoughts=True
)
# Merge runtime_options
if isinstance(self.genai_config.runtime_options, dict):
config_params.update(self.genai_config.runtime_options)
# Use streaming API
content_parts: list[str] = []
reasoning_parts: list[str] = []
tool_calls_by_index: dict[int, dict[str, Any]] = {}
finish_reason = "stop"
usage_stats: Optional[dict[str, Any]] = None
@ -519,10 +543,14 @@ class GeminiClient(GenAIClient):
]:
finish_reason = "error"
# Extract content and tool calls from chunk
# Extract content, reasoning, and tool calls from chunk
if candidate.content and candidate.content.parts:
for part in candidate.content.parts:
if part.text:
if getattr(part, "thought", False):
reasoning_parts.append(part.text)
yield ("reasoning_delta", part.text)
else:
content_parts.append(part.text)
yield ("content_delta", part.text)
elif part.function_call:
@ -565,6 +593,7 @@ class GeminiClient(GenAIClient):
# Build final message
full_content = "".join(content_parts).strip() or None
full_reasoning = "".join(reasoning_parts).strip() or None
# Convert tool calls to list format
tool_calls_list = None
@ -593,6 +622,7 @@ class GeminiClient(GenAIClient):
"message",
{
"content": full_content,
"reasoning": full_reasoning,
"tool_calls": tool_calls_list,
"finish_reason": finish_reason,
},
@ -604,6 +634,7 @@ class GeminiClient(GenAIClient):
"message",
{
"content": None,
"reasoning": None,
"tool_calls": None,
"finish_reason": "error",
},
@ -616,6 +647,7 @@ class GeminiClient(GenAIClient):
"message",
{
"content": None,
"reasoning": None,
"tool_calls": None,
"finish_reason": "error",
},

View File

@ -527,19 +527,28 @@ class LlamaCppClient(GenAIClient):
k: v for k, v in self.provider_options.items() if k != "context_size"
}
payload.update(provider_opts)
payload.update(self.genai_config.runtime_options)
return payload
def _message_from_choice(self, choice: dict[str, Any]) -> dict[str, Any]:
"""Parse OpenAI-style choice into {content, tool_calls, finish_reason}."""
"""Parse OpenAI-style choice into {content, reasoning, tool_calls, finish_reason}.
llama.cpp's `--reasoning-format` puts the trace in
`message.reasoning_content` (preferred) or `message.thinking`; both
keys are accepted so different builds work without configuration.
"""
message = choice.get("message", {})
content = message.get("content")
content = content.strip() if content else None
reasoning = message.get("reasoning_content") or message.get("thinking")
reasoning = reasoning.strip() if reasoning else None
tool_calls = parse_tool_calls_from_message(message)
finish_reason = choice.get("finish_reason") or (
"tool_calls" if tool_calls else "stop" if content else "error"
)
return {
"content": content,
"reasoning": reasoning,
"tool_calls": tool_calls,
"finish_reason": finish_reason,
}
@ -802,6 +811,7 @@ class LlamaCppClient(GenAIClient):
try:
payload = self._build_payload(messages, tools, tool_choice, stream=True)
content_parts: list[str] = []
reasoning_parts: list[str] = []
tool_calls_by_index: dict[int, dict[str, Any]] = {}
finish_reason = "stop"
@ -831,6 +841,15 @@ class LlamaCppClient(GenAIClient):
delta = choices[0].get("delta", {})
if choices[0].get("finish_reason"):
finish_reason = choices[0]["finish_reason"]
# llama.cpp emits separated thinking under
# reasoning_content (preferred) or thinking before any
# content tokens arrive
reasoning_delta = delta.get("reasoning_content") or delta.get(
"thinking"
)
if reasoning_delta:
reasoning_parts.append(reasoning_delta)
yield ("reasoning_delta", reasoning_delta)
if delta.get("content"):
content_parts.append(delta["content"])
yield ("content_delta", delta["content"])
@ -856,6 +875,7 @@ class LlamaCppClient(GenAIClient):
)
full_content = "".join(content_parts).strip() or None
full_reasoning = "".join(reasoning_parts).strip() or None
tool_calls_list = self._streamed_tool_calls_to_list(tool_calls_by_index)
if tool_calls_list:
finish_reason = "tool_calls"
@ -863,6 +883,7 @@ class LlamaCppClient(GenAIClient):
"message",
{
"content": full_content,
"reasoning": full_reasoning,
"tool_calls": tool_calls_list,
"finish_reason": finish_reason,
},

View File

@ -309,6 +309,7 @@ class OllamaClient(GenAIClient):
"model": self.genai_config.model,
"messages": request_messages,
**self.provider_options,
**self.genai_config.runtime_options,
}
if stream:
request_params["stream"] = True
@ -336,6 +337,9 @@ class OllamaClient(GenAIClient):
response.get("done"),
)
content = message.get("content", "").strip() if message.get("content") else None
reasoning = (
message.get("thinking", "").strip() if message.get("thinking") else None
)
tool_calls = parse_tool_calls_from_message(message)
finish_reason = "error"
if response.get("done"):
@ -348,6 +352,7 @@ class OllamaClient(GenAIClient):
finish_reason = "stop"
return {
"content": content,
"reasoning": reasoning,
"tool_calls": tool_calls,
"finish_reason": finish_reason,
}
@ -431,6 +436,9 @@ class OllamaClient(GenAIClient):
)
response = await async_client.chat(**request_params)
result = self._message_from_response(response)
reasoning = result.get("reasoning")
if reasoning:
yield ("reasoning_delta", reasoning)
content = result.get("content")
if content:
yield ("content_delta", content)
@ -449,6 +457,7 @@ class OllamaClient(GenAIClient):
headers=self._auth_headers(),
)
content_parts: list[str] = []
reasoning_parts: list[str] = []
final_message: dict[str, Any] | None = None
final_chunk: Any = None
stream = await async_client.chat(**request_params)
@ -456,6 +465,10 @@ class OllamaClient(GenAIClient):
if not chunk or "message" not in chunk:
continue
msg = chunk.get("message", {})
reasoning_delta = msg.get("thinking") or ""
if reasoning_delta:
reasoning_parts.append(reasoning_delta)
yield ("reasoning_delta", reasoning_delta)
delta = msg.get("content") or ""
if delta:
content_parts.append(delta)
@ -463,8 +476,10 @@ class OllamaClient(GenAIClient):
if chunk.get("done"):
final_chunk = chunk
full_content = "".join(content_parts).strip() or None
full_reasoning = "".join(reasoning_parts).strip() or None
final_message = {
"content": full_content,
"reasoning": full_reasoning,
"tool_calls": None,
"finish_reason": "stop",
}
@ -481,6 +496,7 @@ class OllamaClient(GenAIClient):
"message",
{
"content": "".join(content_parts).strip() or None,
"reasoning": "".join(reasoning_parts).strip() or None,
"tool_calls": None,
"finish_reason": "stop",
},

View File

@ -38,7 +38,11 @@ class OpenAIClient(GenAIClient):
context_size: Optional[int] = None
def _init_provider(self) -> OpenAI:
"""Initialize the client."""
"""Initialize the client.
Subclasses (e.g. Azure) should raise on configuration errors; the
manager catches construction failures and disables the provider.
"""
# Extract context_size from provider_options as it's not a valid OpenAI client parameter
# It will be used in get_context_size() instead
provider_opts = {
@ -236,6 +240,10 @@ class OpenAIClient(GenAIClient):
choice = result.choices[0]
message = choice.message
content = message.content.strip() if message.content else None
raw_reasoning = getattr(message, "reasoning_content", None) or getattr(
message, "reasoning", None
)
reasoning = raw_reasoning.strip() if raw_reasoning else None
tool_calls = None
if message.tool_calls:
@ -270,6 +278,7 @@ class OpenAIClient(GenAIClient):
return {
"content": content,
"reasoning": reasoning,
"tool_calls": tool_calls,
"finish_reason": finish_reason,
}
@ -278,6 +287,7 @@ class OpenAIClient(GenAIClient):
logger.warning("OpenAI request timed out: %s", str(e))
return {
"content": None,
"reasoning": None,
"tool_calls": None,
"finish_reason": "error",
}
@ -285,6 +295,7 @@ class OpenAIClient(GenAIClient):
logger.warning("OpenAI returned an error: %s", str(e))
return {
"content": None,
"reasoning": None,
"tool_calls": None,
"finish_reason": "error",
}
@ -335,6 +346,7 @@ class OpenAIClient(GenAIClient):
# Use streaming API
content_parts: list[str] = []
reasoning_parts: list[str] = []
tool_calls_by_index: dict[int, dict[str, Any]] = {}
finish_reason = "stop"
usage_stats: Optional[dict[str, Any]] = None
@ -356,6 +368,15 @@ class OpenAIClient(GenAIClient):
if choice.finish_reason:
finish_reason = choice.finish_reason
# Extract reasoning deltas (reasoning_content or reasoning,
# depending on the server)
reasoning_delta = getattr(delta, "reasoning_content", None) or getattr(
delta, "reasoning", None
)
if reasoning_delta:
reasoning_parts.append(reasoning_delta)
yield ("reasoning_delta", reasoning_delta)
# Extract content deltas
if delta.content:
content_parts.append(delta.content)
@ -384,6 +405,7 @@ class OpenAIClient(GenAIClient):
# Build final message
full_content = "".join(content_parts).strip() or None
full_reasoning = "".join(reasoning_parts).strip() or None
# Convert tool calls to list format
tool_calls_list = None
@ -412,6 +434,7 @@ class OpenAIClient(GenAIClient):
"message",
{
"content": full_content,
"reasoning": full_reasoning,
"tool_calls": tool_calls_list,
"finish_reason": finish_reason,
},
@ -423,6 +446,7 @@ class OpenAIClient(GenAIClient):
"message",
{
"content": None,
"reasoning": None,
"tool_calls": None,
"finish_reason": "error",
},
@ -433,6 +457,7 @@ class OpenAIClient(GenAIClient):
"message",
{
"content": None,
"reasoning": None,
"tool_calls": None,
"finish_reason": "error",
},

739
frigate/genai/prompts.py Normal file
View File

@ -0,0 +1,739 @@
"""Prompt and response-format builders for GenAI features.
Centralizes the per-feature prompt framing and structured-output schema
shaping so provider clients in :mod:`frigate.genai.plugins` only handle
transport.
"""
import datetime
from typing import Any, Dict, List, Optional
from playhouse.shortcuts import model_to_dict
from frigate.config import CameraConfig, FrigateConfig
from frigate.config.classification import ObjectClassificationType
from frigate.config.ui import UnitSystemEnum
from frigate.data_processing.post.types import ReviewMetadata
from frigate.models import Event
def build_review_description_prompt(
review_data: dict[str, Any],
thumbnails: list[bytes],
concerns: list[str],
preferred_language: str | None,
activity_context_prompt: str,
) -> str:
"""Build the prompt for review activity description generation."""
def get_concern_prompt() -> str:
if concerns:
concern_list = "\n - ".join(concerns)
return (
"\n- `other_concerns` (list of strings): Include a list of any of "
"the following concerns that are occurring:\n"
f" - {concern_list}"
)
else:
return ""
def get_language_prompt() -> str:
if preferred_language:
return f"Provide your answer in {preferred_language}"
else:
return ""
def get_objects_list() -> str:
if review_data["unified_objects"]:
return "\n- " + "\n- ".join(review_data["unified_objects"])
else:
return "\n- (No objects detected)"
return f"""
Your task is to analyze a sequence of images taken in chronological order from a security camera.
## Normal Activity Patterns for This Property
{activity_context_prompt}
## Task Instructions
Describe the scene based on observable actions and movements, evaluate the activity against the Activity Indicators above, and assign a potential_threat_level (0, 1, or 2) by applying the threat level indicators consistently.
## Analysis Guidelines
When forming your description:
- **CRITICAL: Only describe objects explicitly listed in "Objects in Scene" below.** Do not infer or mention additional people, vehicles, or objects not present in this list, even if visual patterns suggest them. If only a car is listed, do not describe a person interacting with it unless "person" is also in the objects list.
- **Only describe actions actually visible in the frames.** Do not assume or infer actions that you don't observe happening. If someone walks toward furniture but you never see them sit, do not say they sat. Stick to what you can see across the sequence.
- Describe what you observe: actions, movements, interactions with objects and the environment. Include any observable environmental changes (e.g., lighting changes triggered by activity).
- Note visible details such as clothing, items being carried or placed, tools or equipment present, and how they interact with the property or objects.
- Consider the full sequence chronologically: what happens from start to finish, how duration and actions relate to the location and objects involved.
- **Use the actual timestamp provided in "Activity started at"** below for time of day contextdo not infer time from image brightness or darkness. Unusual hours (late night/early morning) should increase suspicion when the observable behavior itself appears questionable. However, recognize that some legitimate activities can occur at any hour.
- **Consider duration as a primary factor**: Apply the duration thresholds defined in the activity patterns above. Brief sequences during normal hours with apparent purpose typically indicate normal activity unless explicit suspicious actions are visible.
- **Weigh all evidence holistically**: Match the activity against the normal and suspicious patterns defined above, then evaluate based on the complete context (zone, objects, time, actions, duration). Apply the threat level indicators consistently. Use your judgment for edge cases.
## Response Field Guidelines
Respond with a JSON object matching the provided schema. Field-specific guidance:
- `observations`: Include the very start of the activity for example, a vehicle entering the frame or pulling into the driveway even if it lasts only a few frames and the rest of the clip is dominated by a longer activity. Include each arrival, departure, object handled, and notable change in position or state. Each item is a single concrete fact written as a complete sentence.
- `scene`: Describe how the sequence begins, then the progression of events all significant movements and actions in order. For example, if a vehicle arrives and then a person exits, describe both sequentially. For named subjects (those with a `` separator in "Objects in Scene"), always use their name do not replace them with generic terms. For unnamed objects (e.g., "person", "car"), refer to them naturally with articles (e.g., "a person", "the car"). Your description should align with and support the threat level you assign.
- `title`: Name the primary activity across the observations, together with the location. An activity is what is being done with objects, tools, or surfaces; locomotion through the scene qualifies as the activity only when no other interaction is observed. For named subjects, always use their name. For unnamed objects, refer to them naturally with articles.
- `shortSummary`: Briefly summarize the primary activity across the observations.
- `potential_threat_level`: Must be consistent with your scene description and the activity patterns above.
{get_concern_prompt()}
## Sequence Details
- Camera: {review_data["camera"]}
- Total frames: {len(thumbnails)} (Frame 1 = earliest, Frame {len(thumbnails)} = latest)
- Activity started at {review_data["start"]} and lasted {review_data["duration"]} seconds
- Zones involved: {", ".join(review_data["zones"]) if review_data["zones"] else "None"}
## Objects in Scene
Each line represents a detection state, not necessarily unique individuals. The `` symbol separates a recognized subject's name from their object type — use only the name (before the `←`) in your response, not the type after it. The same subject may appear across multiple lines if detected multiple times.
**Note: Unidentified objects (without names) are NOT indicators of suspicious activitythey simply mean the system hasn't identified that object.**
{get_objects_list()}
{get_language_prompt()}
"""
def build_review_description_response_format(concerns: list[str]) -> dict[str, Any]:
"""Build the structured-output JSON schema for review descriptions.
Strips the `time` field (populated server-side) and drops
`other_concerns` when no concerns are configured.
"""
schema = ReviewMetadata.model_json_schema()
schema.get("properties", {}).pop("time", None)
if "time" in schema.get("required", []):
schema["required"].remove("time")
if not concerns:
schema.get("properties", {}).pop("other_concerns", None)
if "other_concerns" in schema.get("required", []):
schema["required"].remove("other_concerns")
return {
"type": "json_schema",
"json_schema": {
"name": "review_metadata",
"strict": True,
"schema": schema,
},
}
def build_review_summary_prompt(
start_ts: float,
end_ts: float,
events: list[dict[str, Any]],
preferred_language: str | None,
) -> str:
"""Build the prompt for a multi-event review summary."""
time_range = (
f"{datetime.datetime.fromtimestamp(start_ts).strftime('%B %d, %Y at %I:%M %p')}"
f" to "
f"{datetime.datetime.fromtimestamp(end_ts).strftime('%B %d, %Y at %I:%M %p')}"
)
prompt = f"""
You are a security officer writing a concise security report.
Time range: {time_range}
Input format: Each event is a JSON object with:
- "title", "scene", "confidence", "potential_threat_level" (0-2), "other_concerns", "camera", "time", "start_time", "end_time"
- "context": array of related events from other cameras that occurred during overlapping time periods
**Note: Use the "scene" field for event descriptions in the report. Ignore any "shortSummary" field if present.**
Report Structure - Use this EXACT format:
# Security Summary - {time_range}
## Overview
[Write 1-2 sentences summarizing the overall activity pattern during this period.]
---
## Timeline
[Group events by time periods (e.g., "Morning (6:00 AM - 12:00 PM)", "Afternoon (12:00 PM - 5:00 PM)", "Evening (5:00 PM - 9:00 PM)", "Night (9:00 PM - 6:00 AM)"). Use appropriate time blocks based on when events occurred.]
### [Time Block Name]
**HH:MM AM/PM** | [Camera Name] | [Threat Level Indicator]
- [Event title]: [Clear description incorporating contextual information from the "context" array]
- Context: [If context array has items, mention them here, e.g., "Delivery truck present on Front Driveway Cam (HH:MM AM/PM)"]
- Assessment: [Brief assessment incorporating context - if context explains the event, note it here]
[Repeat for each event in chronological order within the time block]
---
## Summary
[One sentence summarizing the period. If all events are normal/explained: "Routine activity observed." If review needed: "Some activity requires review but no security concerns." If security concerns: "Security concerns requiring immediate attention."]
Guidelines:
- List ALL events in chronological order, grouped by time blocks
- Threat level indicators: Normal, Needs review, 🔴 Security concern
- Integrate contextual information naturally - use the "context" array to enrich each event's description
- If context explains the event (e.g., delivery truck explains person at door), describe it accordingly (e.g., "delivery person" not "unidentified person")
- Be concise but informative - focus on what happened and what it means
- If contextual information makes an event clearly normal, reflect that in your assessment
- Only create time blocks that have events - don't create empty sections
"""
prompt += "\n\nEvents:\n"
for event in events:
prompt += f"\n{event}\n"
if preferred_language:
prompt += f"\nProvide your answer in {preferred_language}"
return prompt
def build_object_description_prompt(
camera_config: CameraConfig,
event: Event,
) -> str:
"""Build the prompt for a per-object description.
Pulls the per-label override from `objects.genai.object_prompts`, falling
back to the camera default, and interpolates event fields.
Raises:
KeyError: if the user-defined prompt template references an unknown
event field.
"""
template = camera_config.objects.genai.object_prompts.get(
str(event.label),
camera_config.objects.genai.prompt,
)
return template.format(**model_to_dict(event))
def get_attribute_classifications(config: FrigateConfig) -> List[Dict[str, Any]]:
"""Return enabled custom classification models of `attribute` type.
Each entry: {"name": <model name>, "objects": [<object label>, ...]}.
These models attach attribute metadata to events on the listed object
types, which can later be filtered via the search_objects `attribute`
field.
"""
result: List[Dict[str, Any]] = []
for model_key, model_config in config.classification.custom.items():
if not model_config.enabled or model_config.object_config is None:
continue
if (
model_config.object_config.classification_type
!= ObjectClassificationType.attribute
):
continue
result.append(
{
"name": model_config.name or model_key,
"objects": list(model_config.object_config.objects or []),
}
)
return result
def get_tool_definitions(
semantic_search_enabled: bool = False,
attribute_classifications: Optional[List[Dict[str, Any]]] = None,
) -> List[Dict[str, Any]]:
"""
Get OpenAI-compatible tool definitions for Frigate.
Returns a list of tool definitions that can be used with OpenAI-compatible
function calling APIs. When semantic search is enabled, the search_objects
tool exposes an additional `semantic_query` parameter for descriptive
queries (e.g. "person riding a lawn mower") and find_similar_objects is
included. When attribute classification models are configured, an
`attribute` parameter is exposed for filtering by their labels.
"""
search_objects_properties: Dict[str, Any] = {
"camera": {
"type": "string",
"description": "Camera name to filter by (optional).",
},
"label": {
"type": "string",
"description": (
"Generic object class to filter by — one of the tracked detector "
"labels such as 'person', 'package', 'car', 'dog', 'bird'. Use "
"this for broad queries like 'show me all cars today'. Combine "
"with semantic_query when the user also describes appearance or "
"behavior (e.g. label='person', semantic_query='riding a lawn "
"mower')."
),
},
"sub_label": {
"type": "string",
"description": (
"Filter by a DISCRETE NAMED entity recognized in the detection. "
"Use this for: a known person's name ('John'), a delivery "
"company ('Amazon', 'UPS'), a recognized animal species or "
"breed ('blue jay', 'cardinal', 'golden retriever'), or a "
"license plate string. When filtering by a specific name, set "
"only sub_label and leave label unset. Do NOT use sub_label "
"for descriptions of appearance, clothing, or actions — those "
"belong in semantic_query."
),
},
"after": {
"type": "string",
"description": "Start time in ISO 8601 format (e.g., '2024-01-01T00:00:00Z').",
},
"before": {
"type": "string",
"description": "End time in ISO 8601 format (e.g., '2024-01-01T23:59:59Z').",
},
"zones": {
"type": "array",
"items": {"type": "string"},
"description": "List of zone names to filter by.",
},
"limit": {
"type": "integer",
"description": "Maximum number of objects to return (default: 25).",
"default": 25,
},
}
if attribute_classifications:
model_outline = "; ".join(
f"{m['name']} (applies to {', '.join(m['objects']) or 'any object'})"
for m in attribute_classifications
)
search_objects_properties["attribute"] = {
"type": "string",
"description": (
"Filter by a classification attribute label produced by a "
"configured attribute classification model. Use this INSTEAD "
"of semantic_query when the user's request matches one of "
"these classifications. Configured models: "
f"{model_outline}. "
"Set the value to the attribute label that matches the user's "
"phrasing (case-sensitive)."
),
}
if semantic_search_enabled:
search_objects_properties["semantic_query"] = {
"type": "string",
"description": (
"Optional natural-language description of a PHYSICAL "
"CHARACTERISTIC, APPEARANCE, or ACTIVITY the user mentioned, "
"used to semantically narrow results. Only set this when the "
"user describes something beyond what label and sub_label can "
"express on their own.\n"
"USE for descriptive phrases like: 'riding a lawn mower', "
"'wearing a red jacket', 'carrying a package', 'walking a "
"dog', 'on a bicycle', 'holding an umbrella'.\n"
"DO NOT USE for:\n"
"- specific named people, pets, or delivery companies → use sub_label\n"
"- animal species or breed names like 'blue jay', 'cardinal', "
"'golden retriever' → use sub_label\n"
"- license plate strings → use sub_label\n"
"- generic object queries like 'all cars today' or 'every "
"person' → use label alone with no semantic_query\n"
"When set, combine with label/time/camera/zone filters as "
"usual (e.g. label='person', semantic_query='riding a lawn "
"mower', after='2024-05-01T00:00:00Z')."
),
}
search_objects_description = (
"Search the historical record of detected objects in Frigate. "
"Use this ONLY for questions about the PAST — e.g. 'did anyone come by today?', "
"'when was the last car?', 'show me detections from yesterday'. "
"Do NOT use this for monitoring or alerting requests about future events — "
"use start_camera_watch instead for those. "
"An 'object' in Frigate represents a tracked detection (e.g., a person, package, car).\n\n"
"Choose filters based on what the user is asking for:\n"
"- Generic class query ('show me all cars today'): set `label` only.\n"
"- Specific NAMED entity (known person, delivery company, animal "
"species/breed like 'blue jay' or 'golden retriever', license "
"plate): set `sub_label` only and leave `label` unset.\n"
)
if semantic_search_enabled:
search_objects_description += (
"- Physical CHARACTERISTIC, APPEARANCE, or ACTIVITY that is not a "
"discrete name ('person riding a lawn mower', 'someone in a red "
"jacket', 'person carrying a package'): set `semantic_query` with "
"the descriptive phrase, optionally alongside `label` for the "
"object class. Do NOT put descriptive phrases in sub_label."
)
return [
{
"type": "function",
"function": {
"name": "search_objects",
"description": search_objects_description,
"parameters": {
"type": "object",
"properties": search_objects_properties,
},
"required": [],
},
},
{
"type": "function",
"function": {
"name": "find_similar_objects",
"description": (
"Find tracked objects that are visually and semantically similar "
"to a specific past event. Use this when the user references a "
"particular object they have seen and wants to find other "
"sightings of the same or similar one ('that green car', 'the "
"person in the red jacket', 'the package that was delivered'). "
"Prefer this over search_objects whenever the user's intent is "
"'find more like this specific one.' Use search_objects first "
"only if you need to locate the anchor event. Requires semantic "
"search to be enabled."
),
"parameters": {
"type": "object",
"properties": {
"event_id": {
"type": "string",
"description": "The id of the anchor event to find similar objects to.",
},
"after": {
"type": "string",
"description": "Start time in ISO 8601 format (e.g., '2024-01-01T00:00:00Z').",
},
"before": {
"type": "string",
"description": "End time in ISO 8601 format (e.g., '2024-01-01T23:59:59Z').",
},
"cameras": {
"type": "array",
"items": {"type": "string"},
"description": "Optional list of cameras to restrict to. Defaults to all.",
},
"labels": {
"type": "array",
"items": {"type": "string"},
"description": "Optional list of labels to restrict to. Defaults to the anchor event's label.",
},
"sub_labels": {
"type": "array",
"items": {"type": "string"},
"description": "Optional list of sub_labels (names) to restrict to.",
},
"zones": {
"type": "array",
"items": {"type": "string"},
"description": "Optional list of zones. An event matches if any of its zones overlap.",
},
"similarity_mode": {
"type": "string",
"enum": ["visual", "semantic", "fused"],
"description": "Which similarity signal(s) to use. 'fused' (default) combines visual and semantic.",
"default": "fused",
},
"min_score": {
"type": "number",
"description": "Drop matches with a similarity score below this threshold (0.0-1.0).",
},
"limit": {
"type": "integer",
"description": "Maximum number of matches to return (default: 10).",
"default": 10,
},
},
"required": ["event_id"],
},
},
},
{
"type": "function",
"function": {
"name": "set_camera_state",
"description": (
"Change a camera's feature state (e.g., turn detection on/off, enable/disable recordings). "
"Use camera='*' to apply to all cameras at once. "
"Only call this tool when the user explicitly asks to change a camera setting. "
"Requires admin privileges."
),
"parameters": {
"type": "object",
"properties": {
"camera": {
"type": "string",
"description": "Camera name to target, or '*' to target all cameras.",
},
"feature": {
"type": "string",
"enum": [
"detect",
"record",
"snapshots",
"audio",
"motion",
"enabled",
"birdseye",
"birdseye_mode",
"improve_contrast",
"ptz_autotracker",
"motion_contour_area",
"motion_threshold",
"notifications",
"audio_transcription",
"review_alerts",
"review_detections",
"object_descriptions",
"review_descriptions",
"profile",
],
"description": (
"The feature to change. Most features accept ON or OFF. "
"birdseye_mode accepts CONTINUOUS, MOTION, or OBJECTS. "
"motion_contour_area and motion_threshold accept a number. "
"profile accepts a profile name or 'none' to deactivate (requires camera='*')."
),
},
"value": {
"type": "string",
"description": "The value to set. ON or OFF for toggles, a number for thresholds, a profile name or 'none' for profile.",
},
},
"required": ["camera", "feature", "value"],
},
},
},
{
"type": "function",
"function": {
"name": "get_live_context",
"description": (
"Get the current live image and detection information for a camera: objects being tracked, "
"zones, timestamps. Use this to understand what is visible in the live view. "
"Call this when answering questions about what is happening right now on a specific camera."
),
"parameters": {
"type": "object",
"properties": {
"camera": {
"type": "string",
"description": "Camera name to get live context for.",
},
},
"required": ["camera"],
},
},
},
{
"type": "function",
"function": {
"name": "start_camera_watch",
"description": (
"Start a continuous VLM watch job that monitors a camera and sends a notification "
"when a specified condition is met. Use this when the user wants to be alerted about "
"a future event, e.g. 'tell me when guests arrive' or 'notify me when the package is picked up'. "
"Only one watch job can run at a time. Returns a job ID."
),
"parameters": {
"type": "object",
"properties": {
"camera": {
"type": "string",
"description": "Camera ID to monitor.",
},
"condition": {
"type": "string",
"description": (
"Natural-language description of the condition to watch for, "
"e.g. 'a person arrives at the front door'."
),
},
"max_duration_minutes": {
"type": "integer",
"description": "Maximum time to watch before giving up (minutes, default 60).",
"default": 60,
},
"labels": {
"type": "array",
"items": {"type": "string"},
"description": "Object labels that should trigger a VLM check (e.g. ['person', 'car']). If omitted, any detection on the camera triggers a check.",
},
"zones": {
"type": "array",
"items": {"type": "string"},
"description": "Zone names to filter by. If specified, only detections in these zones trigger a VLM check.",
},
},
"required": ["camera", "condition"],
},
},
},
{
"type": "function",
"function": {
"name": "stop_camera_watch",
"description": (
"Cancel the currently running VLM watch job. Use this when the user wants to "
"stop a previously started watch, e.g. 'stop watching the front door'."
),
"parameters": {
"type": "object",
"properties": {},
"required": [],
},
},
},
{
"type": "function",
"function": {
"name": "get_profile_status",
"description": (
"Get the current profile status including the active profile and "
"timestamps of when each profile was last activated. Use this to "
"determine time periods for recap requests — e.g. when the user asks "
"'what happened while I was away?', call this first to find the relevant "
"time window based on profile activation history."
),
"parameters": {
"type": "object",
"properties": {},
"required": [],
},
},
},
{
"type": "function",
"function": {
"name": "get_recap",
"description": (
"Get a recap of all activity (alerts and detections) for a given time period. "
"Use this after calling get_profile_status to retrieve what happened during "
"a specific window — e.g. 'what happened while I was away?'. Returns a "
"chronological list of activity with camera, objects, zones, and GenAI-generated "
"descriptions when available. Summarize the results for the user."
),
"parameters": {
"type": "object",
"properties": {
"after": {
"type": "string",
"description": "Start of the time period in ISO 8601 format (e.g. '2025-03-15T08:00:00').",
},
"before": {
"type": "string",
"description": "End of the time period in ISO 8601 format (e.g. '2025-03-15T17:00:00').",
},
"cameras": {
"type": "string",
"description": "Comma-separated camera IDs to include, or 'all' for all cameras. Default is 'all'.",
},
"severity": {
"type": "string",
"enum": ["alert", "detection"],
"description": "Filter by severity level. Omit to include both alerts and detections.",
},
},
"required": ["after", "before"],
},
},
},
]
def build_chat_system_prompt(
config: FrigateConfig,
allowed_cameras: List[str],
semantic_search_enabled: bool,
attribute_classifications: List[Dict[str, Any]],
) -> str:
"""Build the system prompt for the chat completion endpoint.
Composes the static framing with conditional sections describing the
available cameras, speed units, semantic-search routing guidance, and
configured attribute classifications.
"""
current_datetime = datetime.datetime.now()
current_date_str = current_datetime.strftime("%Y-%m-%d")
current_time_str = current_datetime.strftime("%I:%M:%S %p")
cameras_info: List[str] = []
has_speed_zone = False
for camera_id in allowed_cameras:
if camera_id not in config.cameras:
continue
camera_config = config.cameras[camera_id]
friendly_name = (
camera_config.friendly_name
if camera_config.friendly_name
else camera_id.replace("_", " ").title()
)
zone_names = list(camera_config.zones.keys())
if not has_speed_zone:
has_speed_zone = any(
zone.distances for zone in camera_config.zones.values()
)
if zone_names:
cameras_info.append(
f" - {friendly_name} (ID: {camera_id}, zones: {', '.join(zone_names)})"
)
else:
cameras_info.append(f" - {friendly_name} (ID: {camera_id})")
cameras_section = ""
if cameras_info:
cameras_section = (
"\n\nAvailable cameras:\n"
+ "\n".join(cameras_info)
+ "\n\nWhen users refer to cameras by their friendly name (e.g., 'Back Deck Camera'), use the corresponding camera ID (e.g., 'back_deck_cam') in tool calls."
)
speed_units_section = ""
if has_speed_zone:
speed_unit = (
"mph" if config.ui.unit_system == UnitSystemEnum.imperial else "km/h"
)
speed_units_section = f"\n\nReport object speeds to the user in {speed_unit}."
semantic_search_section = ""
if semantic_search_enabled:
semantic_search_section = (
"\n\nWhen routing a search_objects call, pick filters by the shape of the user's request:\n"
"- Generic class ('show me all cars today'): set `label` only.\n"
"- Specific named entity — a known person ('John'), delivery company ('Amazon'), animal species/breed ('blue jay', 'cardinal', 'golden retriever'), or license plate: set `sub_label` only and leave `label` unset.\n"
"- Physical characteristic, appearance, or activity that is NOT a discrete name ('find me people riding a lawn mower', 'someone in a red jacket', 'a person carrying a package'): set `semantic_query` with the descriptive phrase, optionally combined with `label` for the object class. Never put descriptive phrases in `sub_label`."
)
attribute_classification_section = ""
if attribute_classifications:
model_lines = "\n".join(
f"- {m['name']}: applies to {', '.join(m['objects']) or 'any object'}"
for m in attribute_classifications
)
attribute_classification_section = (
"\n\nAttribute classification models are configured for the following object types:\n"
f"{model_lines}\n"
"When the user's request matches one of these classifications, set the search_objects `attribute` field to the matching label rather than using `semantic_query`. Reserve `semantic_query` for descriptive phrases that fall outside the configured attribute labels."
)
return f"""You are a helpful assistant for Frigate, a security camera NVR system. You help users answer questions about their cameras, detected objects, and events.
Current server local date and time: {current_date_str} at {current_time_str}
Do not start your response with phrases like "I will check...", "Let me see...", or "Let me look...". Answer directly.
Always present times to the user in the server's local timezone. When tool results include start_time_local and end_time_local, use those exact strings when listing or describing detection times—do not convert or invent timestamps. Do not use UTC or ISO format with Z for the user-facing answer unless the tool result only provides Unix timestamps without local time fields.
When users ask about "today", "yesterday", "this week", etc., use the current date above as reference.
When searching for objects or events, use ISO 8601 format for dates (e.g., {current_date_str}T00:00:00Z for the start of today).
Always be accurate with time calculations based on the current date provided.
When a user refers to a specific object they have seen or describe with identifying details ("that green car", "the person in the red jacket", "a package left today"), prefer the find_similar_objects tool over search_objects. Use search_objects first only to locate the anchor event, then pass its id to find_similar_objects. For generic queries like "show me all cars today", keep using search_objects. If a user message begins with [attached_event:<id>], treat that event id as the anchor for any similarity or "tell me more" request in the same message and call find_similar_objects with that id.{semantic_search_section}{attribute_classification_section}{cameras_section}{speed_units_section}"""

View File

@ -60,5 +60,10 @@
"stats": {
"context": "{{tokens}} tokens",
"tokens_per_second": "{{rate}} t/s"
},
"reasoning": {
"active": "Reasoning…",
"show": "Show reasoning",
"hide": "Hide reasoning"
}
}

View File

@ -0,0 +1,87 @@
import { useState, useEffect, useRef } from "react";
import { useTranslation } from "react-i18next";
import { LuBrain, LuChevronDown, LuChevronRight } from "react-icons/lu";
import {
Collapsible,
CollapsibleContent,
CollapsibleTrigger,
} from "@/components/ui/collapsible";
import { Button } from "@/components/ui/button";
import { cn } from "@/lib/utils";
type ReasoningBubbleProps = {
/** The accumulated reasoning text from the model. */
reasoning: string;
/**
* Whether the assistant has begun producing the user-facing answer.
* While false the reasoning is still streaming and we keep the panel
* open with a "Thinking…" label. Once true, the panel auto-collapses
* so the answer is the primary focus, but stays expandable.
*/
answerStarted: boolean;
};
export function ReasoningBubble({
reasoning,
answerStarted,
}: ReasoningBubbleProps) {
const { t } = useTranslation(["views/chat"]);
// Open while the model is still mid-thought (no answer tokens yet);
// once the answer begins, collapse on its own but let the user reopen.
const [open, setOpen] = useState(true);
const userInteractedRef = useRef(false);
const lastAutoState = useRef(true);
useEffect(() => {
if (userInteractedRef.current) return;
const desired = !answerStarted;
if (desired !== lastAutoState.current) {
lastAutoState.current = desired;
setOpen(desired);
}
}, [answerStarted]);
const handleOpenChange = (next: boolean) => {
userInteractedRef.current = true;
setOpen(next);
};
const label = !answerStarted
? t("reasoning.active")
: open
? t("reasoning.hide")
: t("reasoning.show");
return (
<div className="self-start rounded-2xl bg-muted/60 px-3 py-2 text-muted-foreground">
<Collapsible open={open} onOpenChange={handleOpenChange}>
<CollapsibleTrigger asChild>
<Button
variant="ghost"
size="sm"
className="h-auto w-full min-w-0 justify-start gap-2 whitespace-normal p-0 text-left text-xs hover:bg-transparent"
>
<LuBrain
className={cn(
"size-3 shrink-0",
!answerStarted && "animate-pulse",
)}
/>
<span className="break-words font-medium">{label}</span>
{answerStarted &&
(open ? (
<LuChevronDown className="ml-auto size-3 shrink-0" />
) : (
<LuChevronRight className="ml-auto size-3 shrink-0" />
))}
</Button>
</CollapsibleTrigger>
<CollapsibleContent>
<pre className="scrollbar-container mt-2 max-h-64 overflow-auto whitespace-pre-wrap break-words rounded bg-muted/50 p-2 font-sans text-xs leading-relaxed">
{reasoning}
</pre>
</CollapsibleContent>
</Collapsible>
</div>
);
}

View File

@ -7,6 +7,7 @@ import { useState, useCallback, useRef, useEffect, useMemo } from "react";
import axios from "axios";
import { ChatEventThumbnailsRow } from "@/components/chat/ChatEventThumbnailsRow";
import { MessageBubble } from "@/components/chat/ChatMessage";
import { ReasoningBubble } from "@/components/chat/ReasoningBubble";
import { ToolCallsGroup } from "@/components/chat/ToolCallsGroup";
import { ChatStartingState } from "@/components/chat/ChatStartingState";
import { ChatAttachmentChip } from "@/components/chat/ChatAttachmentChip";
@ -200,15 +201,21 @@ export default function ChatPage() {
const hasToolCalls =
msg.toolCalls && msg.toolCalls.length > 0;
const hasContent = !!msg.content?.trim();
const hasReasoning = !!msg.reasoning?.trim();
const showProcessing =
isLastAssistant && isLoading && !hasContent;
isLastAssistant &&
isLoading &&
!hasContent &&
!hasReasoning;
// Hide empty placeholder only when there are no tool calls yet
// Hide empty placeholder only when there are no tool calls
// and no reasoning streaming yet
if (
isLastAssistant &&
isLoading &&
!hasContent &&
!hasToolCalls
!hasToolCalls &&
!hasReasoning
)
return (
<div
@ -226,13 +233,22 @@ export default function ChatPage() {
{msg.role === "assistant" && hasToolCalls && (
<ToolCallsGroup toolCalls={msg.toolCalls!} />
)}
{msg.role === "assistant" && hasReasoning && (
<ReasoningBubble
reasoning={msg.reasoning!}
answerStarted={hasContent}
/>
)}
{showProcessing ? (
<div className="flex items-center gap-2 self-start rounded-2xl bg-muted px-5 py-4">
<span className="size-2 animate-bounce rounded-full bg-muted-foreground/60 [animation-delay:-0.3s]" />
<span className="size-2 animate-bounce rounded-full bg-muted-foreground/60 [animation-delay:-0.15s]" />
<span className="size-2 animate-bounce rounded-full bg-muted-foreground/60" />
</div>
) : (
) : msg.role === "assistant" &&
!hasContent &&
hasReasoning &&
!isComplete ? null : (
<MessageBubble
role={msg.role}
content={msg.content}

View File

@ -7,6 +7,7 @@ export type ToolCall = {
export type ChatMessage = {
role: "user" | "assistant";
content: string;
reasoning?: string;
toolCalls?: ToolCall[];
stats?: ChatStats;
};

View File

@ -27,6 +27,7 @@ type StreamChunk =
| { type: "error"; error: string }
| { type: "tool_calls"; tool_calls: ToolCall[] }
| { type: "content"; delta: string }
| { type: "reasoning"; delta: string }
| StatsChunk;
/**
@ -109,6 +110,19 @@ export async function streamChatCompletion(
});
return "continue";
}
if (data.type === "reasoning" && data.delta !== undefined) {
updateMessages((prev) => {
const next = [...prev];
const lastMsg = next[next.length - 1];
if (lastMsg?.role === "assistant")
next[next.length - 1] = {
...lastMsg,
reasoning: (lastMsg.reasoning ?? "") + data.delta,
};
return next;
});
return "continue";
}
if (data.type === "stats") {
const stats: ChatStats = {
promptTokens: data.prompt_tokens,