GenAI Refactor (#23253)

* Ensure runtime options are passed
* Add attribute info to prompt when configured
* Move GenAI plugins to dedicated directory
* Migrate prompts to dedicated folder
* Move chat prompts to prompts
* Implement reasoning traces in the UI
* Cleanup
* Make Azure a subclass of OpenAI
* Implement reasoning for other providers
* mypy
* Cleanup
2026-06-21 03:41:55 +03:00 · 2026-05-19 12:03:57 -06:00 · 2026-05-19 12:03:57 -06:00 · b0b00fe1d0
commit b0b00fe1d0
parent b1de5e2290
16 changed files with 1108 additions and 930 deletions
--- a/frigate/api/chat.py
+++ b/frigate/api/chat.py
@ -35,9 +35,13 @@ from frigate.api.defs.response.chat_response import (
    ToolCall,
 )
 from frigate.api.defs.tags import Tags
-from frigate.api.event import events
+from frigate.api.event import _build_attribute_filter_clause, events
 from frigate.config import FrigateConfig
-from frigate.config.ui import UnitSystemEnum
+from frigate.genai.prompts import (
+    build_chat_system_prompt,
+    get_attribute_classifications,
+    get_tool_definitions,
+)
 from frigate.genai.utils import build_assistant_message_for_conversation
 from frigate.jobs.vlm_watch import (
    get_vlm_watch_job,
@ -68,390 +72,6 @@ class VLMMonitorRequest(BaseModel):
    zones: List[str] = []


-def get_tool_definitions(
-    semantic_search_enabled: bool = False,
-) -> List[Dict[str, Any]]:
-    """
-    Get OpenAI-compatible tool definitions for Frigate.
-
-    Returns a list of tool definitions that can be used with OpenAI-compatible
-    function calling APIs. When semantic search is enabled, the search_objects
-    tool exposes an additional `semantic_query` parameter for descriptive
-    queries (e.g. "person riding a lawn mower") and find_similar_objects is
-    included.
-    """
-    search_objects_properties: Dict[str, Any] = {
-        "camera": {
-            "type": "string",
-            "description": "Camera name to filter by (optional).",
-        },
-        "label": {
-            "type": "string",
-            "description": (
-                "Generic object class to filter by — one of the tracked detector "
-                "labels such as 'person', 'package', 'car', 'dog', 'bird'. Use "
-                "this for broad queries like 'show me all cars today'. Combine "
-                "with semantic_query when the user also describes appearance or "
-                "behavior (e.g. label='person', semantic_query='riding a lawn "
-                "mower')."
-            ),
-        },
-        "sub_label": {
-            "type": "string",
-            "description": (
-                "Filter by a DISCRETE NAMED entity recognized in the detection. "
-                "Use this for: a known person's name ('John'), a delivery "
-                "company ('Amazon', 'UPS'), a recognized animal species or "
-                "breed ('blue jay', 'cardinal', 'golden retriever'), or a "
-                "license plate string. When filtering by a specific name, set "
-                "only sub_label and leave label unset. Do NOT use sub_label "
-                "for descriptions of appearance, clothing, or actions — those "
-                "belong in semantic_query."
-            ),
-        },
-        "after": {
-            "type": "string",
-            "description": "Start time in ISO 8601 format (e.g., '2024-01-01T00:00:00Z').",
-        },
-        "before": {
-            "type": "string",
-            "description": "End time in ISO 8601 format (e.g., '2024-01-01T23:59:59Z').",
-        },
-        "zones": {
-            "type": "array",
-            "items": {"type": "string"},
-            "description": "List of zone names to filter by.",
-        },
-        "limit": {
-            "type": "integer",
-            "description": "Maximum number of objects to return (default: 25).",
-            "default": 25,
-        },
-    }
-
-    if semantic_search_enabled:
-        search_objects_properties["semantic_query"] = {
-            "type": "string",
-            "description": (
-                "Optional natural-language description of a PHYSICAL "
-                "CHARACTERISTIC, APPEARANCE, or ACTIVITY the user mentioned, "
-                "used to semantically narrow results. Only set this when the "
-                "user describes something beyond what label and sub_label can "
-                "express on their own.\n"
-                "USE for descriptive phrases like: 'riding a lawn mower', "
-                "'wearing a red jacket', 'carrying a package', 'walking a "
-                "dog', 'on a bicycle', 'holding an umbrella'.\n"
-                "DO NOT USE for:\n"
-                "- specific named people, pets, or delivery companies → use sub_label\n"
-                "- animal species or breed names like 'blue jay', 'cardinal', "
-                "'golden retriever' → use sub_label\n"
-                "- license plate strings → use sub_label\n"
-                "- generic object queries like 'all cars today' or 'every "
-                "person' → use label alone with no semantic_query\n"
-                "When set, combine with label/time/camera/zone filters as "
-                "usual (e.g. label='person', semantic_query='riding a lawn "
-                "mower', after='2024-05-01T00:00:00Z')."
-            ),
-        }
-
-    search_objects_description = (
-        "Search the historical record of detected objects in Frigate. "
-        "Use this ONLY for questions about the PAST — e.g. 'did anyone come by today?', "
-        "'when was the last car?', 'show me detections from yesterday'. "
-        "Do NOT use this for monitoring or alerting requests about future events — "
-        "use start_camera_watch instead for those. "
-        "An 'object' in Frigate represents a tracked detection (e.g., a person, package, car).\n\n"
-        "Choose filters based on what the user is asking for:\n"
-        "- Generic class query ('show me all cars today'): set `label` only.\n"
-        "- Specific NAMED entity (known person, delivery company, animal "
-        "species/breed like 'blue jay' or 'golden retriever', license "
-        "plate): set `sub_label` only and leave `label` unset.\n"
-    )
-    if semantic_search_enabled:
-        search_objects_description += (
-            "- Physical CHARACTERISTIC, APPEARANCE, or ACTIVITY that is not a "
-            "discrete name ('person riding a lawn mower', 'someone in a red "
-            "jacket', 'person carrying a package'): set `semantic_query` with "
-            "the descriptive phrase, optionally alongside `label` for the "
-            "object class. Do NOT put descriptive phrases in sub_label."
-        )
-
-    return [
-        {
-            "type": "function",
-            "function": {
-                "name": "search_objects",
-                "description": search_objects_description,
-                "parameters": {
-                    "type": "object",
-                    "properties": search_objects_properties,
-                },
-                "required": [],
-            },
-        },
-        {
-            "type": "function",
-            "function": {
-                "name": "find_similar_objects",
-                "description": (
-                    "Find tracked objects that are visually and semantically similar "
-                    "to a specific past event. Use this when the user references a "
-                    "particular object they have seen and wants to find other "
-                    "sightings of the same or similar one ('that green car', 'the "
-                    "person in the red jacket', 'the package that was delivered'). "
-                    "Prefer this over search_objects whenever the user's intent is "
-                    "'find more like this specific one.' Use search_objects first "
-                    "only if you need to locate the anchor event. Requires semantic "
-                    "search to be enabled."
-                ),
-                "parameters": {
-                    "type": "object",
-                    "properties": {
-                        "event_id": {
-                            "type": "string",
-                            "description": "The id of the anchor event to find similar objects to.",
-                        },
-                        "after": {
-                            "type": "string",
-                            "description": "Start time in ISO 8601 format (e.g., '2024-01-01T00:00:00Z').",
-                        },
-                        "before": {
-                            "type": "string",
-                            "description": "End time in ISO 8601 format (e.g., '2024-01-01T23:59:59Z').",
-                        },
-                        "cameras": {
-                            "type": "array",
-                            "items": {"type": "string"},
-                            "description": "Optional list of cameras to restrict to. Defaults to all.",
-                        },
-                        "labels": {
-                            "type": "array",
-                            "items": {"type": "string"},
-                            "description": "Optional list of labels to restrict to. Defaults to the anchor event's label.",
-                        },
-                        "sub_labels": {
-                            "type": "array",
-                            "items": {"type": "string"},
-                            "description": "Optional list of sub_labels (names) to restrict to.",
-                        },
-                        "zones": {
-                            "type": "array",
-                            "items": {"type": "string"},
-                            "description": "Optional list of zones. An event matches if any of its zones overlap.",
-                        },
-                        "similarity_mode": {
-                            "type": "string",
-                            "enum": ["visual", "semantic", "fused"],
-                            "description": "Which similarity signal(s) to use. 'fused' (default) combines visual and semantic.",
-                            "default": "fused",
-                        },
-                        "min_score": {
-                            "type": "number",
-                            "description": "Drop matches with a similarity score below this threshold (0.0-1.0).",
-                        },
-                        "limit": {
-                            "type": "integer",
-                            "description": "Maximum number of matches to return (default: 10).",
-                            "default": 10,
-                        },
-                    },
-                    "required": ["event_id"],
-                },
-            },
-        },
-        {
-            "type": "function",
-            "function": {
-                "name": "set_camera_state",
-                "description": (
-                    "Change a camera's feature state (e.g., turn detection on/off, enable/disable recordings). "
-                    "Use camera='*' to apply to all cameras at once. "
-                    "Only call this tool when the user explicitly asks to change a camera setting. "
-                    "Requires admin privileges."
-                ),
-                "parameters": {
-                    "type": "object",
-                    "properties": {
-                        "camera": {
-                            "type": "string",
-                            "description": "Camera name to target, or '*' to target all cameras.",
-                        },
-                        "feature": {
-                            "type": "string",
-                            "enum": [
-                                "detect",
-                                "record",
-                                "snapshots",
-                                "audio",
-                                "motion",
-                                "enabled",
-                                "birdseye",
-                                "birdseye_mode",
-                                "improve_contrast",
-                                "ptz_autotracker",
-                                "motion_contour_area",
-                                "motion_threshold",
-                                "notifications",
-                                "audio_transcription",
-                                "review_alerts",
-                                "review_detections",
-                                "object_descriptions",
-                                "review_descriptions",
-                                "profile",
-                            ],
-                            "description": (
-                                "The feature to change. Most features accept ON or OFF. "
-                                "birdseye_mode accepts CONTINUOUS, MOTION, or OBJECTS. "
-                                "motion_contour_area and motion_threshold accept a number. "
-                                "profile accepts a profile name or 'none' to deactivate (requires camera='*')."
-                            ),
-                        },
-                        "value": {
-                            "type": "string",
-                            "description": "The value to set. ON or OFF for toggles, a number for thresholds, a profile name or 'none' for profile.",
-                        },
-                    },
-                    "required": ["camera", "feature", "value"],
-                },
-            },
-        },
-        {
-            "type": "function",
-            "function": {
-                "name": "get_live_context",
-                "description": (
-                    "Get the current live image and detection information for a camera: objects being tracked, "
-                    "zones, timestamps. Use this to understand what is visible in the live view. "
-                    "Call this when answering questions about what is happening right now on a specific camera."
-                ),
-                "parameters": {
-                    "type": "object",
-                    "properties": {
-                        "camera": {
-                            "type": "string",
-                            "description": "Camera name to get live context for.",
-                        },
-                    },
-                    "required": ["camera"],
-                },
-            },
-        },
-        {
-            "type": "function",
-            "function": {
-                "name": "start_camera_watch",
-                "description": (
-                    "Start a continuous VLM watch job that monitors a camera and sends a notification "
-                    "when a specified condition is met. Use this when the user wants to be alerted about "
-                    "a future event, e.g. 'tell me when guests arrive' or 'notify me when the package is picked up'. "
-                    "Only one watch job can run at a time. Returns a job ID."
-                ),
-                "parameters": {
-                    "type": "object",
-                    "properties": {
-                        "camera": {
-                            "type": "string",
-                            "description": "Camera ID to monitor.",
-                        },
-                        "condition": {
-                            "type": "string",
-                            "description": (
-                                "Natural-language description of the condition to watch for, "
-                                "e.g. 'a person arrives at the front door'."
-                            ),
-                        },
-                        "max_duration_minutes": {
-                            "type": "integer",
-                            "description": "Maximum time to watch before giving up (minutes, default 60).",
-                            "default": 60,
-                        },
-                        "labels": {
-                            "type": "array",
-                            "items": {"type": "string"},
-                            "description": "Object labels that should trigger a VLM check (e.g. ['person', 'car']). If omitted, any detection on the camera triggers a check.",
-                        },
-                        "zones": {
-                            "type": "array",
-                            "items": {"type": "string"},
-                            "description": "Zone names to filter by. If specified, only detections in these zones trigger a VLM check.",
-                        },
-                    },
-                    "required": ["camera", "condition"],
-                },
-            },
-        },
-        {
-            "type": "function",
-            "function": {
-                "name": "stop_camera_watch",
-                "description": (
-                    "Cancel the currently running VLM watch job. Use this when the user wants to "
-                    "stop a previously started watch, e.g. 'stop watching the front door'."
-                ),
-                "parameters": {
-                    "type": "object",
-                    "properties": {},
-                    "required": [],
-                },
-            },
-        },
-        {
-            "type": "function",
-            "function": {
-                "name": "get_profile_status",
-                "description": (
-                    "Get the current profile status including the active profile and "
-                    "timestamps of when each profile was last activated. Use this to "
-                    "determine time periods for recap requests — e.g. when the user asks "
-                    "'what happened while I was away?', call this first to find the relevant "
-                    "time window based on profile activation history."
-                ),
-                "parameters": {
-                    "type": "object",
-                    "properties": {},
-                    "required": [],
-                },
-            },
-        },
-        {
-            "type": "function",
-            "function": {
-                "name": "get_recap",
-                "description": (
-                    "Get a recap of all activity (alerts and detections) for a given time period. "
-                    "Use this after calling get_profile_status to retrieve what happened during "
-                    "a specific window — e.g. 'what happened while I was away?'. Returns a "
-                    "chronological list of activity with camera, objects, zones, and GenAI-generated "
-                    "descriptions when available. Summarize the results for the user."
-                ),
-                "parameters": {
-                    "type": "object",
-                    "properties": {
-                        "after": {
-                            "type": "string",
-                            "description": "Start of the time period in ISO 8601 format (e.g. '2025-03-15T08:00:00').",
-                        },
-                        "before": {
-                            "type": "string",
-                            "description": "End of the time period in ISO 8601 format (e.g. '2025-03-15T17:00:00').",
-                        },
-                        "cameras": {
-                            "type": "string",
-                            "description": "Comma-separated camera IDs to include, or 'all' for all cameras. Default is 'all'.",
-                        },
-                        "severity": {
-                            "type": "string",
-                            "enum": ["alert", "detection"],
-                            "description": "Filter by severity level. Omit to include both alerts and detections.",
-                        },
-                    },
-                    "required": ["after", "before"],
-                },
-            },
-        },
-    ]
-
-
@router.get(
    "/chat/tools",
    dependencies=[Depends(allow_any_authenticated())],
@ -460,10 +80,13 @@ def get_tool_definitions(
 )
 def get_tools(request: Request) -> JSONResponse:
    """Get list of available tools for LLM function calling."""
-    semantic_search_enabled = bool(
-        getattr(request.app.frigate_config.semantic_search, "enabled", False)
+    config = request.app.frigate_config
+    semantic_search_enabled = bool(getattr(config.semantic_search, "enabled", False))
+    attribute_classifications = get_attribute_classifications(config)
+    tools = get_tool_definitions(
+        semantic_search_enabled=semantic_search_enabled,
+        attribute_classifications=attribute_classifications,
    )
-    tools = get_tool_definitions(semantic_search_enabled=semantic_search_enabled)
    return JSONResponse(content={"tools": tools})


@ -554,11 +177,14 @@ async def _execute_search_objects(
    elif zones is None:
        zones = "all"

+    attribute = arguments.get("attribute")
+
    # Build query parameters compatible with EventsQueryParams
    query_params = EventsQueryParams(
        cameras=arguments.get("camera", "all"),
        labels=arguments.get("label", "all"),
        sub_labels=arguments.get("sub_label", "all"),  # case-insensitive on the backend
+        attributes=attribute if attribute else "all",
        zones=zones,
        zone=zones,
        after=after,
@ -626,6 +252,7 @@ async def _execute_search_objects_semantic(

    label = arguments.get("label")
    sub_label = arguments.get("sub_label")
+    attribute = arguments.get("attribute")

    zones = arguments.get("zones")
    if isinstance(zones, list) and zones:
@ -668,6 +295,10 @@ async def _execute_search_objects_semantic(
    if sub_label:
        # case-insensitive match to mirror events() behavior
        clauses.append(fn.LOWER(Event.sub_label.cast("text")) == sub_label.lower())
+    if attribute:
+        attribute_clause = _build_attribute_filter_clause(attribute)
+        if attribute_clause is not None:
+            clauses.append(attribute_clause)
    if zones:
        zone_clauses = [Event.zones.cast("text") % f'*"{zone}"*' for zone in zones]
        clauses.append(reduce(operator.or_, zone_clauses))
@ -1481,72 +1112,19 @@ async def chat_completion(

    config = request.app.frigate_config
    semantic_search_enabled = bool(getattr(config.semantic_search, "enabled", False))
-    tools = get_tool_definitions(semantic_search_enabled=semantic_search_enabled)
+    attribute_classifications = get_attribute_classifications(config)
+    tools = get_tool_definitions(
+        semantic_search_enabled=semantic_search_enabled,
+        attribute_classifications=attribute_classifications,
+    )
    conversation = []

-    current_datetime = datetime.now()
-    current_date_str = current_datetime.strftime("%Y-%m-%d")
-    current_time_str = current_datetime.strftime("%I:%M:%S %p")
-
-    cameras_info = []
-    has_speed_zone = False
-    for camera_id in allowed_cameras:
-        if camera_id not in config.cameras:
-            continue
-        camera_config = config.cameras[camera_id]
-        friendly_name = (
-            camera_config.friendly_name
-            if camera_config.friendly_name
-            else camera_id.replace("_", " ").title()
+    system_prompt = build_chat_system_prompt(
+        config=config,
+        allowed_cameras=allowed_cameras,
+        semantic_search_enabled=semantic_search_enabled,
+        attribute_classifications=attribute_classifications,
    )
-        zone_names = list(camera_config.zones.keys())
-        if not has_speed_zone:
-            has_speed_zone = any(
-                zone.distances for zone in camera_config.zones.values()
-            )
-        if zone_names:
-            cameras_info.append(
-                f"  - {friendly_name} (ID: {camera_id}, zones: {', '.join(zone_names)})"
-            )
-        else:
-            cameras_info.append(f"  - {friendly_name} (ID: {camera_id})")
-
-    cameras_section = ""
-    if cameras_info:
-        cameras_section = (
-            "\n\nAvailable cameras:\n"
-            + "\n".join(cameras_info)
-            + "\n\nWhen users refer to cameras by their friendly name (e.g., 'Back Deck Camera'), use the corresponding camera ID (e.g., 'back_deck_cam') in tool calls."
-        )
-
-    speed_units_section = ""
-    if has_speed_zone:
-        speed_unit = (
-            "mph" if config.ui.unit_system == UnitSystemEnum.imperial else "km/h"
-        )
-        speed_units_section = f"\n\nReport object speeds to the user in {speed_unit}."
-
-    semantic_search_section = ""
-    if semantic_search_enabled:
-        semantic_search_section = (
-            "\n\nWhen routing a search_objects call, pick filters by the shape of the user's request:\n"
-            "- Generic class ('show me all cars today'): set `label` only.\n"
-            "- Specific named entity — a known person ('John'), delivery company ('Amazon'), animal species/breed ('blue jay', 'cardinal', 'golden retriever'), or license plate: set `sub_label` only and leave `label` unset.\n"
-            "- Physical characteristic, appearance, or activity that is NOT a discrete name ('find me people riding a lawn mower', 'someone in a red jacket', 'a person carrying a package'): set `semantic_query` with the descriptive phrase, optionally combined with `label` for the object class. Never put descriptive phrases in `sub_label`."
-        )
-
-    system_prompt = f"""You are a helpful assistant for Frigate, a security camera NVR system. You help users answer questions about their cameras, detected objects, and events.
-
-Current server local date and time: {current_date_str} at {current_time_str}
-
-Do not start your response with phrases like "I will check...", "Let me see...", or "Let me look...". Answer directly.
-
-Always present times to the user in the server's local timezone. When tool results include start_time_local and end_time_local, use those exact strings when listing or describing detection times—do not convert or invent timestamps. Do not use UTC or ISO format with Z for the user-facing answer unless the tool result only provides Unix timestamps without local time fields.
-When users ask about "today", "yesterday", "this week", etc., use the current date above as reference.
-When searching for objects or events, use ISO 8601 format for dates (e.g., {current_date_str}T00:00:00Z for the start of today).
-Always be accurate with time calculations based on the current date provided.
-
-When a user refers to a specific object they have seen or describe with identifying details ("that green car", "the person in the red jacket", "a package left today"), prefer the find_similar_objects tool over search_objects. Use search_objects first only to locate the anchor event, then pass its id to find_similar_objects. For generic queries like "show me all cars today", keep using search_objects. If a user message begins with [attached_event:<id>], treat that event id as the anchor for any similarity or "tell me more" request in the same message and call find_similar_objects with that id.{semantic_search_section}{cameras_section}{speed_units_section}"""

    conversation.append(
        {
@ -1607,6 +1185,13 @@ When a user refers to a specific object they have seen or describe with identify
                            )
                            + b"\n"
                        )
+                    elif kind == "reasoning_delta":
+                        yield (
+                            json.dumps({"type": "reasoning", "delta": value}).encode(
+                                "utf-8"
+                            )
+                            + b"\n"
+                        )
                    elif kind == "stats":
                        yield (
                            json.dumps({"type": "stats", **value}).encode("utf-8")
@ -1707,6 +1292,7 @@ When a user refers to a specific object they have seen or describe with identify
                final_content = response.get("content") or ""

                if body.stream:
+                    final_reasoning = response.get("reasoning")

                    async def stream_body() -> Any:
                        if tool_calls:
@ -1721,6 +1307,15 @@ When a user refers to a specific object they have seen or describe with identify
                                ).encode("utf-8")
                                + b"\n"
                            )
+                        # Emit the full reasoning trace up front when the
+                        # underlying client did not stream it
+                        if final_reasoning:
+                            yield (
+                                json.dumps(
+                                    {"type": "reasoning", "delta": final_reasoning}
+                                ).encode("utf-8")
+                                + b"\n"
+                            )
                        # Stream content in word-sized chunks for smooth UX
                        for part in chunk_content(final_content):
                            yield (
@ -1741,6 +1336,7 @@ When a user refers to a specific object they have seen or describe with identify
                        message=ChatMessageResponse(
                            role="assistant",
                            content=final_content,
+                            reasoning=response.get("reasoning"),
                            tool_calls=None,
                        ),
                        finish_reason=response.get("finish_reason", "stop"),
--- a/frigate/api/defs/response/chat_response.py
+++ b/frigate/api/defs/response/chat_response.py
@ -20,6 +20,10 @@ class ChatMessageResponse(BaseModel):
    content: Optional[str] = Field(
        default=None, description="Message content (None if tool calls present)"
    )
+    reasoning: Optional[str] = Field(
+        default=None,
+        description="Separated reasoning/thinking trace if the model emitted one",
+    )
    tool_calls: Optional[list[ToolCallInvocation]] = Field(
        default=None, description="Tool calls if LLM wants to call tools"
    )
--- a/frigate/genai/init.py
+++ b/frigate/genai/init.py
@ -1,6 +1,5 @@
 """Generative AI module for Frigate."""

-import datetime
 import importlib
 import json
 import logging
@ -9,13 +8,18 @@ import re
 from typing import Any, Callable, Optional

 import numpy as np
-from playhouse.shortcuts import model_to_dict
 from pydantic import ValidationError

 from frigate.config import CameraConfig, GenAIConfig, GenAIProviderEnum
 from frigate.const import CLIPS_DIR
 from frigate.data_processing.post.types import ReviewMetadata
 from frigate.genai.manager import GenAIClientManager
+from frigate.genai.prompts import (
+    build_object_description_prompt,
+    build_review_description_prompt,
+    build_review_description_response_format,
+    build_review_summary_prompt,
+)
 from frigate.models import Event

 logger = logging.getLogger(__name__)
@ -61,75 +65,14 @@ class GenAIClient:
        activity_context_prompt: str,
    ) -> ReviewMetadata | None:
        """Generate a description for the review item activity."""
+        context_prompt = build_review_description_prompt(
+            review_data,
+            thumbnails,
+            concerns,
+            preferred_language,
+            activity_context_prompt,
+        )

-        def get_concern_prompt() -> str:
-            if concerns:
-                concern_list = "\n    - ".join(concerns)
-                return f"""- `other_concerns` (list of strings): Include a list of any of the following concerns that are occurring:
-    - {concern_list}"""
-            else:
-                return ""
-
-        def get_language_prompt() -> str:
-            if preferred_language:
-                return f"Provide your answer in {preferred_language}"
-            else:
-                return ""
-
-        def get_objects_list() -> str:
-            if review_data["unified_objects"]:
-                return "\n- " + "\n- ".join(review_data["unified_objects"])
-            else:
-                return "\n- (No objects detected)"
-
-        context_prompt = f"""
-Your task is to analyze a sequence of images taken in chronological order from a security camera.
-
-## Normal Activity Patterns for This Property
-
-{activity_context_prompt}
-
-## Task Instructions
-
-Describe the scene based on observable actions and movements, evaluate the activity against the Activity Indicators above, and assign a potential_threat_level (0, 1, or 2) by applying the threat level indicators consistently.
-
-## Analysis Guidelines
-
-When forming your description:
- **CRITICAL: Only describe objects explicitly listed in "Objects in Scene" below.** Do not infer or mention additional people, vehicles, or objects not present in this list, even if visual patterns suggest them. If only a car is listed, do not describe a person interacting with it unless "person" is also in the objects list.
- **Only describe actions actually visible in the frames.** Do not assume or infer actions that you don't observe happening. If someone walks toward furniture but you never see them sit, do not say they sat. Stick to what you can see across the sequence.
- Describe what you observe: actions, movements, interactions with objects and the environment. Include any observable environmental changes (e.g., lighting changes triggered by activity).
- Note visible details such as clothing, items being carried or placed, tools or equipment present, and how they interact with the property or objects.
- Consider the full sequence chronologically: what happens from start to finish, how duration and actions relate to the location and objects involved.
- **Use the actual timestamp provided in "Activity started at"** below for time of day context—do not infer time from image brightness or darkness. Unusual hours (late night/early morning) should increase suspicion when the observable behavior itself appears questionable. However, recognize that some legitimate activities can occur at any hour.
- **Consider duration as a primary factor**: Apply the duration thresholds defined in the activity patterns above. Brief sequences during normal hours with apparent purpose typically indicate normal activity unless explicit suspicious actions are visible.
- **Weigh all evidence holistically**: Match the activity against the normal and suspicious patterns defined above, then evaluate based on the complete context (zone, objects, time, actions, duration). Apply the threat level indicators consistently. Use your judgment for edge cases.
-
-## Response Field Guidelines
-
-Respond with a JSON object matching the provided schema. Field-specific guidance:
- `observations`: Include the very start of the activity — for example, a vehicle entering the frame or pulling into the driveway — even if it lasts only a few frames and the rest of the clip is dominated by a longer activity. Include each arrival, departure, object handled, and notable change in position or state. Each item is a single concrete fact written as a complete sentence.
- `scene`: Describe how the sequence begins, then the progression of events — all significant movements and actions in order. For example, if a vehicle arrives and then a person exits, describe both sequentially. For named subjects (those with a `←` separator in "Objects in Scene"), always use their name — do not replace them with generic terms. For unnamed objects (e.g., "person", "car"), refer to them naturally with articles (e.g., "a person", "the car"). Your description should align with and support the threat level you assign.
- `title`: Name the primary activity across the observations, together with the location. An activity is what is being done with objects, tools, or surfaces; locomotion through the scene qualifies as the activity only when no other interaction is observed. For named subjects, always use their name. For unnamed objects, refer to them naturally with articles.
- `shortSummary`: Briefly summarize the primary activity across the observations.
- `potential_threat_level`: Must be consistent with your scene description and the activity patterns above.
-
-## Sequence Details
-
- Camera: {review_data["camera"]}
- Total frames: {len(thumbnails)} (Frame 1 = earliest, Frame {len(thumbnails)} = latest)
- Activity started at {review_data["start"]} and lasted {review_data["duration"]} seconds
- Zones involved: {", ".join(review_data["zones"]) if review_data["zones"] else "None"}
-
-## Objects in Scene
-
-Each line represents a detection state, not necessarily unique individuals. The `←` symbol separates a recognized subject's name from their object type — use only the name (before the `←`) in your response, not the type after it. The same subject may appear across multiple lines if detected multiple times.
-
-**Note: Unidentified objects (without names) are NOT indicators of suspicious activity—they simply mean the system hasn't identified that object.**
-{get_objects_list()}
-
-{get_language_prompt()}
-"""
        logger.debug(
            f"Sending {len(thumbnails)} images to create review description on {review_data['camera']}"
        )
@ -143,25 +86,7 @@ Each line represents a detection state, not necessarily unique individuals. The
            ) as f:
                f.write(context_prompt)

-        # Build JSON schema for structured output from ReviewMetadata model
-        schema = ReviewMetadata.model_json_schema()
-        schema.get("properties", {}).pop("time", None)
-
-        if "time" in schema.get("required", []):
-            schema["required"].remove("time")
-        if not concerns:
-            schema.get("properties", {}).pop("other_concerns", None)
-            if "other_concerns" in schema.get("required", []):
-                schema["required"].remove("other_concerns")
-
-        response_format = {
-            "type": "json_schema",
-            "json_schema": {
-                "name": "review_metadata",
-                "strict": True,
-                "schema": schema,
-            },
-        }
+        response_format = build_review_description_response_format(concerns)

        response = self._send(context_prompt, thumbnails, response_format)

@ -240,61 +165,9 @@ Each line represents a detection state, not necessarily unique individuals. The
        debug_save: bool,
    ) -> str | None:
        """Generate a summary of review item descriptions over a period of time."""
-        time_range = f"{datetime.datetime.fromtimestamp(start_ts).strftime('%B %d, %Y at %I:%M %p')} to {datetime.datetime.fromtimestamp(end_ts).strftime('%B %d, %Y at %I:%M %p')}"
-        timeline_summary_prompt = f"""
-You are a security officer writing a concise security report.
-
-Time range: {time_range}
-
-Input format: Each event is a JSON object with:
- "title", "scene", "confidence", "potential_threat_level" (0-2), "other_concerns", "camera", "time", "start_time", "end_time"
- "context": array of related events from other cameras that occurred during overlapping time periods
-
-**Note: Use the "scene" field for event descriptions in the report. Ignore any "shortSummary" field if present.**
-
-Report Structure - Use this EXACT format:
-
-# Security Summary - {time_range}
-
-## Overview
-[Write 1-2 sentences summarizing the overall activity pattern during this period.]
-
---
-
-## Timeline
-
-[Group events by time periods (e.g., "Morning (6:00 AM - 12:00 PM)", "Afternoon (12:00 PM - 5:00 PM)", "Evening (5:00 PM - 9:00 PM)", "Night (9:00 PM - 6:00 AM)"). Use appropriate time blocks based on when events occurred.]
-
-### [Time Block Name]
-
-**HH:MM AM/PM** | [Camera Name] | [Threat Level Indicator]
- [Event title]: [Clear description incorporating contextual information from the "context" array]
- Context: [If context array has items, mention them here, e.g., "Delivery truck present on Front Driveway Cam (HH:MM AM/PM)"]
- Assessment: [Brief assessment incorporating context - if context explains the event, note it here]
-
-[Repeat for each event in chronological order within the time block]
-
---
-
-## Summary
-[One sentence summarizing the period. If all events are normal/explained: "Routine activity observed." If review needed: "Some activity requires review but no security concerns." If security concerns: "Security concerns requiring immediate attention."]
-
-Guidelines:
- List ALL events in chronological order, grouped by time blocks
- Threat level indicators: ✓ Normal, ⚠️ Needs review, 🔴 Security concern
- Integrate contextual information naturally - use the "context" array to enrich each event's description
- If context explains the event (e.g., delivery truck explains person at door), describe it accordingly (e.g., "delivery person" not "unidentified person")
- Be concise but informative - focus on what happened and what it means
- If contextual information makes an event clearly normal, reflect that in your assessment
- Only create time blocks that have events - don't create empty sections
-"""
-
-        timeline_summary_prompt += "\n\nEvents:\n"
-        for event in events:
-            timeline_summary_prompt += f"\n{event}\n"
-
-        if preferred_language:
-            timeline_summary_prompt += f"\nProvide your answer in {preferred_language}"
+        timeline_summary_prompt = build_review_summary_prompt(
+            start_ts, end_ts, events, preferred_language
+        )

        if debug_save:
            with open(
@ -326,10 +199,7 @@ Guidelines:
    ) -> Optional[str]:
        """Generate a description for the frame."""
        try:
-            prompt = camera_config.objects.genai.object_prompts.get(
-                str(event.label),
-                camera_config.objects.genai.prompt,
-            ).format(**model_to_dict(event))
+            prompt = build_object_description_prompt(camera_config, event)
        except KeyError as e:
            logger.error(f"Invalid key in GenAI prompt: {e}")
            return None
@ -430,6 +300,10 @@ Guidelines:
        Returns:
            Dictionary with:
            - 'content': Optional[str] - The text response from the LLM, None if tool calls
+            - 'reasoning': Optional[str] - The separated reasoning/thinking trace
+              if the model emitted one (e.g. via OpenAI-compatible
+              `reasoning_content`). None when the model does not surface a
+              trace or the provider does not parse it.
            - 'tool_calls': Optional[List[Dict]] - List of tool calls if LLM wants to call tools.
              Each tool call dict has:
                - 'id': str - Unique identifier for this tool call
@ -441,6 +315,14 @@ Guidelines:
                - 'length': Hit token limit
                - 'error': An error occurred

+        Streaming counterpart `chat_with_tools_stream` yields
+        ``(kind, value)`` tuples where ``kind`` is one of:
+            - 'content_delta': value is a string fragment of the answer
+            - 'reasoning_delta': value is a string fragment of the reasoning
+              trace (emitted before content for thinking models)
+            - 'stats': value is a usage stats dict
+            - 'message': value is the final dict shape described above
+
        Raises:
            NotImplementedError: If the provider doesn't implement this method.
        """
@ -451,14 +333,15 @@ Guidelines:
        )
        return {
            "content": None,
+            "reasoning": None,
            "tool_calls": None,
            "finish_reason": "error",
        }


 def load_providers() -> None:
-    package_dir = os.path.dirname(__file__)
-    for filename in os.listdir(package_dir):
+    plugins_dir = os.path.join(os.path.dirname(__file__), "plugins")
+    for filename in os.listdir(plugins_dir):
        if filename.endswith(".py") and filename != "__init__.py":
-            module_name = f"frigate.genai.{filename[:-3]}"
+            module_name = f"frigate.genai.plugins.{filename[:-3]}"
            importlib.import_module(module_name)
--- a/frigate/genai/azure-openai.py
+++ b/frigate/genai/azure-openai.py
@ -1,315 +0,0 @@
-"""Azure OpenAI Provider for Frigate AI."""
-
-import base64
-import json
-import logging
-from typing import Any, AsyncGenerator, Optional
-from urllib.parse import parse_qs, urlparse
-
-from openai import AzureOpenAI
-
-from frigate.config import GenAIProviderEnum
-from frigate.genai import GenAIClient, register_genai_provider
-from frigate.genai.openai import _stats_from_openai_usage
-
-logger = logging.getLogger(__name__)
-
-
-@register_genai_provider(GenAIProviderEnum.azure_openai)
-class OpenAIClient(GenAIClient):
-    """Generative AI client for Frigate using Azure OpenAI."""
-
-    provider: AzureOpenAI
-
-    def _init_provider(self) -> AzureOpenAI | None:
-        """Initialize the client."""
-        try:
-            parsed_url = urlparse(self.genai_config.base_url or "")
-            query_params = parse_qs(parsed_url.query)
-            api_version = query_params.get("api-version", [None])[0]
-            azure_endpoint = f"{parsed_url.scheme}://{parsed_url.netloc}/"
-
-            if not api_version:
-                logger.warning("Azure OpenAI url is missing API version.")
-                return None
-
-        except Exception as e:
-            logger.warning("Error parsing Azure OpenAI url: %s", str(e))
-            return None
-
-        return AzureOpenAI(
-            api_key=self.genai_config.api_key,
-            api_version=api_version,
-            azure_endpoint=azure_endpoint,
-        )
-
-    def _send(
-        self,
-        prompt: str,
-        images: list[bytes],
-        response_format: Optional[dict] = None,
-    ) -> Optional[str]:
-        """Submit a request to Azure OpenAI."""
-        encoded_images = [base64.b64encode(image).decode("utf-8") for image in images]
-        try:
-            request_params = {
-                "model": self.genai_config.model,
-                "messages": [
-                    {
-                        "role": "user",
-                        "content": [{"type": "text", "text": prompt}]
-                        + [
-                            {
-                                "type": "image_url",
-                                "image_url": {
-                                    "url": f"data:image/jpeg;base64,{image}",
-                                    "detail": "low",
-                                },
-                            }
-                            for image in encoded_images
-                        ],
-                    },
-                ],
-                "timeout": self.timeout,
-                **self.genai_config.runtime_options,
-            }
-            if response_format:
-                request_params["response_format"] = response_format
-            result = self.provider.chat.completions.create(**request_params)
-        except Exception as e:
-            logger.warning("Azure OpenAI returned an error: %s", str(e))
-            return None
-        if len(result.choices) > 0:
-            return str(result.choices[0].message.content.strip())
-        return None
-
-    def list_models(self) -> list[str]:
-        """Return available model IDs from Azure OpenAI."""
-        try:
-            return sorted(m.id for m in self.provider.models.list().data)
-        except Exception as e:
-            logger.warning("Failed to list Azure OpenAI models: %s", e)
-            return []
-
-    def get_context_size(self) -> int:
-        """Get the context window size for Azure OpenAI."""
-        return 128000
-
-    def chat_with_tools(
-        self,
-        messages: list[dict[str, Any]],
-        tools: Optional[list[dict[str, Any]]] = None,
-        tool_choice: Optional[str] = "auto",
-    ) -> dict[str, Any]:
-        try:
-            openai_tool_choice = None
-            if tool_choice:
-                if tool_choice == "none":
-                    openai_tool_choice = "none"
-                elif tool_choice == "auto":
-                    openai_tool_choice = "auto"
-                elif tool_choice == "required":
-                    openai_tool_choice = "required"
-
-            request_params = {
-                "model": self.genai_config.model,
-                "messages": messages,
-                "timeout": self.timeout,
-            }
-
-            if tools:
-                request_params["tools"] = tools
-                if openai_tool_choice is not None:
-                    request_params["tool_choice"] = openai_tool_choice
-
-            result = self.provider.chat.completions.create(**request_params)  # type: ignore[call-overload]
-
-            if (
-                result is None
-                or not hasattr(result, "choices")
-                or len(result.choices) == 0
-            ):
-                return {
-                    "content": None,
-                    "tool_calls": None,
-                    "finish_reason": "error",
-                }
-
-            choice = result.choices[0]
-            message = choice.message
-
-            content = message.content.strip() if message.content else None
-
-            tool_calls = None
-            if message.tool_calls:
-                tool_calls = []
-                for tool_call in message.tool_calls:
-                    try:
-                        arguments = json.loads(tool_call.function.arguments)
-                    except (json.JSONDecodeError, AttributeError) as e:
-                        logger.warning(
-                            f"Failed to parse tool call arguments: {e}, "
-                            f"tool: {tool_call.function.name if hasattr(tool_call.function, 'name') else 'unknown'}"
-                        )
-                        arguments = {}
-
-                    tool_calls.append(
-                        {
-                            "id": tool_call.id if hasattr(tool_call, "id") else "",
-                            "name": tool_call.function.name
-                            if hasattr(tool_call.function, "name")
-                            else "",
-                            "arguments": arguments,
-                        }
-                    )
-
-            finish_reason = "error"
-            if hasattr(choice, "finish_reason") and choice.finish_reason:
-                finish_reason = choice.finish_reason
-            elif tool_calls:
-                finish_reason = "tool_calls"
-            elif content:
-                finish_reason = "stop"
-
-            return {
-                "content": content,
-                "tool_calls": tool_calls,
-                "finish_reason": finish_reason,
-            }
-
-        except Exception as e:
-            logger.warning("Azure OpenAI returned an error: %s", str(e))
-            return {
-                "content": None,
-                "tool_calls": None,
-                "finish_reason": "error",
-            }
-
-    async def chat_with_tools_stream(
-        self,
-        messages: list[dict[str, Any]],
-        tools: Optional[list[dict[str, Any]]] = None,
-        tool_choice: Optional[str] = "auto",
-    ) -> AsyncGenerator[tuple[str, Any], None]:
-        """
-        Stream chat with tools; yields content deltas then final message.
-
-        Implements streaming function calling/tool usage for Azure OpenAI models.
-        """
-        try:
-            openai_tool_choice = None
-            if tool_choice:
-                if tool_choice == "none":
-                    openai_tool_choice = "none"
-                elif tool_choice == "auto":
-                    openai_tool_choice = "auto"
-                elif tool_choice == "required":
-                    openai_tool_choice = "required"
-
-            request_params = {
-                "model": self.genai_config.model,
-                "messages": messages,
-                "timeout": self.timeout,
-                "stream": True,
-                "stream_options": {"include_usage": True},
-            }
-
-            if tools:
-                request_params["tools"] = tools
-                if openai_tool_choice is not None:
-                    request_params["tool_choice"] = openai_tool_choice
-
-            # Use streaming API
-            content_parts: list[str] = []
-            tool_calls_by_index: dict[int, dict[str, Any]] = {}
-            finish_reason = "stop"
-            usage_stats: Optional[dict[str, Any]] = None
-
-            stream = self.provider.chat.completions.create(**request_params)  # type: ignore[call-overload]
-
-            for chunk in stream:
-                chunk_usage = getattr(chunk, "usage", None)
-                if chunk_usage is not None:
-                    usage_stats = _stats_from_openai_usage(chunk_usage)
-
-                if not chunk or not chunk.choices:
-                    continue
-
-                choice = chunk.choices[0]
-                delta = choice.delta
-
-                # Check for finish reason
-                if choice.finish_reason:
-                    finish_reason = choice.finish_reason
-
-                # Extract content deltas
-                if delta.content:
-                    content_parts.append(delta.content)
-                    yield ("content_delta", delta.content)
-
-                # Extract tool calls
-                if delta.tool_calls:
-                    for tc in delta.tool_calls:
-                        idx = tc.index
-                        fn = tc.function
-
-                        if idx not in tool_calls_by_index:
-                            tool_calls_by_index[idx] = {
-                                "id": tc.id or "",
-                                "name": fn.name if fn and fn.name else "",
-                                "arguments": "",
-                            }
-
-                        t = tool_calls_by_index[idx]
-                        if tc.id:
-                            t["id"] = tc.id
-                        if fn and fn.name:
-                            t["name"] = fn.name
-                        if fn and fn.arguments:
-                            t["arguments"] += fn.arguments
-
-            # Build final message
-            full_content = "".join(content_parts).strip() or None
-
-            # Convert tool calls to list format
-            tool_calls_list = None
-            if tool_calls_by_index:
-                tool_calls_list = []
-                for tc in tool_calls_by_index.values():
-                    try:
-                        # Parse accumulated arguments as JSON
-                        parsed_args = json.loads(tc["arguments"])
-                    except (json.JSONDecodeError, Exception):
-                        parsed_args = tc["arguments"]
-
-                    tool_calls_list.append(
-                        {
-                            "id": tc["id"],
-                            "name": tc["name"],
-                            "arguments": parsed_args,
-                        }
-                    )
-                finish_reason = "tool_calls"
-
-            if usage_stats is not None:
-                yield ("stats", usage_stats)
-
-            yield (
-                "message",
-                {
-                    "content": full_content,
-                    "tool_calls": tool_calls_list,
-                    "finish_reason": finish_reason,
-                },
-            )
-
-        except Exception as e:
-            logger.warning("Azure OpenAI streaming returned an error: %s", str(e))
-            yield (
-                "message",
-                {
-                    "content": None,
-                    "tool_calls": None,
-                    "finish_reason": "error",
-                },
-            )
--- a/frigate/genai/plugins/init.py
+++ b/frigate/genai/plugins/init.py
@ -0,0 +1 @@
+"""GenAI provider plugins."""
--- a/frigate/genai/plugins/azure-openai.py
+++ b/frigate/genai/plugins/azure-openai.py
@ -0,0 +1,68 @@
+"""Azure OpenAI Provider for Frigate AI.
+
+Azure OpenAI exposes the same chat completions API as OpenAI once the
+client is constructed, so this provider inherits all transport, streaming,
+reasoning, and tool-calling logic from :class:`OpenAIClient` and only
+overrides what is genuinely Azure-specific:
+
+- Client construction: parses ``api-version`` out of the configured
+  ``base_url`` query string and instantiates :class:`openai.AzureOpenAI`
+  with ``azure_endpoint`` instead of ``base_url``. Raises if the URL is
+  malformed; :class:`GenAIClientManager` catches the exception and
+  disables the provider.
+- Context size: Azure does not expose a per-model ``max_model_len`` field
+  reliably, so we keep the historical 128K default rather than the
+  model-name heuristic used by OpenAI.
+"""
+
+import logging
+from urllib.parse import parse_qs, urlparse
+
+from openai import AzureOpenAI
+
+from frigate.config import GenAIProviderEnum
+from frigate.genai import register_genai_provider
+from frigate.genai.plugins.openai import OpenAIClient
+
+logger = logging.getLogger(__name__)
+
+
+@register_genai_provider(GenAIProviderEnum.azure_openai)
+class AzureOpenAIClient(OpenAIClient):
+    """Generative AI client for Frigate using Azure OpenAI."""
+
+    def _init_provider(self) -> AzureOpenAI:
+        """Initialize the AzureOpenAI client from the configured base_url.
+
+        Returns:
+            An ``AzureOpenAI`` client bound to the endpoint and API version
+            taken from ``base_url``.
+
+        Raises:
+            ValueError: If the query string of ``base_url`` carries no
+                ``api-version`` value, or the URL has no scheme or host.
+        """
+        parsed_url = urlparse(self.genai_config.base_url or "")
+
+        # parse_qs drops blank values, so "?api-version=" counts as missing.
+        api_version = parse_qs(parsed_url.query).get("api-version", [None])[0]
+        if not api_version:
+            raise ValueError("Azure OpenAI base_url is missing api-version.")
+
+        # A missing scheme or host would otherwise produce an unusable
+        # endpoint such as ":///"; fail fast instead.
+        if not parsed_url.scheme or not parsed_url.netloc:
+            raise ValueError("Azure OpenAI base_url is missing scheme or host.")
+
+        # Only scheme and host are kept; any path in base_url is dropped.
+        azure_endpoint = f"{parsed_url.scheme}://{parsed_url.netloc}/"
+
+        return AzureOpenAI(
+            api_key=self.genai_config.api_key,
+            api_version=api_version,
+            azure_endpoint=azure_endpoint,
+        )
+
+    def get_context_size(self) -> int:
+        """Azure does not reliably surface per-model context size; use 128K."""
+        return 128000
--- a/frigate/genai/plugins/gemini.py
+++ b/frigate/genai/plugins/gemini.py
@ -248,6 +248,13 @@ class GeminiClient(GenAIClient):
            if tool_config:
                config_params["tool_config"] = tool_config

+            # Ask thinking-capable models (Gemini 2.5+) to include their
+            # reasoning trace as separate `thought` parts so we can surface
+            # it on the reasoning channel. Older models ignore this field.
+            config_params["thinking_config"] = types.ThinkingConfig(
+                include_thoughts=True
+            )
+
            # Merge runtime_options
            if isinstance(self.genai_config.runtime_options, dict):
                config_params.update(self.genai_config.runtime_options)
@ -262,18 +269,23 @@ class GeminiClient(GenAIClient):
            if not response or not response.candidates:
                return {
                    "content": None,
+                    "reasoning": None,
                    "tool_calls": None,
                    "finish_reason": "error",
                }

            candidate = response.candidates[0]
            content = None
+            reasoning_parts: list[str] = []
            tool_calls = None

-            # Extract content and tool calls from response
+            # Extract content, reasoning, and tool calls from response
            if candidate.content and candidate.content.parts:
                for part in candidate.content.parts:
                    if part.text:
+                        if getattr(part, "thought", False):
+                            reasoning_parts.append(part.text)
+                        else:
                            content = part.text.strip()
                    elif part.function_call:
                        # Handle function call
@ -297,6 +309,8 @@ class GeminiClient(GenAIClient):
                            }
                        )

+            reasoning = "".join(reasoning_parts).strip() or None
+
            # Determine finish reason
            finish_reason = "error"
            if hasattr(candidate, "finish_reason") and candidate.finish_reason:
@ -322,6 +336,7 @@ class GeminiClient(GenAIClient):

            return {
                "content": content,
+                "reasoning": reasoning,
                "tool_calls": tool_calls,
                "finish_reason": finish_reason,
            }
@ -330,6 +345,7 @@ class GeminiClient(GenAIClient):
            logger.warning("Gemini API error during chat_with_tools: %s", str(e))
            return {
                "content": None,
+                "reasoning": None,
                "tool_calls": None,
                "finish_reason": "error",
            }
@ -339,6 +355,7 @@ class GeminiClient(GenAIClient):
            )
            return {
                "content": None,
+                "reasoning": None,
                "tool_calls": None,
                "finish_reason": "error",
            }
@ -477,12 +494,19 @@ class GeminiClient(GenAIClient):
            if tool_config:
                config_params["tool_config"] = tool_config

+            # Ask thinking-capable models to include their reasoning trace
+            # as separate `thought` parts (Gemini 2.5+; ignored elsewhere).
+            config_params["thinking_config"] = types.ThinkingConfig(
+                include_thoughts=True
+            )
+
            # Merge runtime_options
            if isinstance(self.genai_config.runtime_options, dict):
                config_params.update(self.genai_config.runtime_options)

            # Use streaming API
            content_parts: list[str] = []
+            reasoning_parts: list[str] = []
            tool_calls_by_index: dict[int, dict[str, Any]] = {}
            finish_reason = "stop"
            usage_stats: Optional[dict[str, Any]] = None
@ -519,10 +543,14 @@ class GeminiClient(GenAIClient):
                    ]:
                        finish_reason = "error"

-                # Extract content and tool calls from chunk
+                # Extract content, reasoning, and tool calls from chunk
                if candidate.content and candidate.content.parts:
                    for part in candidate.content.parts:
                        if part.text:
+                            if getattr(part, "thought", False):
+                                reasoning_parts.append(part.text)
+                                yield ("reasoning_delta", part.text)
+                            else:
                                content_parts.append(part.text)
                                yield ("content_delta", part.text)
                        elif part.function_call:
@ -565,6 +593,7 @@ class GeminiClient(GenAIClient):

            # Build final message
            full_content = "".join(content_parts).strip() or None
+            full_reasoning = "".join(reasoning_parts).strip() or None

            # Convert tool calls to list format
            tool_calls_list = None
@ -593,6 +622,7 @@ class GeminiClient(GenAIClient):
                "message",
                {
                    "content": full_content,
+                    "reasoning": full_reasoning,
                    "tool_calls": tool_calls_list,
                    "finish_reason": finish_reason,
                },
@ -604,6 +634,7 @@ class GeminiClient(GenAIClient):
                "message",
                {
                    "content": None,
+                    "reasoning": None,
                    "tool_calls": None,
                    "finish_reason": "error",
                },
@ -616,6 +647,7 @@ class GeminiClient(GenAIClient):
                "message",
                {
                    "content": None,
+                    "reasoning": None,
                    "tool_calls": None,
                    "finish_reason": "error",
                },
--- a/frigate/genai/plugins/llama_cpp.py
+++ b/frigate/genai/plugins/llama_cpp.py
@ -527,19 +527,28 @@ class LlamaCppClient(GenAIClient):
            k: v for k, v in self.provider_options.items() if k != "context_size"
        }
        payload.update(provider_opts)
+        payload.update(self.genai_config.runtime_options)
        return payload

    def _message_from_choice(self, choice: dict[str, Any]) -> dict[str, Any]:
-        """Parse OpenAI-style choice into {content, tool_calls, finish_reason}."""
+        """Parse OpenAI-style choice into {content, reasoning, tool_calls, finish_reason}.
+
+        llama.cpp's `--reasoning-format` puts the trace in
+        `message.reasoning_content` (preferred) or `message.thinking`; both
+        keys are accepted so different builds work without configuration.
+        """
        message = choice.get("message", {})
        content = message.get("content")
        content = content.strip() if content else None
+        reasoning = message.get("reasoning_content") or message.get("thinking")
+        reasoning = reasoning.strip() if reasoning else None
        tool_calls = parse_tool_calls_from_message(message)
        finish_reason = choice.get("finish_reason") or (
            "tool_calls" if tool_calls else "stop" if content else "error"
        )
        return {
            "content": content,
+            "reasoning": reasoning,
            "tool_calls": tool_calls,
            "finish_reason": finish_reason,
        }
@ -802,6 +811,7 @@ class LlamaCppClient(GenAIClient):
        try:
            payload = self._build_payload(messages, tools, tool_choice, stream=True)
            content_parts: list[str] = []
+            reasoning_parts: list[str] = []
            tool_calls_by_index: dict[int, dict[str, Any]] = {}
            finish_reason = "stop"

@ -831,6 +841,15 @@ class LlamaCppClient(GenAIClient):
                        delta = choices[0].get("delta", {})
                        if choices[0].get("finish_reason"):
                            finish_reason = choices[0]["finish_reason"]
+                        # llama.cpp emits separated thinking under
+                        # reasoning_content (preferred) or thinking before any
+                        # content tokens arrive
+                        reasoning_delta = delta.get("reasoning_content") or delta.get(
+                            "thinking"
+                        )
+                        if reasoning_delta:
+                            reasoning_parts.append(reasoning_delta)
+                            yield ("reasoning_delta", reasoning_delta)
                        if delta.get("content"):
                            content_parts.append(delta["content"])
                            yield ("content_delta", delta["content"])
@ -856,6 +875,7 @@ class LlamaCppClient(GenAIClient):
                                )

            full_content = "".join(content_parts).strip() or None
+            full_reasoning = "".join(reasoning_parts).strip() or None
            tool_calls_list = self._streamed_tool_calls_to_list(tool_calls_by_index)
            if tool_calls_list:
                finish_reason = "tool_calls"
@ -863,6 +883,7 @@ class LlamaCppClient(GenAIClient):
                "message",
                {
                    "content": full_content,
+                    "reasoning": full_reasoning,
                    "tool_calls": tool_calls_list,
                    "finish_reason": finish_reason,
                },
--- a/frigate/genai/plugins/ollama.py
+++ b/frigate/genai/plugins/ollama.py
@ -309,6 +309,7 @@ class OllamaClient(GenAIClient):
            "model": self.genai_config.model,
            "messages": request_messages,
            **self.provider_options,
+            **self.genai_config.runtime_options,
        }
        if stream:
            request_params["stream"] = True
@ -336,6 +337,9 @@ class OllamaClient(GenAIClient):
            response.get("done"),
        )
        content = message.get("content", "").strip() if message.get("content") else None
+        reasoning = (
+            message.get("thinking", "").strip() if message.get("thinking") else None
+        )
        tool_calls = parse_tool_calls_from_message(message)
        finish_reason = "error"
        if response.get("done"):
@ -348,6 +352,7 @@ class OllamaClient(GenAIClient):
            finish_reason = "stop"
        return {
            "content": content,
+            "reasoning": reasoning,
            "tool_calls": tool_calls,
            "finish_reason": finish_reason,
        }
@ -431,6 +436,9 @@ class OllamaClient(GenAIClient):
                )
                response = await async_client.chat(**request_params)
                result = self._message_from_response(response)
+                reasoning = result.get("reasoning")
+                if reasoning:
+                    yield ("reasoning_delta", reasoning)
                content = result.get("content")
                if content:
                    yield ("content_delta", content)
@ -449,6 +457,7 @@ class OllamaClient(GenAIClient):
                headers=self._auth_headers(),
            )
            content_parts: list[str] = []
+            reasoning_parts: list[str] = []
            final_message: dict[str, Any] | None = None
            final_chunk: Any = None
            stream = await async_client.chat(**request_params)
@ -456,6 +465,10 @@ class OllamaClient(GenAIClient):
                if not chunk or "message" not in chunk:
                    continue
                msg = chunk.get("message", {})
+                reasoning_delta = msg.get("thinking") or ""
+                if reasoning_delta:
+                    reasoning_parts.append(reasoning_delta)
+                    yield ("reasoning_delta", reasoning_delta)
                delta = msg.get("content") or ""
                if delta:
                    content_parts.append(delta)
@ -463,8 +476,10 @@ class OllamaClient(GenAIClient):
                if chunk.get("done"):
                    final_chunk = chunk
                    full_content = "".join(content_parts).strip() or None
+                    full_reasoning = "".join(reasoning_parts).strip() or None
                    final_message = {
                        "content": full_content,
+                        "reasoning": full_reasoning,
                        "tool_calls": None,
                        "finish_reason": "stop",
                    }
@ -481,6 +496,7 @@ class OllamaClient(GenAIClient):
                    "message",
                    {
                        "content": "".join(content_parts).strip() or None,
+                        "reasoning": "".join(reasoning_parts).strip() or None,
                        "tool_calls": None,
                        "finish_reason": "stop",
                    },
--- a/frigate/genai/plugins/openai.py
+++ b/frigate/genai/plugins/openai.py
@ -38,7 +38,11 @@ class OpenAIClient(GenAIClient):
    context_size: Optional[int] = None

    def _init_provider(self) -> OpenAI:
-        """Initialize the client."""
+        """Initialize the client.
+
+        Subclasses (e.g. Azure) should raise on configuration errors; the
+        manager catches construction failures and disables the provider.
+        """
        # Extract context_size from provider_options as it's not a valid OpenAI client parameter
        # It will be used in get_context_size() instead
        provider_opts = {
@ -236,6 +240,10 @@ class OpenAIClient(GenAIClient):
            choice = result.choices[0]
            message = choice.message
            content = message.content.strip() if message.content else None
+            raw_reasoning = getattr(message, "reasoning_content", None) or getattr(
+                message, "reasoning", None
+            )
+            reasoning = raw_reasoning.strip() if raw_reasoning else None

            tool_calls = None
            if message.tool_calls:
@ -270,6 +278,7 @@ class OpenAIClient(GenAIClient):

            return {
                "content": content,
+                "reasoning": reasoning,
                "tool_calls": tool_calls,
                "finish_reason": finish_reason,
            }
@ -278,6 +287,7 @@ class OpenAIClient(GenAIClient):
            logger.warning("OpenAI request timed out: %s", str(e))
            return {
                "content": None,
+                "reasoning": None,
                "tool_calls": None,
                "finish_reason": "error",
            }
@ -285,6 +295,7 @@ class OpenAIClient(GenAIClient):
            logger.warning("OpenAI returned an error: %s", str(e))
            return {
                "content": None,
+                "reasoning": None,
                "tool_calls": None,
                "finish_reason": "error",
            }
@ -335,6 +346,7 @@ class OpenAIClient(GenAIClient):

            # Use streaming API
            content_parts: list[str] = []
+            reasoning_parts: list[str] = []
            tool_calls_by_index: dict[int, dict[str, Any]] = {}
            finish_reason = "stop"
            usage_stats: Optional[dict[str, Any]] = None
@ -356,6 +368,15 @@ class OpenAIClient(GenAIClient):
                if choice.finish_reason:
                    finish_reason = choice.finish_reason

+                # Extract reasoning deltas (reasoning_content or reasoning,
+                # depending on the server)
+                reasoning_delta = getattr(delta, "reasoning_content", None) or getattr(
+                    delta, "reasoning", None
+                )
+                if reasoning_delta:
+                    reasoning_parts.append(reasoning_delta)
+                    yield ("reasoning_delta", reasoning_delta)
+
                # Extract content deltas
                if delta.content:
                    content_parts.append(delta.content)
@ -384,6 +405,7 @@ class OpenAIClient(GenAIClient):

            # Build final message
            full_content = "".join(content_parts).strip() or None
+            full_reasoning = "".join(reasoning_parts).strip() or None

            # Convert tool calls to list format
            tool_calls_list = None
@ -412,6 +434,7 @@ class OpenAIClient(GenAIClient):
                "message",
                {
                    "content": full_content,
+                    "reasoning": full_reasoning,
                    "tool_calls": tool_calls_list,
                    "finish_reason": finish_reason,
                },
@ -423,6 +446,7 @@ class OpenAIClient(GenAIClient):
                "message",
                {
                    "content": None,
+                    "reasoning": None,
                    "tool_calls": None,
                    "finish_reason": "error",
                },
@ -433,6 +457,7 @@ class OpenAIClient(GenAIClient):
                "message",
                {
                    "content": None,
+                    "reasoning": None,
                    "tool_calls": None,
                    "finish_reason": "error",
                },
--- a/frigate/genai/prompts.py
+++ b/frigate/genai/prompts.py
@ -0,0 +1,739 @@
+"""Prompt and response-format builders for GenAI features.
+
+Centralizes the per-feature prompt framing and structured-output schema
+shaping so provider clients in :mod:`frigate.genai.plugins` only handle
+transport.
+"""
+
+import datetime
+from typing import Any, Dict, List, Optional
+
+from playhouse.shortcuts import model_to_dict
+
+from frigate.config import CameraConfig, FrigateConfig
+from frigate.config.classification import ObjectClassificationType
+from frigate.config.ui import UnitSystemEnum
+from frigate.data_processing.post.types import ReviewMetadata
+from frigate.models import Event
+
+
+def build_review_description_prompt(
+    review_data: dict[str, Any],
+    thumbnails: list[bytes],
+    concerns: list[str],
+    preferred_language: str | None,
+    activity_context_prompt: str,
+) -> str:
+    """Assemble the instruction prompt for describing one review item.
+
+    Args:
+        review_data: Review details; the keys read here are ``camera``,
+            ``start``, ``duration``, ``zones`` and ``unified_objects``.
+        thumbnails: Frames sent alongside the prompt; only their count is
+            used here.
+        concerns: Extra concerns to ask about. When empty, the
+            ``other_concerns`` field guidance is left out of the prompt.
+        preferred_language: Language to request the answer in, if any.
+        activity_context_prompt: Text inserted under the normal-activity
+            heading.
+
+    Returns:
+        The complete prompt text.
+    """
+    # Optional guidance for the `other_concerns` response field.
+    if concerns:
+        bullet_list = "\n    - ".join(concerns)
+        concern_section = (
+            "\n- `other_concerns` (list of strings): Include a list of any of "
+            "the following concerns that are occurring:\n"
+            f"    - {bullet_list}"
+        )
+    else:
+        concern_section = ""
+
+    # Bulleted list of detected objects, with a placeholder when empty.
+    detected = review_data["unified_objects"]
+    if detected:
+        objects_section = "\n- " + "\n- ".join(detected)
+    else:
+        objects_section = "\n- (No objects detected)"
+
+    # Trailing language request, omitted when no language is configured.
+    language_section = (
+        f"Provide your answer in {preferred_language}" if preferred_language else ""
+    )
+
+    frame_count = len(thumbnails)
+    zone_names = review_data["zones"]
+    zones_text = ", ".join(zone_names) if zone_names else "None"
+
+    return f"""
+Your task is to analyze a sequence of images taken in chronological order from a security camera.
+
+## Normal Activity Patterns for This Property
+
+{activity_context_prompt}
+
+## Task Instructions
+
+Describe the scene based on observable actions and movements, evaluate the activity against the Activity Indicators above, and assign a potential_threat_level (0, 1, or 2) by applying the threat level indicators consistently.
+
+## Analysis Guidelines
+
+When forming your description:
+- **CRITICAL: Only describe objects explicitly listed in "Objects in Scene" below.** Do not infer or mention additional people, vehicles, or objects not present in this list, even if visual patterns suggest them. If only a car is listed, do not describe a person interacting with it unless "person" is also in the objects list.
+- **Only describe actions actually visible in the frames.** Do not assume or infer actions that you don't observe happening. If someone walks toward furniture but you never see them sit, do not say they sat. Stick to what you can see across the sequence.
+- Describe what you observe: actions, movements, interactions with objects and the environment. Include any observable environmental changes (e.g., lighting changes triggered by activity).
+- Note visible details such as clothing, items being carried or placed, tools or equipment present, and how they interact with the property or objects.
+- Consider the full sequence chronologically: what happens from start to finish, how duration and actions relate to the location and objects involved.
+- **Use the actual timestamp provided in "Activity started at"** below for time of day context—do not infer time from image brightness or darkness. Unusual hours (late night/early morning) should increase suspicion when the observable behavior itself appears questionable. However, recognize that some legitimate activities can occur at any hour.
+- **Consider duration as a primary factor**: Apply the duration thresholds defined in the activity patterns above. Brief sequences during normal hours with apparent purpose typically indicate normal activity unless explicit suspicious actions are visible.
+- **Weigh all evidence holistically**: Match the activity against the normal and suspicious patterns defined above, then evaluate based on the complete context (zone, objects, time, actions, duration). Apply the threat level indicators consistently. Use your judgment for edge cases.
+
+## Response Field Guidelines
+
+Respond with a JSON object matching the provided schema. Field-specific guidance:
+- `observations`: Include the very start of the activity — for example, a vehicle entering the frame or pulling into the driveway — even if it lasts only a few frames and the rest of the clip is dominated by a longer activity. Include each arrival, departure, object handled, and notable change in position or state. Each item is a single concrete fact written as a complete sentence.
+- `scene`: Describe how the sequence begins, then the progression of events — all significant movements and actions in order. For example, if a vehicle arrives and then a person exits, describe both sequentially. For named subjects (those with a `←` separator in "Objects in Scene"), always use their name — do not replace them with generic terms. For unnamed objects (e.g., "person", "car"), refer to them naturally with articles (e.g., "a person", "the car"). Your description should align with and support the threat level you assign.
+- `title`: Name the primary activity across the observations, together with the location. An activity is what is being done with objects, tools, or surfaces; locomotion through the scene qualifies as the activity only when no other interaction is observed. For named subjects, always use their name. For unnamed objects, refer to them naturally with articles.
+- `shortSummary`: Briefly summarize the primary activity across the observations.
+- `potential_threat_level`: Must be consistent with your scene description and the activity patterns above.
+{concern_section}
+
+## Sequence Details
+
+- Camera: {review_data["camera"]}
+- Total frames: {frame_count} (Frame 1 = earliest, Frame {frame_count} = latest)
+- Activity started at {review_data["start"]} and lasted {review_data["duration"]} seconds
+- Zones involved: {zones_text}
+
+## Objects in Scene
+
+Each line represents a detection state, not necessarily unique individuals. The `←` symbol separates a recognized subject's name from their object type — use only the name (before the `←`) in your response, not the type after it. The same subject may appear across multiple lines if detected multiple times.
+
+**Note: Unidentified objects (without names) are NOT indicators of suspicious activity—they simply mean the system hasn't identified that object.**
+{objects_section}
+
+{language_section}
+"""
+
+
+def build_review_description_response_format(concerns: list[str]) -> dict[str, Any]:
+    """Return the ``json_schema`` response format for review descriptions.
+
+    The schema is generated from `ReviewMetadata`. The `time` field is
+    always removed (it is populated server-side); `other_concerns` is
+    removed as well when `concerns` is empty. Each removed field is dropped
+    from both the schema's properties and its required list.
+    """
+    schema = ReviewMetadata.model_json_schema()
+
+    # `time` always goes; `other_concerns` only when nothing is configured.
+    stripped_fields = ["time"] if concerns else ["time", "other_concerns"]
+
+    for field_name in stripped_fields:
+        schema.get("properties", {}).pop(field_name, None)
+        if field_name in schema.get("required", []):
+            schema["required"].remove(field_name)
+
+    json_schema = {
+        "name": "review_metadata",
+        "strict": True,
+        "schema": schema,
+    }
+    return {"type": "json_schema", "json_schema": json_schema}
+
+
+def build_review_summary_prompt(
+    start_ts: float,
+    end_ts: float,
+    events: list[dict[str, Any]],
+    preferred_language: str | None,
+) -> str:
+    """Assemble the prompt for a report summarizing several review events.
+
+    Args:
+        start_ts: Start of the reporting window as a Unix timestamp.
+        end_ts: End of the reporting window as a Unix timestamp.
+        events: Event dictionaries; each one is appended to the prompt
+            using its default string form.
+        preferred_language: Language to request the answer in, if any.
+
+    Returns:
+        The report instructions followed by the events and, when set, the
+        language request.
+    """
+    # Both ends of the window are rendered in local time with one format.
+    stamp_format = "%B %d, %Y at %I:%M %p"
+    window_start = datetime.datetime.fromtimestamp(start_ts).strftime(stamp_format)
+    window_end = datetime.datetime.fromtimestamp(end_ts).strftime(stamp_format)
+    time_range = f"{window_start} to {window_end}"
+
+    sections = [
+        f"""
+You are a security officer writing a concise security report.
+
+Time range: {time_range}
+
+Input format: Each event is a JSON object with:
+- "title", "scene", "confidence", "potential_threat_level" (0-2), "other_concerns", "camera", "time", "start_time", "end_time"
+- "context": array of related events from other cameras that occurred during overlapping time periods
+
+**Note: Use the "scene" field for event descriptions in the report. Ignore any "shortSummary" field if present.**
+
+Report Structure - Use this EXACT format:
+
+# Security Summary - {time_range}
+
+## Overview
+[Write 1-2 sentences summarizing the overall activity pattern during this period.]
+
+---
+
+## Timeline
+
+[Group events by time periods (e.g., "Morning (6:00 AM - 12:00 PM)", "Afternoon (12:00 PM - 5:00 PM)", "Evening (5:00 PM - 9:00 PM)", "Night (9:00 PM - 6:00 AM)"). Use appropriate time blocks based on when events occurred.]
+
+### [Time Block Name]
+
+**HH:MM AM/PM** | [Camera Name] | [Threat Level Indicator]
+- [Event title]: [Clear description incorporating contextual information from the "context" array]
+- Context: [If context array has items, mention them here, e.g., "Delivery truck present on Front Driveway Cam (HH:MM AM/PM)"]
+- Assessment: [Brief assessment incorporating context - if context explains the event, note it here]
+
+[Repeat for each event in chronological order within the time block]
+
+---
+
+## Summary
+[One sentence summarizing the period. If all events are normal/explained: "Routine activity observed." If review needed: "Some activity requires review but no security concerns." If security concerns: "Security concerns requiring immediate attention."]
+
+Guidelines:
+- List ALL events in chronological order, grouped by time blocks
+- Threat level indicators: ✓ Normal, ⚠️ Needs review, 🔴 Security concern
+- Integrate contextual information naturally - use the "context" array to enrich each event's description
+- If context explains the event (e.g., delivery truck explains person at door), describe it accordingly (e.g., "delivery person" not "unidentified person")
+- Be concise but informative - focus on what happened and what it means
+- If contextual information makes an event clearly normal, reflect that in your assessment
+- Only create time blocks that have events - don't create empty sections
+""",
+        "\n\nEvents:\n",
+    ]
+
+    # One blank-line-separated entry per event, in the order given.
+    sections.extend(f"\n{event}\n" for event in events)
+
+    if preferred_language:
+        sections.append(f"\nProvide your answer in {preferred_language}")
+
+    return "".join(sections)
+
+
+def build_object_description_prompt(
+    camera_config: CameraConfig,
+    event: Event,
+) -> str:
+    """Render the description prompt for a single tracked object.
+
+    The template is looked up in `objects.genai.object_prompts` by the
+    event's label; when there is no entry for that label, the camera's
+    `objects.genai.prompt` is used. The chosen template is then filled in
+    with the event's fields via `str.format`.
+
+    Raises:
+        KeyError: if the user-defined prompt template references an unknown
+            event field.
+    """
+    genai_settings = camera_config.objects.genai
+    label = str(event.label)
+
+    # Per-label override first, camera-wide prompt as the fallback.
+    template = genai_settings.object_prompts.get(label, genai_settings.prompt)
+
+    event_fields = model_to_dict(event)
+    return template.format(**event_fields)
+
+
+def get_attribute_classifications(config: FrigateConfig) -> List[Dict[str, Any]]:
+    """List the enabled custom classification models of `attribute` type.
+
+    A model is included only when it is enabled, has an object config, and
+    that object config's classification type is `attribute`.
+
+    Each entry: {"name": <model name>, "objects": [<object label>, ...]}.
+    The name falls back to the model's config key when no name is set, and
+    the object list is empty when none are configured. These models attach
+    attribute metadata to events on the listed object types, which can
+    later be filtered via the search_objects `attribute` field.
+    """
+    attribute_models: List[Dict[str, Any]] = []
+
+    for key, model in config.classification.custom.items():
+        target = model.object_config
+        is_attribute_model = (
+            model.enabled
+            and target is not None
+            and target.classification_type == ObjectClassificationType.attribute
+        )
+
+        if is_attribute_model:
+            attribute_models.append(
+                {
+                    "name": model.name or key,
+                    "objects": list(target.objects or []),
+                }
+            )
+
+    return attribute_models
+
+
+def get_tool_definitions(
+    semantic_search_enabled: bool = False,
+    attribute_classifications: Optional[List[Dict[str, Any]]] = None,
+) -> List[Dict[str, Any]]:
+    """
+    Get OpenAI-compatible tool definitions for Frigate.
+
+    Returns a list of tool definitions that can be used with OpenAI-compatible
+    function calling APIs. When semantic search is enabled, the search_objects
+    tool exposes an additional `semantic_query` parameter for descriptive
+    queries (e.g. "person riding a lawn mower") and find_similar_objects is
+    included. When attribute classification models are configured, an
+    `attribute` parameter is exposed for filtering by their labels.
+    """
+    search_objects_properties: Dict[str, Any] = {
+        "camera": {
+            "type": "string",
+            "description": "Camera name to filter by (optional).",
+        },
+        "label": {
+            "type": "string",
+            "description": (
+                "Generic object class to filter by — one of the tracked detector "
+                "labels such as 'person', 'package', 'car', 'dog', 'bird'. Use "
+                "this for broad queries like 'show me all cars today'. Combine "
+                "with semantic_query when the user also describes appearance or "
+                "behavior (e.g. label='person', semantic_query='riding a lawn "
+                "mower')."
+            ),
+        },
+        "sub_label": {
+            "type": "string",
+            "description": (
+                "Filter by a DISCRETE NAMED entity recognized in the detection. "
+                "Use this for: a known person's name ('John'), a delivery "
+                "company ('Amazon', 'UPS'), a recognized animal species or "
+                "breed ('blue jay', 'cardinal', 'golden retriever'), or a "
+                "license plate string. When filtering by a specific name, set "
+                "only sub_label and leave label unset. Do NOT use sub_label "
+                "for descriptions of appearance, clothing, or actions — those "
+                "belong in semantic_query."
+            ),
+        },
+        "after": {
+            "type": "string",
+            "description": "Start time in ISO 8601 format (e.g., '2024-01-01T00:00:00Z').",
+        },
+        "before": {
+            "type": "string",
+            "description": "End time in ISO 8601 format (e.g., '2024-01-01T23:59:59Z').",
+        },
+        "zones": {
+            "type": "array",
+            "items": {"type": "string"},
+            "description": "List of zone names to filter by.",
+        },
+        "limit": {
+            "type": "integer",
+            "description": "Maximum number of objects to return (default: 25).",
+            "default": 25,
+        },
+    }
+
+    if attribute_classifications:
+        model_outline = "; ".join(
+            f"{m['name']} (applies to {', '.join(m['objects']) or 'any object'})"
+            for m in attribute_classifications
+        )
+        search_objects_properties["attribute"] = {
+            "type": "string",
+            "description": (
+                "Filter by a classification attribute label produced by a "
+                "configured attribute classification model. Use this INSTEAD "
+                "of semantic_query when the user's request matches one of "
+                "these classifications. Configured models: "
+                f"{model_outline}. "
+                "Set the value to the attribute label that matches the user's "
+                "phrasing (case-sensitive)."
+            ),
+        }
+
+    if semantic_search_enabled:
+        search_objects_properties["semantic_query"] = {
+            "type": "string",
+            "description": (
+                "Optional natural-language description of a PHYSICAL "
+                "CHARACTERISTIC, APPEARANCE, or ACTIVITY the user mentioned, "
+                "used to semantically narrow results. Only set this when the "
+                "user describes something beyond what label and sub_label can "
+                "express on their own.\n"
+                "USE for descriptive phrases like: 'riding a lawn mower', "
+                "'wearing a red jacket', 'carrying a package', 'walking a "
+                "dog', 'on a bicycle', 'holding an umbrella'.\n"
+                "DO NOT USE for:\n"
+                "- specific named people, pets, or delivery companies → use sub_label\n"
+                "- animal species or breed names like 'blue jay', 'cardinal', "
+                "'golden retriever' → use sub_label\n"
+                "- license plate strings → use sub_label\n"
+                "- generic object queries like 'all cars today' or 'every "
+                "person' → use label alone with no semantic_query\n"
+                "When set, combine with label/time/camera/zone filters as "
+                "usual (e.g. label='person', semantic_query='riding a lawn "
+                "mower', after='2024-05-01T00:00:00Z')."
+            ),
+        }
+
+    search_objects_description = (
+        "Search the historical record of detected objects in Frigate. "
+        "Use this ONLY for questions about the PAST — e.g. 'did anyone come by today?', "
+        "'when was the last car?', 'show me detections from yesterday'. "
+        "Do NOT use this for monitoring or alerting requests about future events — "
+        "use start_camera_watch instead for those. "
+        "An 'object' in Frigate represents a tracked detection (e.g., a person, package, car).\n\n"
+        "Choose filters based on what the user is asking for:\n"
+        "- Generic class query ('show me all cars today'): set `label` only.\n"
+        "- Specific NAMED entity (known person, delivery company, animal "
+        "species/breed like 'blue jay' or 'golden retriever', license "
+        "plate): set `sub_label` only and leave `label` unset.\n"
+    )
+    if semantic_search_enabled:
+        search_objects_description += (
+            "- Physical CHARACTERISTIC, APPEARANCE, or ACTIVITY that is not a "
+            "discrete name ('person riding a lawn mower', 'someone in a red "
+            "jacket', 'person carrying a package'): set `semantic_query` with "
+            "the descriptive phrase, optionally alongside `label` for the "
+            "object class. Do NOT put descriptive phrases in sub_label."
+        )
+
+    return [
+        {
+            "type": "function",
+            "function": {
+                "name": "search_objects",
+                "description": search_objects_description,
+                "parameters": {
+                    "type": "object",
+                    "properties": search_objects_properties,
+                },
+                "required": [],
+            },
+        },
+        {
+            "type": "function",
+            "function": {
+                "name": "find_similar_objects",
+                "description": (
+                    "Find tracked objects that are visually and semantically similar "
+                    "to a specific past event. Use this when the user references a "
+                    "particular object they have seen and wants to find other "
+                    "sightings of the same or similar one ('that green car', 'the "
+                    "person in the red jacket', 'the package that was delivered'). "
+                    "Prefer this over search_objects whenever the user's intent is "
+                    "'find more like this specific one.' Use search_objects first "
+                    "only if you need to locate the anchor event. Requires semantic "
+                    "search to be enabled."
+                ),
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "event_id": {
+                            "type": "string",
+                            "description": "The id of the anchor event to find similar objects to.",
+                        },
+                        "after": {
+                            "type": "string",
+                            "description": "Start time in ISO 8601 format (e.g., '2024-01-01T00:00:00Z').",
+                        },
+                        "before": {
+                            "type": "string",
+                            "description": "End time in ISO 8601 format (e.g., '2024-01-01T23:59:59Z').",
+                        },
+                        "cameras": {
+                            "type": "array",
+                            "items": {"type": "string"},
+                            "description": "Optional list of cameras to restrict to. Defaults to all.",
+                        },
+                        "labels": {
+                            "type": "array",
+                            "items": {"type": "string"},
+                            "description": "Optional list of labels to restrict to. Defaults to the anchor event's label.",
+                        },
+                        "sub_labels": {
+                            "type": "array",
+                            "items": {"type": "string"},
+                            "description": "Optional list of sub_labels (names) to restrict to.",
+                        },
+                        "zones": {
+                            "type": "array",
+                            "items": {"type": "string"},
+                            "description": "Optional list of zones. An event matches if any of its zones overlap.",
+                        },
+                        "similarity_mode": {
+                            "type": "string",
+                            "enum": ["visual", "semantic", "fused"],
+                            "description": "Which similarity signal(s) to use. 'fused' (default) combines visual and semantic.",
+                            "default": "fused",
+                        },
+                        "min_score": {
+                            "type": "number",
+                            "description": "Drop matches with a similarity score below this threshold (0.0-1.0).",
+                        },
+                        "limit": {
+                            "type": "integer",
+                            "description": "Maximum number of matches to return (default: 10).",
+                            "default": 10,
+                        },
+                    },
+                    "required": ["event_id"],
+                },
+            },
+        },
+        {
+            "type": "function",
+            "function": {
+                "name": "set_camera_state",
+                "description": (
+                    "Change a camera's feature state (e.g., turn detection on/off, enable/disable recordings). "
+                    "Use camera='*' to apply to all cameras at once. "
+                    "Only call this tool when the user explicitly asks to change a camera setting. "
+                    "Requires admin privileges."
+                ),
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "camera": {
+                            "type": "string",
+                            "description": "Camera name to target, or '*' to target all cameras.",
+                        },
+                        "feature": {
+                            "type": "string",
+                            "enum": [
+                                "detect",
+                                "record",
+                                "snapshots",
+                                "audio",
+                                "motion",
+                                "enabled",
+                                "birdseye",
+                                "birdseye_mode",
+                                "improve_contrast",
+                                "ptz_autotracker",
+                                "motion_contour_area",
+                                "motion_threshold",
+                                "notifications",
+                                "audio_transcription",
+                                "review_alerts",
+                                "review_detections",
+                                "object_descriptions",
+                                "review_descriptions",
+                                "profile",
+                            ],
+                            "description": (
+                                "The feature to change. Most features accept ON or OFF. "
+                                "birdseye_mode accepts CONTINUOUS, MOTION, or OBJECTS. "
+                                "motion_contour_area and motion_threshold accept a number. "
+                                "profile accepts a profile name or 'none' to deactivate (requires camera='*')."
+                            ),
+                        },
+                        "value": {
+                            "type": "string",
+                            "description": "The value to set. ON or OFF for toggles, a number for thresholds, a profile name or 'none' for profile.",
+                        },
+                    },
+                    "required": ["camera", "feature", "value"],
+                },
+            },
+        },
+        {
+            "type": "function",
+            "function": {
+                "name": "get_live_context",
+                "description": (
+                    "Get the current live image and detection information for a camera: objects being tracked, "
+                    "zones, timestamps. Use this to understand what is visible in the live view. "
+                    "Call this when answering questions about what is happening right now on a specific camera."
+                ),
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "camera": {
+                            "type": "string",
+                            "description": "Camera name to get live context for.",
+                        },
+                    },
+                    "required": ["camera"],
+                },
+            },
+        },
+        {
+            "type": "function",
+            "function": {
+                "name": "start_camera_watch",
+                "description": (
+                    "Start a continuous VLM watch job that monitors a camera and sends a notification "
+                    "when a specified condition is met. Use this when the user wants to be alerted about "
+                    "a future event, e.g. 'tell me when guests arrive' or 'notify me when the package is picked up'. "
+                    "Only one watch job can run at a time. Returns a job ID."
+                ),
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "camera": {
+                            "type": "string",
+                            "description": "Camera ID to monitor.",
+                        },
+                        "condition": {
+                            "type": "string",
+                            "description": (
+                                "Natural-language description of the condition to watch for, "
+                                "e.g. 'a person arrives at the front door'."
+                            ),
+                        },
+                        "max_duration_minutes": {
+                            "type": "integer",
+                            "description": "Maximum time to watch before giving up (minutes, default 60).",
+                            "default": 60,
+                        },
+                        "labels": {
+                            "type": "array",
+                            "items": {"type": "string"},
+                            "description": "Object labels that should trigger a VLM check (e.g. ['person', 'car']). If omitted, any detection on the camera triggers a check.",
+                        },
+                        "zones": {
+                            "type": "array",
+                            "items": {"type": "string"},
+                            "description": "Zone names to filter by. If specified, only detections in these zones trigger a VLM check.",
+                        },
+                    },
+                    "required": ["camera", "condition"],
+                },
+            },
+        },
+        {
+            "type": "function",
+            "function": {
+                "name": "stop_camera_watch",
+                "description": (
+                    "Cancel the currently running VLM watch job. Use this when the user wants to "
+                    "stop a previously started watch, e.g. 'stop watching the front door'."
+                ),
+                "parameters": {
+                    "type": "object",
+                    "properties": {},
+                    "required": [],
+                },
+            },
+        },
+        {
+            "type": "function",
+            "function": {
+                "name": "get_profile_status",
+                "description": (
+                    "Get the current profile status including the active profile and "
+                    "timestamps of when each profile was last activated. Use this to "
+                    "determine time periods for recap requests — e.g. when the user asks "
+                    "'what happened while I was away?', call this first to find the relevant "
+                    "time window based on profile activation history."
+                ),
+                "parameters": {
+                    "type": "object",
+                    "properties": {},
+                    "required": [],
+                },
+            },
+        },
+        {
+            "type": "function",
+            "function": {
+                "name": "get_recap",
+                "description": (
+                    "Get a recap of all activity (alerts and detections) for a given time period. "
+                    "Use this after calling get_profile_status to retrieve what happened during "
+                    "a specific window — e.g. 'what happened while I was away?'. Returns a "
+                    "chronological list of activity with camera, objects, zones, and GenAI-generated "
+                    "descriptions when available. Summarize the results for the user."
+                ),
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "after": {
+                            "type": "string",
+                            "description": "Start of the time period in ISO 8601 format (e.g. '2025-03-15T08:00:00').",
+                        },
+                        "before": {
+                            "type": "string",
+                            "description": "End of the time period in ISO 8601 format (e.g. '2025-03-15T17:00:00').",
+                        },
+                        "cameras": {
+                            "type": "string",
+                            "description": "Comma-separated camera IDs to include, or 'all' for all cameras. Default is 'all'.",
+                        },
+                        "severity": {
+                            "type": "string",
+                            "enum": ["alert", "detection"],
+                            "description": "Filter by severity level. Omit to include both alerts and detections.",
+                        },
+                    },
+                    "required": ["after", "before"],
+                },
+            },
+        },
+    ]
+
+
+def build_chat_system_prompt(
+    config: FrigateConfig,
+    allowed_cameras: List[str],
+    semantic_search_enabled: bool,
+    attribute_classifications: List[Dict[str, Any]],
+) -> str:
+    """Build the system prompt for the chat completion endpoint.
+
+    Composes the static framing with conditional sections describing the
+    available cameras, speed units, semantic-search routing guidance, and
+    configured attribute classifications.
+    """
+    current_datetime = datetime.datetime.now()
+    current_date_str = current_datetime.strftime("%Y-%m-%d")
+    current_time_str = current_datetime.strftime("%I:%M:%S %p")
+
+    cameras_info: List[str] = []
+    has_speed_zone = False
+    for camera_id in allowed_cameras:
+        if camera_id not in config.cameras:
+            continue
+        camera_config = config.cameras[camera_id]
+        friendly_name = (
+            camera_config.friendly_name
+            if camera_config.friendly_name
+            else camera_id.replace("_", " ").title()
+        )
+        zone_names = list(camera_config.zones.keys())
+        if not has_speed_zone:
+            has_speed_zone = any(
+                zone.distances for zone in camera_config.zones.values()
+            )
+        if zone_names:
+            cameras_info.append(
+                f"  - {friendly_name} (ID: {camera_id}, zones: {', '.join(zone_names)})"
+            )
+        else:
+            cameras_info.append(f"  - {friendly_name} (ID: {camera_id})")
+
+    cameras_section = ""
+    if cameras_info:
+        cameras_section = (
+            "\n\nAvailable cameras:\n"
+            + "\n".join(cameras_info)
+            + "\n\nWhen users refer to cameras by their friendly name (e.g., 'Back Deck Camera'), use the corresponding camera ID (e.g., 'back_deck_cam') in tool calls."
+        )
+
+    speed_units_section = ""
+    if has_speed_zone:
+        speed_unit = (
+            "mph" if config.ui.unit_system == UnitSystemEnum.imperial else "km/h"
+        )
+        speed_units_section = f"\n\nReport object speeds to the user in {speed_unit}."
+
+    semantic_search_section = ""
+    if semantic_search_enabled:
+        semantic_search_section = (
+            "\n\nWhen routing a search_objects call, pick filters by the shape of the user's request:\n"
+            "- Generic class ('show me all cars today'): set `label` only.\n"
+            "- Specific named entity — a known person ('John'), delivery company ('Amazon'), animal species/breed ('blue jay', 'cardinal', 'golden retriever'), or license plate: set `sub_label` only and leave `label` unset.\n"
+            "- Physical characteristic, appearance, or activity that is NOT a discrete name ('find me people riding a lawn mower', 'someone in a red jacket', 'a person carrying a package'): set `semantic_query` with the descriptive phrase, optionally combined with `label` for the object class. Never put descriptive phrases in `sub_label`."
+        )
+
+    attribute_classification_section = ""
+    if attribute_classifications:
+        model_lines = "\n".join(
+            f"- {m['name']}: applies to {', '.join(m['objects']) or 'any object'}"
+            for m in attribute_classifications
+        )
+        attribute_classification_section = (
+            "\n\nAttribute classification models are configured for the following object types:\n"
+            f"{model_lines}\n"
+            "When the user's request matches one of these classifications, set the search_objects `attribute` field to the matching label rather than using `semantic_query`. Reserve `semantic_query` for descriptive phrases that fall outside the configured attribute labels."
+        )
+
+    return f"""You are a helpful assistant for Frigate, a security camera NVR system. You help users answer questions about their cameras, detected objects, and events.
+
+Current server local date and time: {current_date_str} at {current_time_str}
+
+Do not start your response with phrases like "I will check...", "Let me see...", or "Let me look...". Answer directly.
+
+Always present times to the user in the server's local timezone. When tool results include start_time_local and end_time_local, use those exact strings when listing or describing detection times—do not convert or invent timestamps. Do not use UTC or ISO format with Z for the user-facing answer unless the tool result only provides Unix timestamps without local time fields.
+When users ask about "today", "yesterday", "this week", etc., use the current date above as reference.
+When searching for objects or events, use ISO 8601 format for dates (e.g., {current_date_str}T00:00:00Z for the start of today).
+Always be accurate with time calculations based on the current date provided.
+
+When a user refers to a specific object they have seen or describe with identifying details ("that green car", "the person in the red jacket", "a package left today"), prefer the find_similar_objects tool over search_objects. Use search_objects first only to locate the anchor event, then pass its id to find_similar_objects. For generic queries like "show me all cars today", keep using search_objects. If a user message begins with [attached_event:<id>], treat that event id as the anchor for any similarity or "tell me more" request in the same message and call find_similar_objects with that id.{semantic_search_section}{attribute_classification_section}{cameras_section}{speed_units_section}"""
--- a/web/public/locales/en/views/chat.json
+++ b/web/public/locales/en/views/chat.json
@ -60,5 +60,10 @@
  "stats": {
    "context": "{{tokens}} tokens",
    "tokens_per_second": "{{rate}} t/s"
+  },
+  "reasoning": {
+    "active": "Reasoning…",
+    "show": "Show reasoning",
+    "hide": "Hide reasoning"
  }
 }
--- a/web/src/components/chat/ReasoningBubble.tsx
+++ b/web/src/components/chat/ReasoningBubble.tsx
@ -0,0 +1,87 @@
+import { useState, useEffect, useRef } from "react";
+import { useTranslation } from "react-i18next";
+import { LuBrain, LuChevronDown, LuChevronRight } from "react-icons/lu";
+import {
+  Collapsible,
+  CollapsibleContent,
+  CollapsibleTrigger,
+} from "@/components/ui/collapsible";
+import { Button } from "@/components/ui/button";
+import { cn } from "@/lib/utils";
+
+type ReasoningBubbleProps = {
+  /** The accumulated reasoning text from the model. */
+  reasoning: string;
+  /**
+   * Whether the assistant has begun producing the user-facing answer.
+   * While false the reasoning is still streaming and we keep the panel
+   * open with the `reasoning.active` label ("Reasoning…" in English).
+   * Once true, the panel auto-collapses so the answer is the primary
+   * focus, but stays expandable.
+   */
+  answerStarted: boolean;
+};
+
+/**
+ * Collapsible panel that displays the model's reasoning text.
+ *
+ * The panel starts open and follows `answerStarted` automatically (open
+ * while it is false, closed once it becomes true) until the user toggles
+ * it; after that only the user's choice controls the open state.
+ */
+export function ReasoningBubble({
+  reasoning,
+  answerStarted,
+}: ReasoningBubbleProps) {
+  const { t } = useTranslation(["views/chat"]);
+  const [open, setOpen] = useState(true);
+  // Flipped to true on the first manual toggle; disables auto open/close.
+  const manuallyToggled = useRef(false);
+  // The open state most recently applied automatically (initially open).
+  const appliedAutoOpen = useRef(true);
+
+  useEffect(() => {
+    const autoOpen = !answerStarted;
+    if (manuallyToggled.current || autoOpen === appliedAutoOpen.current) {
+      return;
+    }
+    appliedAutoOpen.current = autoOpen;
+    setOpen(autoOpen);
+  }, [answerStarted]);
+
+  const onToggle = (nextOpen: boolean) => {
+    manuallyToggled.current = true;
+    setOpen(nextOpen);
+  };
+
+  // Before the answer starts the label is the "active" text; afterwards it
+  // reflects what clicking the trigger will do.
+  const resolveLabel = () => {
+    if (!answerStarted) return t("reasoning.active");
+    if (open) return t("reasoning.hide");
+    return t("reasoning.show");
+  };
+
+  const brainClassName = cn(
+    "size-3 shrink-0",
+    !answerStarted && "animate-pulse",
+  );
+  const Chevron = open ? LuChevronDown : LuChevronRight;
+
+  return (
+    <div className="self-start rounded-2xl bg-muted/60 px-3 py-2 text-muted-foreground">
+      <Collapsible open={open} onOpenChange={onToggle}>
+        <CollapsibleTrigger asChild>
+          <Button
+            variant="ghost"
+            size="sm"
+            className="h-auto w-full min-w-0 justify-start gap-2 whitespace-normal p-0 text-left text-xs hover:bg-transparent"
+          >
+            <LuBrain className={brainClassName} />
+            <span className="break-words font-medium">{resolveLabel()}</span>
+            {/* The chevron is only shown once the answer has started. */}
+            {answerStarted && <Chevron className="ml-auto size-3 shrink-0" />}
+          </Button>
+        </CollapsibleTrigger>
+        <CollapsibleContent>
+          <pre className="scrollbar-container mt-2 max-h-64 overflow-auto whitespace-pre-wrap break-words rounded bg-muted/50 p-2 font-sans text-xs leading-relaxed">
+            {reasoning}
+          </pre>
+        </CollapsibleContent>
+      </Collapsible>
+    </div>
+  );
+}
--- a/web/src/pages/Chat.tsx
+++ b/web/src/pages/Chat.tsx
@ -7,6 +7,7 @@ import { useState, useCallback, useRef, useEffect, useMemo } from "react";
 import axios from "axios";
 import { ChatEventThumbnailsRow } from "@/components/chat/ChatEventThumbnailsRow";
 import { MessageBubble } from "@/components/chat/ChatMessage";
+import { ReasoningBubble } from "@/components/chat/ReasoningBubble";
 import { ToolCallsGroup } from "@/components/chat/ToolCallsGroup";
 import { ChatStartingState } from "@/components/chat/ChatStartingState";
 import { ChatAttachmentChip } from "@/components/chat/ChatAttachmentChip";
@ -200,15 +201,21 @@ export default function ChatPage() {
                  const hasToolCalls =
                    msg.toolCalls && msg.toolCalls.length > 0;
                  const hasContent = !!msg.content?.trim();
+                  const hasReasoning = !!msg.reasoning?.trim();
                  const showProcessing =
-                    isLastAssistant && isLoading && !hasContent;
+                    isLastAssistant &&
+                    isLoading &&
+                    !hasContent &&
+                    !hasReasoning;

-                  // Hide empty placeholder only when there are no tool calls yet
+                  // Hide empty placeholder only when there are no tool calls
+                  // and no reasoning streaming yet
                  if (
                    isLastAssistant &&
                    isLoading &&
                    !hasContent &&
-                    !hasToolCalls
+                    !hasToolCalls &&
+                    !hasReasoning
                  )
                    return (
                      <div
@ -226,13 +233,22 @@ export default function ChatPage() {
                      {msg.role === "assistant" && hasToolCalls && (
                        <ToolCallsGroup toolCalls={msg.toolCalls!} />
                      )}
+                      {msg.role === "assistant" && hasReasoning && (
+                        <ReasoningBubble
+                          reasoning={msg.reasoning!}
+                          answerStarted={hasContent}
+                        />
+                      )}
                      {showProcessing ? (
                        <div className="flex items-center gap-2 self-start rounded-2xl bg-muted px-5 py-4">
                          <span className="size-2 animate-bounce rounded-full bg-muted-foreground/60 [animation-delay:-0.3s]" />
                          <span className="size-2 animate-bounce rounded-full bg-muted-foreground/60 [animation-delay:-0.15s]" />
                          <span className="size-2 animate-bounce rounded-full bg-muted-foreground/60" />
                        </div>
-                      ) : (
+                      ) : msg.role === "assistant" &&
+                        !hasContent &&
+                        hasReasoning &&
+                        !isComplete ? null : (
                        <MessageBubble
                          role={msg.role}
                          content={msg.content}
--- a/web/src/types/chat.ts
+++ b/web/src/types/chat.ts
@ -7,6 +7,7 @@ export type ToolCall = {
 export type ChatMessage = {
  role: "user" | "assistant";
  content: string;
+  reasoning?: string;
  toolCalls?: ToolCall[];
  stats?: ChatStats;
 };
--- a/web/src/utils/chatUtil.ts
+++ b/web/src/utils/chatUtil.ts
@ -27,6 +27,7 @@ type StreamChunk =
  | { type: "error"; error: string }
  | { type: "tool_calls"; tool_calls: ToolCall[] }
  | { type: "content"; delta: string }
+  | { type: "reasoning"; delta: string }
  | StatsChunk;

 /**
@ -109,6 +110,19 @@ export async function streamChatCompletion(
        });
        return "continue";
      }
+      if (data.type === "reasoning" && data.delta !== undefined) {
+        updateMessages((prev) => {
+          const next = [...prev];
+          const lastMsg = next[next.length - 1];
+          if (lastMsg?.role === "assistant")
+            next[next.length - 1] = {
+              ...lastMsg,
+              reasoning: (lastMsg.reasoning ?? "") + data.delta,
+            };
+          return next;
+        });
+        return "continue";
+      }
      if (data.type === "stats") {
        const stats: ChatStats = {
          promptTokens: data.prompt_tokens,