Add GenAI Backend Streaming and Chat (#22152)
Some checks are pending
CI / AMD64 Build (push) Waiting to run
CI / Assemble and push default build (push) Blocked by required conditions
CI / ARM Build (push) Waiting to run
CI / Jetson Jetpack 6 (push) Waiting to run
CI / AMD64 Extra Build (push) Blocked by required conditions
CI / ARM Extra Build (push) Blocked by required conditions
CI / Synaptics Build (push) Blocked by required conditions

* Add basic chat page with entry

* Add chat history

* processing

* Add markdown

* Improvements

* Adjust timing format

* Reduce fields in response

* More time parsing improvements

* Show tool calls separately from message

* Add title

* Improve UI handling

* Support streaming

* Full streaming support

* Fix tool calling

* Add copy button

* Improvements to UI

* Improve default behavior

* Implement message editing

* Add sub label to event tool filtering

* Cleanup

* Cleanup UI and prompt

* Cleanup UI bubbles

* Fix loading

* Add support for markdown tables

* Add thumbnail images to object results

* Add a starting state for chat

* Cleanup
This commit is contained in:
Nicolas Mowen 2026-02-27 09:07:30 -07:00 committed by GitHub
parent e7250f24cb
commit fa1f9a1fa4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
20 changed files with 3097 additions and 291 deletions

View File

@ -3,12 +3,13 @@
import base64 import base64
import json import json
import logging import logging
from datetime import datetime, timezone import time
from typing import Any, Dict, List, Optional from datetime import datetime
from typing import Any, Dict, Generator, List, Optional
import cv2 import cv2
from fastapi import APIRouter, Body, Depends, Request from fastapi import APIRouter, Body, Depends, Request
from fastapi.responses import JSONResponse from fastapi.responses import JSONResponse, StreamingResponse
from pydantic import BaseModel from pydantic import BaseModel
from frigate.api.auth import ( from frigate.api.auth import (
@ -20,15 +21,60 @@ from frigate.api.defs.request.chat_body import ChatCompletionRequest
from frigate.api.defs.response.chat_response import ( from frigate.api.defs.response.chat_response import (
ChatCompletionResponse, ChatCompletionResponse,
ChatMessageResponse, ChatMessageResponse,
ToolCall,
) )
from frigate.api.defs.tags import Tags from frigate.api.defs.tags import Tags
from frigate.api.event import events from frigate.api.event import events
from frigate.genai.utils import build_assistant_message_for_conversation
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
router = APIRouter(tags=[Tags.chat]) router = APIRouter(tags=[Tags.chat])
def _chunk_content(content: str, chunk_size: int = 80) -> Generator[str, None, None]:
"""Yield content in word-aware chunks for streaming."""
if not content:
return
words = content.split(" ")
current: List[str] = []
current_len = 0
for w in words:
current.append(w)
current_len += len(w) + 1
if current_len >= chunk_size:
yield " ".join(current) + " "
current = []
current_len = 0
if current:
yield " ".join(current)
def _format_events_with_local_time(
events_list: List[Dict[str, Any]],
) -> List[Dict[str, Any]]:
"""Add human-readable local start/end times to each event for the LLM."""
result = []
for evt in events_list:
if not isinstance(evt, dict):
result.append(evt)
continue
copy_evt = dict(evt)
try:
start_ts = evt.get("start_time")
end_ts = evt.get("end_time")
if start_ts is not None:
dt_start = datetime.fromtimestamp(start_ts)
copy_evt["start_time_local"] = dt_start.strftime("%Y-%m-%d %I:%M:%S %p")
if end_ts is not None:
dt_end = datetime.fromtimestamp(end_ts)
copy_evt["end_time_local"] = dt_end.strftime("%Y-%m-%d %I:%M:%S %p")
except (TypeError, ValueError, OSError):
pass
result.append(copy_evt)
return result
class ToolExecuteRequest(BaseModel): class ToolExecuteRequest(BaseModel):
"""Request model for tool execution.""" """Request model for tool execution."""
@ -52,19 +98,25 @@ def get_tool_definitions() -> List[Dict[str, Any]]:
"Search for detected objects in Frigate by camera, object label, time range, " "Search for detected objects in Frigate by camera, object label, time range, "
"zones, and other filters. Use this to answer questions about when " "zones, and other filters. Use this to answer questions about when "
"objects were detected, what objects appeared, or to find specific object detections. " "objects were detected, what objects appeared, or to find specific object detections. "
"An 'object' in Frigate represents a tracked detection (e.g., a person, package, car)." "An 'object' in Frigate represents a tracked detection (e.g., a person, package, car). "
"When the user asks about a specific name (person, delivery company, animal, etc.), "
"filter by sub_label only and do not set label."
), ),
"parameters": { "parameters": {
"type": "object", "type": "object",
"properties": { "properties": {
"camera": { "camera": {
"type": "string", "type": "string",
"description": "Camera name to filter by (optional). Use 'all' for all cameras.", "description": "Camera name to filter by (optional).",
}, },
"label": { "label": {
"type": "string", "type": "string",
"description": "Object label to filter by (e.g., 'person', 'package', 'car').", "description": "Object label to filter by (e.g., 'person', 'package', 'car').",
}, },
"sub_label": {
"type": "string",
"description": "Name of a person, delivery company, animal, etc. When filtering by a specific name, use only sub_label; do not set label.",
},
"after": { "after": {
"type": "string", "type": "string",
"description": "Start time in ISO 8601 format (e.g., '2024-01-01T00:00:00Z').", "description": "Start time in ISO 8601 format (e.g., '2024-01-01T00:00:00Z').",
@ -80,8 +132,8 @@ def get_tool_definitions() -> List[Dict[str, Any]]:
}, },
"limit": { "limit": {
"type": "integer", "type": "integer",
"description": "Maximum number of objects to return (default: 10).", "description": "Maximum number of objects to return (default: 25).",
"default": 10, "default": 25,
}, },
}, },
}, },
@ -119,14 +171,13 @@ def get_tool_definitions() -> List[Dict[str, Any]]:
summary="Get available tools", summary="Get available tools",
description="Returns OpenAI-compatible tool definitions for function calling.", description="Returns OpenAI-compatible tool definitions for function calling.",
) )
def get_tools(request: Request) -> JSONResponse: def get_tools() -> JSONResponse:
"""Get list of available tools for LLM function calling.""" """Get list of available tools for LLM function calling."""
tools = get_tool_definitions() tools = get_tool_definitions()
return JSONResponse(content={"tools": tools}) return JSONResponse(content={"tools": tools})
async def _execute_search_objects( async def _execute_search_objects(
request: Request,
arguments: Dict[str, Any], arguments: Dict[str, Any],
allowed_cameras: List[str], allowed_cameras: List[str],
) -> JSONResponse: ) -> JSONResponse:
@ -136,23 +187,26 @@ async def _execute_search_objects(
This searches for detected objects (events) in Frigate using the same This searches for detected objects (events) in Frigate using the same
logic as the events API endpoint. logic as the events API endpoint.
""" """
# Parse ISO 8601 timestamps to Unix timestamps if provided # Parse after/before as server local time; convert to Unix timestamp
after = arguments.get("after") after = arguments.get("after")
before = arguments.get("before") before = arguments.get("before")
def _parse_as_local_timestamp(s: str):
s = s.replace("Z", "").strip()[:19]
dt = datetime.strptime(s, "%Y-%m-%dT%H:%M:%S")
return time.mktime(dt.timetuple())
if after: if after:
try: try:
after_dt = datetime.fromisoformat(after.replace("Z", "+00:00")) after = _parse_as_local_timestamp(after)
after = after_dt.timestamp() except (ValueError, AttributeError, TypeError):
except (ValueError, AttributeError):
logger.warning(f"Invalid 'after' timestamp format: {after}") logger.warning(f"Invalid 'after' timestamp format: {after}")
after = None after = None
if before: if before:
try: try:
before_dt = datetime.fromisoformat(before.replace("Z", "+00:00")) before = _parse_as_local_timestamp(before)
before = before_dt.timestamp() except (ValueError, AttributeError, TypeError):
except (ValueError, AttributeError):
logger.warning(f"Invalid 'before' timestamp format: {before}") logger.warning(f"Invalid 'before' timestamp format: {before}")
before = None before = None
@ -165,15 +219,14 @@ async def _execute_search_objects(
# Build query parameters compatible with EventsQueryParams # Build query parameters compatible with EventsQueryParams
query_params = EventsQueryParams( query_params = EventsQueryParams(
camera=arguments.get("camera", "all"),
cameras=arguments.get("camera", "all"), cameras=arguments.get("camera", "all"),
label=arguments.get("label", "all"),
labels=arguments.get("label", "all"), labels=arguments.get("label", "all"),
sub_labels=arguments.get("sub_label", "all").lower(),
zones=zones, zones=zones,
zone=zones, zone=zones,
after=after, after=after,
before=before, before=before,
limit=arguments.get("limit", 10), limit=arguments.get("limit", 25),
) )
try: try:
@ -202,7 +255,6 @@ async def _execute_search_objects(
description="Execute a tool function call from an LLM.", description="Execute a tool function call from an LLM.",
) )
async def execute_tool( async def execute_tool(
request: Request,
body: ToolExecuteRequest = Body(...), body: ToolExecuteRequest = Body(...),
allowed_cameras: List[str] = Depends(get_allowed_cameras_for_filter), allowed_cameras: List[str] = Depends(get_allowed_cameras_for_filter),
) -> JSONResponse: ) -> JSONResponse:
@ -218,7 +270,7 @@ async def execute_tool(
logger.debug(f"Executing tool: {tool_name} with arguments: {arguments}") logger.debug(f"Executing tool: {tool_name} with arguments: {arguments}")
if tool_name == "search_objects": if tool_name == "search_objects":
return await _execute_search_objects(request, arguments, allowed_cameras) return await _execute_search_objects(arguments, allowed_cameras)
return JSONResponse( return JSONResponse(
content={ content={
@ -334,7 +386,7 @@ async def _execute_tool_internal(
This is used by the chat completion endpoint to execute tools. This is used by the chat completion endpoint to execute tools.
""" """
if tool_name == "search_objects": if tool_name == "search_objects":
response = await _execute_search_objects(request, arguments, allowed_cameras) response = await _execute_search_objects(arguments, allowed_cameras)
try: try:
if hasattr(response, "body"): if hasattr(response, "body"):
body_str = response.body.decode("utf-8") body_str = response.body.decode("utf-8")
@ -349,15 +401,109 @@ async def _execute_tool_internal(
elif tool_name == "get_live_context": elif tool_name == "get_live_context":
camera = arguments.get("camera") camera = arguments.get("camera")
if not camera: if not camera:
logger.error(
"Tool get_live_context failed: camera parameter is required. "
"Arguments: %s",
json.dumps(arguments),
)
return {"error": "Camera parameter is required"} return {"error": "Camera parameter is required"}
return await _execute_get_live_context(request, camera, allowed_cameras) return await _execute_get_live_context(request, camera, allowed_cameras)
else: else:
logger.error(
"Tool call failed: unknown tool %r. Expected one of: search_objects, get_live_context. "
"Arguments received: %s",
tool_name,
json.dumps(arguments),
)
return {"error": f"Unknown tool: {tool_name}"} return {"error": f"Unknown tool: {tool_name}"}
async def _execute_pending_tools(
    pending_tool_calls: List[Dict[str, Any]],
    request: Request,
    allowed_cameras: List[str],
) -> tuple[List[ToolCall], List[Dict[str, Any]]]:
    """
    Execute a list of tool calls; return (ToolCall list for API response, tool result dicts for conversation).

    Each entry in pending_tool_calls is expected to carry "name", "id", and
    optionally "arguments" (defaults to {}). Tool execution failures never
    propagate: an exception is logged and converted into a JSON error
    payload so the conversation can continue.
    """
    tool_calls_out: List[ToolCall] = []
    tool_results: List[Dict[str, Any]] = []
    for tool_call in pending_tool_calls:
        tool_name = tool_call["name"]
        tool_args = tool_call.get("arguments") or {}
        tool_call_id = tool_call["id"]
        logger.debug(
            f"Executing tool: {tool_name} (id: {tool_call_id}) with arguments: {json.dumps(tool_args, indent=2)}"
        )
        try:
            tool_result = await _execute_tool_internal(
                tool_name, tool_args, request, allowed_cameras
            )
            # A dict result with an "error" key is a tool-level failure:
            # log it, but still forward the payload to the LLM verbatim.
            if isinstance(tool_result, dict) and tool_result.get("error"):
                logger.error(
                    "Tool call %s (id: %s) returned error: %s. Arguments: %s",
                    tool_name,
                    tool_call_id,
                    tool_result.get("error"),
                    json.dumps(tool_args),
                )
            # Search results are enriched with local-time strings and then
            # trimmed to a small field whitelist to keep the conversation
            # (and the model's context window) compact.
            if tool_name == "search_objects" and isinstance(tool_result, list):
                tool_result = _format_events_with_local_time(tool_result)
                _keys = {
                    "id",
                    "camera",
                    "label",
                    "zones",
                    "start_time_local",
                    "end_time_local",
                    "sub_label",
                    "event_count",
                }
                tool_result = [
                    {k: evt[k] for k in _keys if k in evt}
                    for evt in tool_result
                    if isinstance(evt, dict)
                ]
            # Normalize the result to a string: JSON for containers,
            # passthrough for str, str() for anything else.
            result_content = (
                json.dumps(tool_result)
                if isinstance(tool_result, (dict, list))
                else (tool_result if isinstance(tool_result, str) else str(tool_result))
            )
            tool_calls_out.append(
                ToolCall(name=tool_name, arguments=tool_args, response=result_content)
            )
            # "tool" role message ties the result back to the originating
            # call id in the OpenAI-style conversation.
            tool_results.append(
                {
                    "role": "tool",
                    "tool_call_id": tool_call_id,
                    "content": result_content,
                }
            )
        except Exception as e:
            logger.error(
                "Error executing tool %s (id: %s): %s. Arguments: %s",
                tool_name,
                tool_call_id,
                e,
                json.dumps(tool_args),
                exc_info=True,
            )
            # Surface the failure to both the API caller and the LLM so a
            # single broken tool call does not abort the whole completion.
            error_content = json.dumps({"error": f"Tool execution failed: {str(e)}"})
            tool_calls_out.append(
                ToolCall(name=tool_name, arguments=tool_args, response=error_content)
            )
            tool_results.append(
                {
                    "role": "tool",
                    "tool_call_id": tool_call_id,
                    "content": error_content,
                }
            )
    return (tool_calls_out, tool_results)
@router.post( @router.post(
"/chat/completion", "/chat/completion",
response_model=ChatCompletionResponse,
dependencies=[Depends(allow_any_authenticated())], dependencies=[Depends(allow_any_authenticated())],
summary="Chat completion with tool calling", summary="Chat completion with tool calling",
description=( description=(
@ -369,7 +515,7 @@ async def chat_completion(
request: Request, request: Request,
body: ChatCompletionRequest = Body(...), body: ChatCompletionRequest = Body(...),
allowed_cameras: List[str] = Depends(get_allowed_cameras_for_filter), allowed_cameras: List[str] = Depends(get_allowed_cameras_for_filter),
) -> JSONResponse: ):
""" """
Chat completion endpoint with tool calling support. Chat completion endpoint with tool calling support.
@ -394,9 +540,9 @@ async def chat_completion(
tools = get_tool_definitions() tools = get_tool_definitions()
conversation = [] conversation = []
current_datetime = datetime.now(timezone.utc) current_datetime = datetime.now()
current_date_str = current_datetime.strftime("%Y-%m-%d") current_date_str = current_datetime.strftime("%Y-%m-%d")
current_time_str = current_datetime.strftime("%H:%M:%S %Z") current_time_str = current_datetime.strftime("%I:%M:%S %p")
cameras_info = [] cameras_info = []
config = request.app.frigate_config config = request.app.frigate_config
@ -429,9 +575,12 @@ async def chat_completion(
system_prompt = f"""You are a helpful assistant for Frigate, a security camera NVR system. You help users answer questions about their cameras, detected objects, and events. system_prompt = f"""You are a helpful assistant for Frigate, a security camera NVR system. You help users answer questions about their cameras, detected objects, and events.
Current date and time: {current_date_str} at {current_time_str} (UTC) Current server local date and time: {current_date_str} at {current_time_str}
When users ask questions about "today", "yesterday", "this week", etc., use the current date above as reference. Do not start your response with phrases like "I will check...", "Let me see...", or "Let me look...". Answer directly.
Always present times to the user in the server's local timezone. When tool results include start_time_local and end_time_local, use those exact strings when listing or describing detection times—do not convert or invent timestamps. Do not use UTC or ISO format with Z for the user-facing answer unless the tool result only provides Unix timestamps without local time fields.
When users ask about "today", "yesterday", "this week", etc., use the current date above as reference.
When searching for objects or events, use ISO 8601 format for dates (e.g., {current_date_str}T00:00:00Z for the start of today). When searching for objects or events, use ISO 8601 format for dates (e.g., {current_date_str}T00:00:00Z for the start of today).
Always be accurate with time calculations based on the current date provided.{cameras_section}{live_image_note}""" Always be accurate with time calculations based on the current date provided.{cameras_section}{live_image_note}"""
@ -471,6 +620,7 @@ Always be accurate with time calculations based on the current date provided.{ca
conversation.append(msg_dict) conversation.append(msg_dict)
tool_iterations = 0 tool_iterations = 0
tool_calls: List[ToolCall] = []
max_iterations = body.max_tool_iterations max_iterations = body.max_tool_iterations
logger.debug( logger.debug(
@ -478,6 +628,81 @@ Always be accurate with time calculations based on the current date provided.{ca
f"{len(tools)} tool(s) available, max_iterations={max_iterations}" f"{len(tools)} tool(s) available, max_iterations={max_iterations}"
) )
# True LLM streaming when client supports it and stream requested
if body.stream and hasattr(genai_client, "chat_with_tools_stream"):
stream_tool_calls: List[ToolCall] = []
stream_iterations = 0
async def stream_body_llm():
nonlocal conversation, stream_tool_calls, stream_iterations
while stream_iterations < max_iterations:
logger.debug(
f"Streaming LLM (iteration {stream_iterations + 1}/{max_iterations}) "
f"with {len(conversation)} message(s)"
)
async for event in genai_client.chat_with_tools_stream(
messages=conversation,
tools=tools if tools else None,
tool_choice="auto",
):
kind, value = event
if kind == "content_delta":
yield (
json.dumps({"type": "content", "delta": value}).encode(
"utf-8"
)
+ b"\n"
)
elif kind == "message":
msg = value
if msg.get("finish_reason") == "error":
yield (
json.dumps(
{
"type": "error",
"error": "An error occurred while processing your request.",
}
).encode("utf-8")
+ b"\n"
)
return
pending = msg.get("tool_calls")
if pending:
stream_iterations += 1
conversation.append(
build_assistant_message_for_conversation(
msg.get("content"), pending
)
)
executed_calls, tool_results = await _execute_pending_tools(
pending, request, allowed_cameras
)
stream_tool_calls.extend(executed_calls)
conversation.extend(tool_results)
yield (
json.dumps(
{
"type": "tool_calls",
"tool_calls": [
tc.model_dump() for tc in stream_tool_calls
],
}
).encode("utf-8")
+ b"\n"
)
break
else:
yield (json.dumps({"type": "done"}).encode("utf-8") + b"\n")
return
else:
yield json.dumps({"type": "done"}).encode("utf-8") + b"\n"
return StreamingResponse(
stream_body_llm(),
media_type="application/x-ndjson",
headers={"X-Accel-Buffering": "no"},
)
try: try:
while tool_iterations < max_iterations: while tool_iterations < max_iterations:
logger.debug( logger.debug(
@ -499,117 +724,71 @@ Always be accurate with time calculations based on the current date provided.{ca
status_code=500, status_code=500,
) )
assistant_message = { conversation.append(
"role": "assistant", build_assistant_message_for_conversation(
"content": response.get("content"), response.get("content"), response.get("tool_calls")
} )
if response.get("tool_calls"): )
assistant_message["tool_calls"] = [
{
"id": tc["id"],
"type": "function",
"function": {
"name": tc["name"],
"arguments": json.dumps(tc["arguments"]),
},
}
for tc in response["tool_calls"]
]
conversation.append(assistant_message)
tool_calls = response.get("tool_calls") pending_tool_calls = response.get("tool_calls")
if not tool_calls: if not pending_tool_calls:
logger.debug( logger.debug(
f"Chat completion finished with final answer (iterations: {tool_iterations})" f"Chat completion finished with final answer (iterations: {tool_iterations})"
) )
final_content = response.get("content") or ""
if body.stream:
async def stream_body() -> Any:
if tool_calls:
yield (
json.dumps(
{
"type": "tool_calls",
"tool_calls": [
tc.model_dump() for tc in tool_calls
],
}
).encode("utf-8")
+ b"\n"
)
# Stream content in word-sized chunks for smooth UX
for part in _chunk_content(final_content):
yield (
json.dumps({"type": "content", "delta": part}).encode(
"utf-8"
)
+ b"\n"
)
yield json.dumps({"type": "done"}).encode("utf-8") + b"\n"
return StreamingResponse(
stream_body(),
media_type="application/x-ndjson",
)
return JSONResponse( return JSONResponse(
content=ChatCompletionResponse( content=ChatCompletionResponse(
message=ChatMessageResponse( message=ChatMessageResponse(
role="assistant", role="assistant",
content=response.get("content"), content=final_content,
tool_calls=None, tool_calls=None,
), ),
finish_reason=response.get("finish_reason", "stop"), finish_reason=response.get("finish_reason", "stop"),
tool_iterations=tool_iterations, tool_iterations=tool_iterations,
tool_calls=tool_calls,
).model_dump(), ).model_dump(),
) )
# Execute tools
tool_iterations += 1 tool_iterations += 1
logger.debug( logger.debug(
f"Tool calls detected (iteration {tool_iterations}/{max_iterations}): " f"Tool calls detected (iteration {tool_iterations}/{max_iterations}): "
f"{len(tool_calls)} tool(s) to execute" f"{len(pending_tool_calls)} tool(s) to execute"
) )
tool_results = [] executed_calls, tool_results = await _execute_pending_tools(
pending_tool_calls, request, allowed_cameras
for tool_call in tool_calls: )
tool_name = tool_call["name"] tool_calls.extend(executed_calls)
tool_args = tool_call["arguments"]
tool_call_id = tool_call["id"]
logger.debug(
f"Executing tool: {tool_name} (id: {tool_call_id}) with arguments: {json.dumps(tool_args, indent=2)}"
)
try:
tool_result = await _execute_tool_internal(
tool_name, tool_args, request, allowed_cameras
)
if isinstance(tool_result, dict):
result_content = json.dumps(tool_result)
result_summary = tool_result
if isinstance(tool_result, dict) and isinstance(
tool_result.get("content"), list
):
result_count = len(tool_result.get("content", []))
result_summary = {
"count": result_count,
"sample": tool_result.get("content", [])[:2]
if result_count > 0
else [],
}
logger.debug(
f"Tool {tool_name} (id: {tool_call_id}) completed successfully. "
f"Result: {json.dumps(result_summary, indent=2)}"
)
elif isinstance(tool_result, str):
result_content = tool_result
logger.debug(
f"Tool {tool_name} (id: {tool_call_id}) completed successfully. "
f"Result length: {len(result_content)} characters"
)
else:
result_content = str(tool_result)
logger.debug(
f"Tool {tool_name} (id: {tool_call_id}) completed successfully. "
f"Result type: {type(tool_result).__name__}"
)
tool_results.append(
{
"role": "tool",
"tool_call_id": tool_call_id,
"content": result_content,
}
)
except Exception as e:
logger.error(
f"Error executing tool {tool_name} (id: {tool_call_id}): {e}",
exc_info=True,
)
error_content = json.dumps({"error": "Tool execution failed"})
tool_results.append(
{
"role": "tool",
"tool_call_id": tool_call_id,
"content": error_content,
}
)
logger.debug(
f"Tool {tool_name} (id: {tool_call_id}) failed. Error result added to conversation."
)
conversation.extend(tool_results) conversation.extend(tool_results)
logger.debug( logger.debug(
f"Added {len(tool_results)} tool result(s) to conversation. " f"Added {len(tool_results)} tool result(s) to conversation. "
@ -628,6 +807,7 @@ Always be accurate with time calculations based on the current date provided.{ca
), ),
finish_reason="length", finish_reason="length",
tool_iterations=tool_iterations, tool_iterations=tool_iterations,
tool_calls=tool_calls,
).model_dump(), ).model_dump(),
) )

View File

@ -39,3 +39,7 @@ class ChatCompletionRequest(BaseModel):
"user message as multimodal content. Use with get_live_context for detection info." "user message as multimodal content. Use with get_live_context for detection info."
), ),
) )
stream: bool = Field(
default=False,
description="If true, stream the final assistant response in the body as newline-delimited JSON.",
)

View File

@ -5,8 +5,8 @@ from typing import Any, Optional
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
class ToolCall(BaseModel): class ToolCallInvocation(BaseModel):
"""A tool call from the LLM.""" """A tool call requested by the LLM (before execution)."""
id: str = Field(description="Unique identifier for this tool call") id: str = Field(description="Unique identifier for this tool call")
name: str = Field(description="Tool name to call") name: str = Field(description="Tool name to call")
@ -20,11 +20,24 @@ class ChatMessageResponse(BaseModel):
content: Optional[str] = Field( content: Optional[str] = Field(
default=None, description="Message content (None if tool calls present)" default=None, description="Message content (None if tool calls present)"
) )
tool_calls: Optional[list[ToolCall]] = Field( tool_calls: Optional[list[ToolCallInvocation]] = Field(
default=None, description="Tool calls if LLM wants to call tools" default=None, description="Tool calls if LLM wants to call tools"
) )
class ToolCall(BaseModel):
    """A tool that was executed during the completion, with its response."""

    # Name of the tool that was invoked (e.g. "search_objects").
    name: str = Field(description="Tool name that was called")
    # Arguments the LLM supplied for the call.
    arguments: dict[str, Any] = Field(
        default_factory=dict, description="Arguments passed to the tool"
    )
    # Serialized result string returned by the tool execution.
    response: str = Field(
        default="",
        description="The response or result returned from the tool execution",
    )
class ChatCompletionResponse(BaseModel): class ChatCompletionResponse(BaseModel):
"""Response from chat completion.""" """Response from chat completion."""
@ -35,3 +48,7 @@ class ChatCompletionResponse(BaseModel):
tool_iterations: int = Field( tool_iterations: int = Field(
default=0, description="Number of tool call iterations performed" default=0, description="Number of tool call iterations performed"
) )
tool_calls: list[ToolCall] = Field(
default_factory=list,
description="List of tool calls that were executed during this completion",
)

View File

@ -5,10 +5,12 @@ import json
import logging import logging
from typing import Any, Optional from typing import Any, Optional
import httpx
import requests import requests
from frigate.config import GenAIProviderEnum from frigate.config import GenAIProviderEnum
from frigate.genai import GenAIClient, register_genai_provider from frigate.genai import GenAIClient, register_genai_provider
from frigate.genai.utils import parse_tool_calls_from_message
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -100,7 +102,79 @@ class LlamaCppClient(GenAIClient):
def get_context_size(self) -> int: def get_context_size(self) -> int:
"""Get the context window size for llama.cpp.""" """Get the context window size for llama.cpp."""
return self.genai_config.provider_options.get("context_size", 4096) return self.provider_options.get("context_size", 4096)
def _build_payload(
self,
messages: list[dict[str, Any]],
tools: Optional[list[dict[str, Any]]],
tool_choice: Optional[str],
stream: bool = False,
) -> dict[str, Any]:
"""Build request payload for chat completions (sync or stream)."""
openai_tool_choice = None
if tool_choice:
if tool_choice == "none":
openai_tool_choice = "none"
elif tool_choice == "auto":
openai_tool_choice = "auto"
elif tool_choice == "required":
openai_tool_choice = "required"
payload: dict[str, Any] = {
"messages": messages,
"model": self.genai_config.model,
}
if stream:
payload["stream"] = True
if tools:
payload["tools"] = tools
if openai_tool_choice is not None:
payload["tool_choice"] = openai_tool_choice
provider_opts = {
k: v for k, v in self.provider_options.items() if k != "context_size"
}
payload.update(provider_opts)
return payload
def _message_from_choice(self, choice: dict[str, Any]) -> dict[str, Any]:
    """Parse OpenAI-style choice into {content, tool_calls, finish_reason}.

    Content is stripped and normalized to None when empty. When the API
    omits finish_reason, it is inferred: "tool_calls" if tool calls are
    present, "stop" if there is content, otherwise "error".
    """
    raw_message = choice.get("message", {})
    text = raw_message.get("content")
    text = text.strip() if text else None
    calls = parse_tool_calls_from_message(raw_message)
    reason = choice.get("finish_reason")
    if not reason:
        if calls:
            reason = "tool_calls"
        elif text:
            reason = "stop"
        else:
            reason = "error"
    return {
        "content": text,
        "tool_calls": calls,
        "finish_reason": reason,
    }
@staticmethod
def _streamed_tool_calls_to_list(
tool_calls_by_index: dict[int, dict[str, Any]],
) -> Optional[list[dict[str, Any]]]:
"""Convert streamed tool_calls index map to list of {id, name, arguments}."""
if not tool_calls_by_index:
return None
result = []
for idx in sorted(tool_calls_by_index.keys()):
t = tool_calls_by_index[idx]
args_str = t.get("arguments") or "{}"
try:
arguments = json.loads(args_str)
except json.JSONDecodeError:
arguments = {}
result.append(
{
"id": t.get("id", ""),
"name": t.get("name", ""),
"arguments": arguments,
}
)
return result if result else None
def chat_with_tools( def chat_with_tools(
self, self,
@ -123,32 +197,8 @@ class LlamaCppClient(GenAIClient):
"tool_calls": None, "tool_calls": None,
"finish_reason": "error", "finish_reason": "error",
} }
try: try:
openai_tool_choice = None payload = self._build_payload(messages, tools, tool_choice, stream=False)
if tool_choice:
if tool_choice == "none":
openai_tool_choice = "none"
elif tool_choice == "auto":
openai_tool_choice = "auto"
elif tool_choice == "required":
openai_tool_choice = "required"
payload = {
"model": self.genai_config.model,
"messages": messages,
}
if tools:
payload["tools"] = tools
if openai_tool_choice is not None:
payload["tool_choice"] = openai_tool_choice
provider_opts = {
k: v for k, v in self.provider_options.items() if k != "context_size"
}
payload.update(provider_opts)
response = requests.post( response = requests.post(
f"{self.provider}/v1/chat/completions", f"{self.provider}/v1/chat/completions",
json=payload, json=payload,
@ -156,60 +206,13 @@ class LlamaCppClient(GenAIClient):
) )
response.raise_for_status() response.raise_for_status()
result = response.json() result = response.json()
if result is None or "choices" not in result or len(result["choices"]) == 0: if result is None or "choices" not in result or len(result["choices"]) == 0:
return { return {
"content": None, "content": None,
"tool_calls": None, "tool_calls": None,
"finish_reason": "error", "finish_reason": "error",
} }
return self._message_from_choice(result["choices"][0])
choice = result["choices"][0]
message = choice.get("message", {})
content = message.get("content")
if content:
content = content.strip()
else:
content = None
tool_calls = None
if "tool_calls" in message and message["tool_calls"]:
tool_calls = []
for tool_call in message["tool_calls"]:
try:
function_data = tool_call.get("function", {})
arguments_str = function_data.get("arguments", "{}")
arguments = json.loads(arguments_str)
except (json.JSONDecodeError, KeyError, TypeError) as e:
logger.warning(
f"Failed to parse tool call arguments: {e}, "
f"tool: {function_data.get('name', 'unknown')}"
)
arguments = {}
tool_calls.append(
{
"id": tool_call.get("id", ""),
"name": function_data.get("name", ""),
"arguments": arguments,
}
)
finish_reason = "error"
if "finish_reason" in choice and choice["finish_reason"]:
finish_reason = choice["finish_reason"]
elif tool_calls:
finish_reason = "tool_calls"
elif content:
finish_reason = "stop"
return {
"content": content,
"tool_calls": tool_calls,
"finish_reason": finish_reason,
}
except requests.exceptions.Timeout as e: except requests.exceptions.Timeout as e:
logger.warning("llama.cpp request timed out: %s", str(e)) logger.warning("llama.cpp request timed out: %s", str(e))
return { return {
@ -221,8 +224,7 @@ class LlamaCppClient(GenAIClient):
error_detail = str(e) error_detail = str(e)
if hasattr(e, "response") and e.response is not None: if hasattr(e, "response") and e.response is not None:
try: try:
error_body = e.response.text error_detail = f"{str(e)} - Response: {e.response.text[:500]}"
error_detail = f"{str(e)} - Response: {error_body[:500]}"
except Exception: except Exception:
pass pass
logger.warning("llama.cpp returned an error: %s", error_detail) logger.warning("llama.cpp returned an error: %s", error_detail)
@ -238,3 +240,111 @@ class LlamaCppClient(GenAIClient):
"tool_calls": None, "tool_calls": None,
"finish_reason": "error", "finish_reason": "error",
} }
    async def chat_with_tools_stream(
        self,
        messages: list[dict[str, Any]],
        tools: Optional[list[dict[str, Any]]] = None,
        tool_choice: Optional[str] = "auto",
    ):
        """Stream chat with tools via OpenAI-compatible streaming API.

        Yields ("content_delta", str) tuples as text arrives, followed by a
        single final ("message", {content, tool_calls, finish_reason}) tuple.
        On any failure, only a ("message", ...) with finish_reason "error" is
        yielded.
        """
        if self.provider is None:
            logger.warning(
                "llama.cpp provider has not been initialized. Check your llama.cpp configuration."
            )
            yield (
                "message",
                {
                    "content": None,
                    "tool_calls": None,
                    "finish_reason": "error",
                },
            )
            return

        try:
            payload = self._build_payload(messages, tools, tool_choice, stream=True)
            # Accumulators for the streamed response: ordered text fragments,
            # and partial tool calls keyed by their stream "index" field.
            content_parts: list[str] = []
            tool_calls_by_index: dict[int, dict[str, Any]] = {}
            finish_reason = "stop"
            async with httpx.AsyncClient(timeout=float(self.timeout)) as client:
                async with client.stream(
                    "POST",
                    f"{self.provider}/v1/chat/completions",
                    json=payload,
                ) as response:
                    response.raise_for_status()
                    # Server-Sent Events framing: payload lines start with
                    # "data: "; the sentinel "[DONE]" terminates the stream.
                    async for line in response.aiter_lines():
                        if not line.startswith("data: "):
                            continue
                        data_str = line[6:].strip()
                        if data_str == "[DONE]":
                            break
                        try:
                            data = json.loads(data_str)
                        except json.JSONDecodeError:
                            # Skip malformed or partial lines rather than abort.
                            continue
                        choices = data.get("choices") or []
                        if not choices:
                            continue
                        delta = choices[0].get("delta", {})
                        if choices[0].get("finish_reason"):
                            finish_reason = choices[0]["finish_reason"]
                        if delta.get("content"):
                            content_parts.append(delta["content"])
                            yield ("content_delta", delta["content"])
                        # Tool-call fragments arrive incrementally; merge by
                        # index, concatenating argument text as it streams in.
                        for tc in delta.get("tool_calls") or []:
                            idx = tc.get("index", 0)
                            fn = tc.get("function") or {}
                            if idx not in tool_calls_by_index:
                                tool_calls_by_index[idx] = {
                                    "id": tc.get("id", ""),
                                    "name": tc.get("name") or fn.get("name", ""),
                                    "arguments": "",
                                }
                            t = tool_calls_by_index[idx]
                            if tc.get("id"):
                                t["id"] = tc["id"]
                            name = tc.get("name") or fn.get("name")
                            if name:
                                t["name"] = name
                            arg = tc.get("arguments") or fn.get("arguments")
                            if arg is not None:
                                # Providers may send argument pieces as strings
                                # or pre-parsed objects; normalize to text.
                                t["arguments"] += (
                                    arg if isinstance(arg, str) else json.dumps(arg)
                                )
            full_content = "".join(content_parts).strip() or None
            tool_calls_list = self._streamed_tool_calls_to_list(tool_calls_by_index)
            if tool_calls_list:
                # Tool calls take precedence over the server-reported reason.
                finish_reason = "tool_calls"
            yield (
                "message",
                {
                    "content": full_content,
                    "tool_calls": tool_calls_list,
                    "finish_reason": finish_reason,
                },
            )
        except httpx.HTTPStatusError as e:
            logger.warning("llama.cpp streaming HTTP error: %s", e)
            yield (
                "message",
                {
                    "content": None,
                    "tool_calls": None,
                    "finish_reason": "error",
                },
            )
        except Exception as e:
            logger.warning(
                "Unexpected error in llama.cpp chat_with_tools_stream: %s", str(e)
            )
            yield (
                "message",
                {
                    "content": None,
                    "tool_calls": None,
                    "finish_reason": "error",
                },
            )

View File

@ -1,15 +1,16 @@
"""Ollama Provider for Frigate AI.""" """Ollama Provider for Frigate AI."""
import json
import logging import logging
from typing import Any, Optional from typing import Any, Optional
from httpx import RemoteProtocolError, TimeoutException from httpx import RemoteProtocolError, TimeoutException
from ollama import AsyncClient as OllamaAsyncClient
from ollama import Client as ApiClient from ollama import Client as ApiClient
from ollama import ResponseError from ollama import ResponseError
from frigate.config import GenAIProviderEnum from frigate.config import GenAIProviderEnum
from frigate.genai import GenAIClient, register_genai_provider from frigate.genai import GenAIClient, register_genai_provider
from frigate.genai.utils import parse_tool_calls_from_message
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -88,6 +89,73 @@ class OllamaClient(GenAIClient):
"num_ctx", 4096 "num_ctx", 4096
) )
def _build_request_params(
self,
messages: list[dict[str, Any]],
tools: Optional[list[dict[str, Any]]],
tool_choice: Optional[str],
stream: bool = False,
) -> dict[str, Any]:
"""Build request_messages and params for chat (sync or stream)."""
request_messages = []
for msg in messages:
msg_dict = {
"role": msg.get("role"),
"content": msg.get("content", ""),
}
if msg.get("tool_call_id"):
msg_dict["tool_call_id"] = msg["tool_call_id"]
if msg.get("name"):
msg_dict["name"] = msg["name"]
if msg.get("tool_calls"):
msg_dict["tool_calls"] = msg["tool_calls"]
request_messages.append(msg_dict)
request_params: dict[str, Any] = {
"model": self.genai_config.model,
"messages": request_messages,
**self.provider_options,
}
if stream:
request_params["stream"] = True
if tools:
request_params["tools"] = tools
if tool_choice:
request_params["tool_choice"] = (
"none"
if tool_choice == "none"
else "required"
if tool_choice == "required"
else "auto"
)
return request_params
def _message_from_response(self, response: dict[str, Any]) -> dict[str, Any]:
"""Parse Ollama chat response into {content, tool_calls, finish_reason}."""
if not response or "message" not in response:
return {
"content": None,
"tool_calls": None,
"finish_reason": "error",
}
message = response["message"]
content = message.get("content", "").strip() if message.get("content") else None
tool_calls = parse_tool_calls_from_message(message)
finish_reason = "error"
if response.get("done"):
finish_reason = (
"tool_calls" if tool_calls else "stop" if content else "error"
)
elif tool_calls:
finish_reason = "tool_calls"
elif content:
finish_reason = "stop"
return {
"content": content,
"tool_calls": tool_calls,
"finish_reason": finish_reason,
}
def chat_with_tools( def chat_with_tools(
self, self,
messages: list[dict[str, Any]], messages: list[dict[str, Any]],
@ -103,93 +171,12 @@ class OllamaClient(GenAIClient):
"tool_calls": None, "tool_calls": None,
"finish_reason": "error", "finish_reason": "error",
} }
try: try:
request_messages = [] request_params = self._build_request_params(
for msg in messages: messages, tools, tool_choice, stream=False
msg_dict = {
"role": msg.get("role"),
"content": msg.get("content", ""),
}
if msg.get("tool_call_id"):
msg_dict["tool_call_id"] = msg["tool_call_id"]
if msg.get("name"):
msg_dict["name"] = msg["name"]
if msg.get("tool_calls"):
msg_dict["tool_calls"] = msg["tool_calls"]
request_messages.append(msg_dict)
request_params = {
"model": self.genai_config.model,
"messages": request_messages,
}
if tools:
request_params["tools"] = tools
if tool_choice:
if tool_choice == "none":
request_params["tool_choice"] = "none"
elif tool_choice == "required":
request_params["tool_choice"] = "required"
elif tool_choice == "auto":
request_params["tool_choice"] = "auto"
request_params.update(self.provider_options)
response = self.provider.chat(**request_params)
if not response or "message" not in response:
return {
"content": None,
"tool_calls": None,
"finish_reason": "error",
}
message = response["message"]
content = (
message.get("content", "").strip() if message.get("content") else None
) )
response = self.provider.chat(**request_params)
tool_calls = None return self._message_from_response(response)
if "tool_calls" in message and message["tool_calls"]:
tool_calls = []
for tool_call in message["tool_calls"]:
try:
function_data = tool_call.get("function", {})
arguments_str = function_data.get("arguments", "{}")
arguments = json.loads(arguments_str)
except (json.JSONDecodeError, KeyError, TypeError) as e:
logger.warning(
f"Failed to parse tool call arguments: {e}, "
f"tool: {function_data.get('name', 'unknown')}"
)
arguments = {}
tool_calls.append(
{
"id": tool_call.get("id", ""),
"name": function_data.get("name", ""),
"arguments": arguments,
}
)
finish_reason = "error"
if "done" in response and response["done"]:
if tool_calls:
finish_reason = "tool_calls"
elif content:
finish_reason = "stop"
elif tool_calls:
finish_reason = "tool_calls"
elif content:
finish_reason = "stop"
return {
"content": content,
"tool_calls": tool_calls,
"finish_reason": finish_reason,
}
except (TimeoutException, ResponseError, ConnectionError) as e: except (TimeoutException, ResponseError, ConnectionError) as e:
logger.warning("Ollama returned an error: %s", str(e)) logger.warning("Ollama returned an error: %s", str(e))
return { return {
@ -204,3 +191,89 @@ class OllamaClient(GenAIClient):
"tool_calls": None, "tool_calls": None,
"finish_reason": "error", "finish_reason": "error",
} }
    async def chat_with_tools_stream(
        self,
        messages: list[dict[str, Any]],
        tools: Optional[list[dict[str, Any]]] = None,
        tool_choice: Optional[str] = "auto",
    ):
        """Stream chat with tools; yields content deltas then final message.

        Yields ("content_delta", str) tuples as text arrives, then one final
        ("message", {content, tool_calls, finish_reason}) tuple. On error,
        only a ("message", ...) with finish_reason "error" is yielded.
        """
        if self.provider is None:
            logger.warning(
                "Ollama provider has not been initialized. Check your Ollama configuration."
            )
            yield (
                "message",
                {
                    "content": None,
                    "tool_calls": None,
                    "finish_reason": "error",
                },
            )
            return

        try:
            request_params = self._build_request_params(
                messages, tools, tool_choice, stream=True
            )
            # self.provider is the sync Client; a fresh AsyncClient is created
            # per call so the stream can be consumed with `async for`.
            async_client = OllamaAsyncClient(
                host=self.genai_config.base_url,
                timeout=self.timeout,
            )
            content_parts: list[str] = []
            final_message: dict[str, Any] | None = None
            try:
                stream = await async_client.chat(**request_params)
                async for chunk in stream:
                    if not chunk or "message" not in chunk:
                        continue
                    msg = chunk.get("message", {})
                    delta = msg.get("content") or ""
                    if delta:
                        content_parts.append(delta)
                        yield ("content_delta", delta)
                    if chunk.get("done"):
                        full_content = "".join(content_parts).strip() or None
                        # NOTE(review): tool calls are read only from the final
                        # ("done") chunk; assumes Ollama reports them there —
                        # confirm calls emitted in earlier chunks aren't lost.
                        tool_calls = parse_tool_calls_from_message(msg)
                        final_message = {
                            "content": full_content,
                            "tool_calls": tool_calls,
                            "finish_reason": "tool_calls" if tool_calls else "stop",
                        }
                        break
            finally:
                # Always release the async client's connections, even when the
                # generator is closed early or the stream raises.
                await async_client.close()

            if final_message is not None:
                yield ("message", final_message)
            else:
                # Stream ended without a "done" chunk; emit whatever content
                # was accumulated as a best-effort final message.
                yield (
                    "message",
                    {
                        "content": "".join(content_parts).strip() or None,
                        "tool_calls": None,
                        "finish_reason": "stop",
                    },
                )
        except (TimeoutException, ResponseError, ConnectionError) as e:
            logger.warning("Ollama streaming error: %s", str(e))
            yield (
                "message",
                {
                    "content": None,
                    "tool_calls": None,
                    "finish_reason": "error",
                },
            )
        except Exception as e:
            logger.warning(
                "Unexpected error in Ollama chat_with_tools_stream: %s", str(e)
            )
            yield (
                "message",
                {
                    "content": None,
                    "tool_calls": None,
                    "finish_reason": "error",
                },
            )

70
frigate/genai/utils.py Normal file
View File

@ -0,0 +1,70 @@
"""Shared helpers for GenAI providers and chat (OpenAI-style messages, tool call parsing)."""

import json
import logging
from typing import Any, List, Optional

logger = logging.getLogger(__name__)


def parse_tool_calls_from_message(
    message: dict[str, Any],
) -> Optional[list[dict[str, Any]]]:
    """
    Parse tool_calls from an OpenAI-style message dict.

    Message may have "tool_calls" as a list of:
    {"id": str, "function": {"name": str, "arguments": str | dict}, ...}

    "arguments" may be a JSON string (OpenAI / llama.cpp style) or an
    already-parsed dict (as the Ollama client returns); both are accepted.

    Returns a list of {"id", "name", "arguments"} with arguments parsed as dict,
    or None if no tool_calls. Used by Ollama and LlamaCpp (non-stream) responses.
    """
    raw = message.get("tool_calls")
    if not raw or not isinstance(raw, list):
        return None

    result = []
    for tool_call in raw:
        function_data = tool_call.get("function") or {}
        raw_arguments = function_data.get("arguments")
        if isinstance(raw_arguments, dict):
            # Already parsed (e.g. Ollama): use as-is. Previously this fell
            # into json.loads(dict), raised TypeError, and silently dropped
            # the arguments to {}.
            arguments = raw_arguments
        else:
            try:
                arguments = json.loads(raw_arguments or "{}")
            except (json.JSONDecodeError, TypeError) as e:
                # Malformed argument payloads degrade to empty args rather
                # than failing the whole response.
                logger.warning(
                    "Failed to parse tool call arguments: %s, tool: %s",
                    e,
                    function_data.get("name", "unknown"),
                )
                arguments = {}
        result.append(
            {
                "id": tool_call.get("id", ""),
                "name": function_data.get("name", ""),
                "arguments": arguments,
            }
        )
    return result if result else None
def build_assistant_message_for_conversation(
    content: Any,
    tool_calls_raw: Optional[List[dict[str, Any]]],
) -> dict[str, Any]:
    """
    Build the assistant message dict in OpenAI format for appending to a conversation.

    tool_calls_raw: list of {"id", "name", "arguments"} (arguments as dict), or None.
    """
    message: dict[str, Any] = {"role": "assistant", "content": content}
    if not tool_calls_raw:
        return message

    serialized_calls = []
    for call in tool_calls_raw:
        serialized_calls.append(
            {
                "id": call["id"],
                "type": "function",
                "function": {
                    "name": call["name"],
                    # OpenAI expects arguments as a JSON-encoded string.
                    "arguments": json.dumps(call.get("arguments") or {}),
                },
            }
        )
    message["tool_calls"] = serialized_calls
    return message

1458
web/package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@ -75,6 +75,8 @@
"react-icons": "^5.5.0", "react-icons": "^5.5.0",
"react-konva": "^18.2.10", "react-konva": "^18.2.10",
"react-router-dom": "^6.30.3", "react-router-dom": "^6.30.3",
"react-markdown": "^9.0.1",
"remark-gfm": "^4.0.0",
"react-swipeable": "^7.0.2", "react-swipeable": "^7.0.2",
"react-tracked": "^2.0.1", "react-tracked": "^2.0.1",
"react-transition-group": "^4.4.5", "react-transition-group": "^4.4.5",

View File

@ -129,6 +129,7 @@
"cancel": "Cancel", "cancel": "Cancel",
"close": "Close", "close": "Close",
"copy": "Copy", "copy": "Copy",
"copiedToClipboard": "Copied to clipboard",
"back": "Back", "back": "Back",
"history": "History", "history": "History",
"fullscreen": "Fullscreen", "fullscreen": "Fullscreen",
@ -254,6 +255,7 @@
"uiPlayground": "UI Playground", "uiPlayground": "UI Playground",
"faceLibrary": "Face Library", "faceLibrary": "Face Library",
"classification": "Classification", "classification": "Classification",
"chat": "Chat",
"user": { "user": {
"title": "User", "title": "User",
"account": "Account", "account": "Account",

View File

@ -0,0 +1,24 @@
{
"title": "Frigate Chat",
"subtitle": "Your AI assistant for camera management and insights",
"placeholder": "Ask anything...",
"error": "Something went wrong. Please try again.",
"processing": "Processing...",
"toolsUsed": "Used: {{tools}}",
"showTools": "Show tools ({{count}})",
"hideTools": "Hide tools",
"call": "Call",
"result": "Result",
"arguments": "Arguments:",
"response": "Response:",
"send": "Send",
"suggested_requests": "Try asking:",
"starting_requests": {
"show_recent_events": "Show recent events",
"show_camera_status": "Show camera status"
},
"starting_requests_prompts": {
"show_recent_events": "Show me the recent events from the last hour",
"show_camera_status": "What is the current status of my cameras?"
}
}

View File

@ -27,6 +27,7 @@ const Settings = lazy(() => import("@/pages/Settings"));
const UIPlayground = lazy(() => import("@/pages/UIPlayground")); const UIPlayground = lazy(() => import("@/pages/UIPlayground"));
const FaceLibrary = lazy(() => import("@/pages/FaceLibrary")); const FaceLibrary = lazy(() => import("@/pages/FaceLibrary"));
const Classification = lazy(() => import("@/pages/ClassificationModel")); const Classification = lazy(() => import("@/pages/ClassificationModel"));
const Chat = lazy(() => import("@/pages/Chat"));
const Logs = lazy(() => import("@/pages/Logs")); const Logs = lazy(() => import("@/pages/Logs"));
const AccessDenied = lazy(() => import("@/pages/AccessDenied")); const AccessDenied = lazy(() => import("@/pages/AccessDenied"));
@ -106,6 +107,7 @@ function DefaultAppView() {
<Route path="/logs" element={<Logs />} /> <Route path="/logs" element={<Logs />} />
<Route path="/faces" element={<FaceLibrary />} /> <Route path="/faces" element={<FaceLibrary />} />
<Route path="/classification" element={<Classification />} /> <Route path="/classification" element={<Classification />} />
<Route path="/chat" element={<Chat />} />
<Route path="/playground" element={<UIPlayground />} /> <Route path="/playground" element={<UIPlayground />} />
</Route> </Route>
<Route path="/unauthorized" element={<AccessDenied />} /> <Route path="/unauthorized" element={<AccessDenied />} />

View File

@ -0,0 +1,42 @@
import { useApiHost } from "@/api";

type ChatEventThumbnailsRowProps = {
  events: { id: string }[];
};

/**
 * Horizontal scroll row of event thumbnail images for chat (e.g. after search_objects).
 * Renders nothing when events is empty.
 */
export function ChatEventThumbnailsRow({
  events,
}: ChatEventThumbnailsRowProps) {
  const apiHost = useApiHost();

  if (!events.length) {
    return null;
  }

  // Each thumbnail links to the event in Explore, opened in a new tab.
  const thumbnails = events.map((event) => (
    <a
      key={event.id}
      href={`/explore?event_id=${event.id}`}
      target="_blank"
      rel="noopener noreferrer"
      className="relative aspect-square size-32 shrink-0 overflow-hidden rounded-lg"
    >
      <img
        className="size-full object-cover"
        src={`${apiHost}api/events/${event.id}/thumbnail.webp`}
        alt=""
        loading="lazy"
      />
    </a>
  ));

  return (
    <div className="flex min-w-0 max-w-full flex-col gap-1 self-start">
      <div className="scrollbar-container min-w-0 overflow-x-auto">
        <div className="flex w-max gap-2">{thumbnails}</div>
      </div>
    </div>
  );
}

View File

@ -0,0 +1,208 @@
import { useState, useEffect, useRef } from "react";
import ReactMarkdown from "react-markdown";
import remarkGfm from "remark-gfm";
import { useTranslation } from "react-i18next";
import copy from "copy-to-clipboard";
import { toast } from "sonner";
import { FaCopy, FaPencilAlt } from "react-icons/fa";
import { FaArrowUpLong } from "react-icons/fa6";
import { Button } from "@/components/ui/button";
import { Textarea } from "@/components/ui/textarea";
import {
  Tooltip,
  TooltipContent,
  TooltipTrigger,
} from "@/components/ui/tooltip";
import { cn } from "@/lib/utils";

type MessageBubbleProps = {
  role: "user" | "assistant";
  // Raw message text; assistant content is rendered as markdown.
  content: string;
  // Index of this message in the transcript; passed back on edit submit.
  messageIndex?: number;
  // When provided (user messages), enables in-place editing of the message.
  onEditSubmit?: (messageIndex: number, newContent: string) => void;
  // False while the assistant message is still streaming; hides the copy button.
  isComplete?: boolean;
};

/**
 * Single chat transcript bubble. User messages are plain text with optional
 * edit support; assistant messages render as GFM markdown. A copy button is
 * shown once the message is complete.
 */
export function MessageBubble({
  role,
  content,
  messageIndex = 0,
  onEditSubmit,
  isComplete = true,
}: MessageBubbleProps) {
  const { t } = useTranslation(["views/chat", "common"]);
  const isUser = role === "user";

  const [isEditing, setIsEditing] = useState(false);
  const [draftContent, setDraftContent] = useState(content);
  const editInputRef = useRef<HTMLTextAreaElement>(null);

  // Keep the edit draft in sync if the message content changes externally.
  useEffect(() => {
    setDraftContent(content);
  }, [content]);

  // When entering edit mode, focus the textarea with the caret at the end.
  useEffect(() => {
    if (isEditing) {
      editInputRef.current?.focus();
      editInputRef.current?.setSelectionRange(
        editInputRef.current.value.length,
        editInputRef.current.value.length,
      );
    }
  }, [isEditing]);

  const handleCopy = () => {
    const text = content?.trim() || "";
    if (!text) return;
    if (copy(text)) {
      toast.success(t("button.copiedToClipboard", { ns: "common" }));
    }
  };

  const handleEditClick = () => {
    setDraftContent(content);
    setIsEditing(true);
  };

  const handleEditSubmit = () => {
    const trimmed = draftContent.trim();
    if (!trimmed || onEditSubmit == null) return;
    onEditSubmit(messageIndex, trimmed);
    setIsEditing(false);
  };

  const handleEditCancel = () => {
    setDraftContent(content);
    setIsEditing(false);
  };

  // Enter submits (Shift+Enter inserts a newline); Escape cancels.
  const handleEditKeyDown = (e: React.KeyboardEvent<HTMLTextAreaElement>) => {
    if (e.key === "Enter" && !e.shiftKey) {
      e.preventDefault();
      handleEditSubmit();
    }
    if (e.key === "Escape") {
      handleEditCancel();
    }
  };

  // Edit mode replaces the bubble with a textarea plus cancel/submit controls.
  if (isUser && isEditing) {
    return (
      <div className="flex w-full max-w-full flex-col gap-2 self-end">
        <Textarea
          ref={editInputRef}
          value={draftContent}
          onChange={(e) => setDraftContent(e.target.value)}
          onKeyDown={handleEditKeyDown}
          className="min-h-[80px] w-full resize-y rounded-lg bg-primary px-3 py-2 text-primary-foreground placeholder:text-primary-foreground/60"
          placeholder={t("placeholder")}
          rows={3}
        />
        <div className="flex items-center gap-2 self-end">
          <Button
            variant="ghost"
            size="sm"
            className="text-muted-foreground hover:text-foreground"
            onClick={handleEditCancel}
          >
            {t("button.cancel", { ns: "common" })}
          </Button>
          <Button
            variant="select"
            size="icon"
            className="size-9 rounded-full"
            disabled={!draftContent.trim()}
            onClick={handleEditSubmit}
            aria-label={t("send")}
          >
            <FaArrowUpLong size="16" />
          </Button>
        </div>
      </div>
    );
  }

  return (
    <div
      className={cn(
        "flex flex-col gap-1",
        isUser ? "items-end self-end" : "items-start self-start",
      )}
    >
      <div
        className={cn(
          "rounded-lg px-3 py-2",
          isUser ? "bg-primary text-primary-foreground" : "bg-muted",
        )}
      >
        {isUser ? (
          content
        ) : (
          // Assistant replies are markdown; remark-gfm adds table support,
          // with styled table/th/td overrides to match the app theme.
          <ReactMarkdown
            remarkPlugins={[remarkGfm]}
            components={{
              table: ({ node: _n, ...props }) => (
                <table
                  className="my-2 w-full border-collapse border border-border"
                  {...props}
                />
              ),
              th: ({ node: _n, ...props }) => (
                <th
                  className="border border-border bg-muted/50 px-2 py-1 text-left text-sm font-medium"
                  {...props}
                />
              ),
              td: ({ node: _n, ...props }) => (
                <td
                  className="border border-border px-2 py-1 text-sm"
                  {...props}
                />
              ),
            }}
          >
            {content}
          </ReactMarkdown>
        )}
      </div>
      <div className="flex items-center gap-0.5">
        {isUser && onEditSubmit != null && (
          <Tooltip>
            <TooltipTrigger asChild>
              <Button
                variant="ghost"
                size="icon"
                className="size-7 text-muted-foreground hover:text-foreground"
                onClick={handleEditClick}
                aria-label={t("button.edit", { ns: "common" })}
              >
                <FaPencilAlt className="size-3" />
              </Button>
            </TooltipTrigger>
            <TooltipContent>
              {t("button.edit", { ns: "common" })}
            </TooltipContent>
          </Tooltip>
        )}
        {isComplete && (
          <Tooltip>
            <TooltipTrigger asChild>
              <Button
                variant="ghost"
                size="icon"
                className="size-7 text-muted-foreground hover:text-foreground"
                onClick={handleCopy}
                disabled={!content?.trim()}
                aria-label={t("button.copy", { ns: "common" })}
              >
                <FaCopy className="size-3" />
              </Button>
            </TooltipTrigger>
            <TooltipContent>
              {t("button.copy", { ns: "common" })}
            </TooltipContent>
          </Tooltip>
        )}
      </div>
    </div>
  );
}

View File

@ -0,0 +1,89 @@
import { Button } from "@/components/ui/button";
import { Input } from "@/components/ui/input";
import { FaArrowUpLong } from "react-icons/fa6";
import { useTranslation } from "react-i18next";
import { useState } from "react";
import type { StartingRequest } from "@/types/chat";

type ChatStartingStateProps = {
  onSendMessage: (message: string) => void;
};

/**
 * Empty-chat landing view: title, a set of suggested prompts, and a
 * free-form input for the first message.
 */
export function ChatStartingState({ onSendMessage }: ChatStartingStateProps) {
  const { t } = useTranslation(["views/chat"]);
  const [draft, setDraft] = useState("");

  // Canned prompts offered before the conversation starts.
  const suggestions: StartingRequest[] = [
    {
      label: t("starting_requests.show_recent_events"),
      prompt: t("starting_requests_prompts.show_recent_events"),
    },
    {
      label: t("starting_requests.show_camera_status"),
      prompt: t("starting_requests_prompts.show_camera_status"),
    },
  ];

  const submitDraft = () => {
    const text = draft.trim();
    if (!text) {
      return;
    }
    onSendMessage(text);
    setDraft("");
  };

  const onKeyDown = (e: React.KeyboardEvent<HTMLInputElement>) => {
    if (e.key === "Enter" && !e.shiftKey) {
      e.preventDefault();
      submitDraft();
    }
  };

  return (
    <div className="flex size-full flex-col items-center justify-center gap-6 p-8">
      <div className="flex flex-col items-center gap-2">
        <h1 className="text-4xl font-bold text-foreground">{t("title")}</h1>
        <p className="text-muted-foreground">{t("subtitle")}</p>
      </div>
      <div className="flex w-full max-w-2xl flex-col items-center gap-4">
        <p className="text-center text-sm text-muted-foreground">
          {t("suggested_requests")}
        </p>
        <div className="flex w-full flex-wrap justify-center gap-2">
          {suggestions.map((request, idx) => (
            <Button
              key={idx}
              variant="outline"
              className="max-w-sm text-sm"
              onClick={() => onSendMessage(request.prompt)}
            >
              {request.label}
            </Button>
          ))}
        </div>
      </div>
      <div className="flex w-full max-w-2xl flex-row items-center gap-2 rounded-xl bg-secondary p-4">
        <Input
          className="h-12 w-full flex-1 border-transparent bg-transparent text-base shadow-none focus-visible:ring-0 dark:bg-transparent"
          placeholder={t("placeholder")}
          value={draft}
          onChange={(e) => setDraft(e.target.value)}
          onKeyDown={onKeyDown}
        />
        <Button
          variant="select"
          className="size-10 shrink-0 rounded-full"
          disabled={!draft.trim()}
          onClick={submitDraft}
        >
          <FaArrowUpLong size="18" />
        </Button>
      </div>
    </div>
  );
}

View File

@ -0,0 +1,88 @@
import { useState } from "react";
import { useTranslation } from "react-i18next";
import {
Collapsible,
CollapsibleContent,
CollapsibleTrigger,
} from "@/components/ui/collapsible";
import { Button } from "@/components/ui/button";
import { cn } from "@/lib/utils";
import { ChevronDown, ChevronRight } from "lucide-react";
type ToolCallBubbleProps = {
name: string;
arguments?: Record<string, unknown>;
response?: string;
side: "left" | "right";
};
export function ToolCallBubble({
name,
arguments: args,
response,
side,
}: ToolCallBubbleProps) {
const { t } = useTranslation(["views/chat"]);
const [open, setOpen] = useState(false);
const isLeft = side === "left";
const normalizedName = name
.replace(/_/g, " ")
.split(" ")
.map((word) => word.charAt(0).toUpperCase() + word.slice(1).toLowerCase())
.join(" ");
return (
<div
className={cn(
"rounded-lg px-3 py-2",
isLeft
? "self-start bg-muted"
: "self-end bg-primary text-primary-foreground",
)}
>
<Collapsible open={open} onOpenChange={setOpen}>
<CollapsibleTrigger asChild>
<Button
variant="ghost"
size="sm"
className={cn(
"h-auto w-full min-w-0 justify-start gap-2 whitespace-normal p-0 text-left text-xs hover:bg-transparent",
!isLeft && "hover:text-primary-foreground",
)}
>
{open ? (
<ChevronDown size={12} className="shrink-0" />
) : (
<ChevronRight size={12} className="shrink-0" />
)}
<span className="break-words font-medium">
{isLeft ? t("call") : t("result")} {normalizedName}
</span>
</Button>
</CollapsibleTrigger>
<CollapsibleContent>
<div className="mt-2 space-y-2">
{isLeft && args && Object.keys(args).length > 0 && (
<div className="text-xs">
<div className="font-medium text-muted-foreground">
{t("arguments")}
</div>
<pre className="scrollbar-container mt-1 max-h-32 overflow-auto whitespace-pre-wrap break-words rounded bg-muted/50 p-2 text-[10px]">
{JSON.stringify(args, null, 2)}
</pre>
</div>
)}
{!isLeft && response && response !== "" && (
<div className="text-xs">
<div className="font-medium opacity-80">{t("response")}</div>
<pre className="scrollbar-container mt-1 max-h-32 overflow-auto whitespace-pre-wrap break-words rounded bg-primary/20 p-2 text-[10px]">
{response}
</pre>
</div>
)}
</div>
</CollapsibleContent>
</Collapsible>
</div>
);
}

View File

@ -6,7 +6,7 @@ import { isDesktop } from "react-device-detect";
import { FaCompactDisc, FaVideo } from "react-icons/fa"; import { FaCompactDisc, FaVideo } from "react-icons/fa";
import { IoSearch } from "react-icons/io5"; import { IoSearch } from "react-icons/io5";
import { LuConstruction } from "react-icons/lu"; import { LuConstruction } from "react-icons/lu";
import { MdCategory, MdVideoLibrary } from "react-icons/md"; import { MdCategory, MdChat, MdVideoLibrary } from "react-icons/md";
import { TbFaceId } from "react-icons/tb"; import { TbFaceId } from "react-icons/tb";
import useSWR from "swr"; import useSWR from "swr";
import { useIsAdmin } from "./use-is-admin"; import { useIsAdmin } from "./use-is-admin";
@ -18,6 +18,7 @@ export const ID_EXPORT = 4;
export const ID_PLAYGROUND = 5; export const ID_PLAYGROUND = 5;
export const ID_FACE_LIBRARY = 6; export const ID_FACE_LIBRARY = 6;
export const ID_CLASSIFICATION = 7; export const ID_CLASSIFICATION = 7;
export const ID_CHAT = 8;
export default function useNavigation( export default function useNavigation(
variant: "primary" | "secondary" = "primary", variant: "primary" | "secondary" = "primary",
@ -82,7 +83,15 @@ export default function useNavigation(
url: "/classification", url: "/classification",
enabled: isDesktop && isAdmin, enabled: isDesktop && isAdmin,
}, },
{
id: ID_CHAT,
variant,
icon: MdChat,
title: "menu.chat",
url: "/chat",
enabled: isDesktop && isAdmin && config?.genai?.model !== "none",
},
] as NavData[], ] as NavData[],
[config?.face_recognition?.enabled, variant, isAdmin], [config?.face_recognition?.enabled, config?.genai?.model, variant, isAdmin],
); );
} }

226
web/src/pages/Chat.tsx Normal file
View File

@ -0,0 +1,226 @@
import { Button } from "@/components/ui/button";
import { Input } from "@/components/ui/input";
import { FaArrowUpLong } from "react-icons/fa6";
import { useTranslation } from "react-i18next";
import { useState, useCallback } from "react";
import axios from "axios";
import { ChatEventThumbnailsRow } from "@/components/chat/ChatEventThumbnailsRow";
import { MessageBubble } from "@/components/chat/ChatMessage";
import { ToolCallBubble } from "@/components/chat/ToolCallBubble";
import { ChatStartingState } from "@/components/chat/ChatStartingState";
import type { ChatMessage } from "@/types/chat";
import {
getEventIdsFromSearchObjectsToolCalls,
streamChatCompletion,
} from "@/utils/chatUtil";
/**
 * Chat page: holds the transcript, streams assistant replies from
 * chat/completion, and supports resubmitting an edited user message.
 */
export default function ChatPage() {
  const { t } = useTranslation(["views/chat"]);
  const [input, setInput] = useState("");
  const [messages, setMessages] = useState<ChatMessage[]>([]);
  const [isLoading, setIsLoading] = useState(false);
  const [error, setError] = useState<string | null>(null);

  // Send the conversation (which must end in a non-empty user message) to the
  // streaming endpoint; an empty assistant placeholder is appended and then
  // filled in by streaming updates.
  const submitConversation = useCallback(
    async (messagesToSend: ChatMessage[]) => {
      if (isLoading) return;
      const last = messagesToSend[messagesToSend.length - 1];
      if (!last || last.role !== "user" || !last.content.trim()) return;

      setError(null);
      const assistantPlaceholder: ChatMessage = {
        role: "assistant",
        content: "",
        toolCalls: undefined,
      };
      setMessages([...messagesToSend, assistantPlaceholder]);
      setIsLoading(true);

      // Only role/content go to the API; toolCalls are UI-side state.
      const apiMessages = messagesToSend.map((m) => ({
        role: m.role,
        content: m.content,
      }));

      const baseURL = axios.defaults.baseURL ?? "";
      const url = `${baseURL}chat/completion`;
      // Reuse axios defaults (e.g. auth headers) for the raw streaming fetch.
      const headers: Record<string, string> = {
        "Content-Type": "application/json",
        ...(axios.defaults.headers.common as Record<string, string>),
      };

      await streamChatCompletion(url, headers, apiMessages, {
        updateMessages: (updater) => setMessages(updater),
        onError: (message) => setError(message),
        onDone: () => setIsLoading(false),
        defaultErrorMessage: t("error"),
      });
    },
    [isLoading, t],
  );

  const sendMessage = useCallback(() => {
    const text = input.trim();
    if (!text || isLoading) return;
    setInput("");
    submitConversation([...messages, { role: "user", content: text }]);
  }, [input, isLoading, messages, submitConversation]);

  // Editing a user message truncates the transcript at that index and
  // resubmits with the new content.
  const handleEditSubmit = useCallback(
    (messageIndex: number, newContent: string) => {
      const newList: ChatMessage[] = [
        ...messages.slice(0, messageIndex),
        { role: "user", content: newContent },
      ];
      submitConversation(newList);
    },
    [messages, submitConversation],
  );

  return (
    <div className="flex size-full justify-center p-2">
      <div className="flex size-full flex-col xl:w-[50%] 3xl:w-[35%]">
        {messages.length === 0 ? (
          <ChatStartingState
            onSendMessage={(message) => {
              setInput("");
              submitConversation([{ role: "user", content: message }]);
            }}
          />
        ) : (
          <div className="scrollbar-container flex min-h-0 w-full flex-1 flex-col gap-2 overflow-y-auto">
            {messages.map((msg, i) => {
              {/* The empty assistant placeholder is rendered as a stub while
                  the stream has produced no content or tool calls yet; the
                  "processing" indicator below covers that state. */}
              const isStreamingPlaceholder =
                i === messages.length - 1 &&
                msg.role === "assistant" &&
                isLoading &&
                !msg.content?.trim() &&
                !(msg.toolCalls && msg.toolCalls.length > 0);
              if (isStreamingPlaceholder) {
                return <div key={i} />;
              }
              return (
                <div key={i} className="flex flex-col gap-2">
                  {msg.role === "assistant" && msg.toolCalls && (
                    <>
                      {msg.toolCalls.map((tc, tcIdx) => (
                        <div key={tcIdx} className="flex flex-col gap-2">
                          <ToolCallBubble
                            name={tc.name}
                            arguments={tc.arguments}
                            side="left"
                          />
                          {tc.response && (
                            <ToolCallBubble
                              name={tc.name}
                              response={tc.response}
                              side="right"
                            />
                          )}
                        </div>
                      ))}
                    </>
                  )}
                  <MessageBubble
                    role={msg.role}
                    content={msg.content}
                    messageIndex={i}
                    onEditSubmit={
                      msg.role === "user" ? handleEditSubmit : undefined
                    }
                    isComplete={
                      msg.role === "user" ||
                      !isLoading ||
                      i < messages.length - 1
                    }
                  />
                  {msg.role === "assistant" &&
                    (() => {
                      {/* Thumbnails from search_objects results are shown only
                          once the message has finished streaming. */}
                      const isComplete = !isLoading || i < messages.length - 1;
                      if (!isComplete) return null;
                      const events = getEventIdsFromSearchObjectsToolCalls(
                        msg.toolCalls,
                      );
                      return <ChatEventThumbnailsRow events={events} />;
                    })()}
                </div>
              );
            })}
            {(() => {
              const lastMsg = messages[messages.length - 1];
              const showProcessing =
                isLoading &&
                lastMsg?.role === "assistant" &&
                !lastMsg.content?.trim() &&
                !(lastMsg.toolCalls && lastMsg.toolCalls.length > 0);
              return showProcessing ? (
                <div className="self-start rounded-lg bg-muted px-3 py-2 text-muted-foreground">
                  {t("processing")}
                </div>
              ) : null;
            })()}
            {error && (
              <p className="self-start text-sm text-destructive" role="alert">
                {error}
              </p>
            )}
          </div>
        )}
        {messages.length > 0 && (
          <ChatEntry
            input={input}
            setInput={setInput}
            sendMessage={sendMessage}
            isLoading={isLoading}
            placeholder={t("placeholder")}
          />
        )}
      </div>
    </div>
  );
}
type ChatEntryProps = {
  input: string;
  setInput: (value: string) => void;
  sendMessage: () => void;
  isLoading: boolean;
  placeholder: string;
};

// Bottom input bar used once a conversation is in progress.
function ChatEntry({
  input,
  setInput,
  sendMessage,
  isLoading,
  placeholder,
}: ChatEntryProps) {
  // Enter submits; Shift+Enter is left alone.
  const onKeyDown = (e: React.KeyboardEvent<HTMLInputElement>) => {
    if (e.key !== "Enter" || e.shiftKey) {
      return;
    }
    e.preventDefault();
    sendMessage();
  };

  return (
    <div className="flex w-full flex-col items-center justify-center rounded-xl bg-secondary p-2">
      <div className="flex w-full flex-row items-center gap-2">
        <Input
          className="w-full flex-1 border-transparent bg-transparent shadow-none focus-visible:ring-0 dark:bg-transparent"
          placeholder={placeholder}
          value={input}
          onChange={(e) => setInput(e.target.value)}
          onKeyDown={onKeyDown}
          aria-busy={isLoading}
        />
        <Button
          variant="select"
          className="size-10 shrink-0 rounded-full"
          disabled={!input.trim() || isLoading}
          onClick={sendMessage}
        >
          <FaArrowUpLong size="16" />
        </Button>
      </div>
    </div>
  );
}

16
web/src/types/chat.ts Normal file
View File

@ -0,0 +1,16 @@
// A tool invocation made by the assistant during a chat completion.
export type ToolCall = {
  // Tool name, e.g. "search_objects".
  name: string;
  // Arguments the model supplied for the call, if any.
  arguments?: Record<string, unknown>;
  // Raw response returned by the tool once available — for search_objects
  // this is a JSON-encoded array (see getEventIdsFromSearchObjectsToolCalls).
  response?: string;
};
// A single message in the chat transcript.
export type ChatMessage = {
  // Author of the message.
  role: "user" | "assistant";
  // Text/markdown content of the message (may be empty while streaming).
  content: string;
  // Tool calls attached to an assistant message, if any were made.
  toolCalls?: ToolCall[];
};
// A predefined label/prompt pair — presumably offered as a suggestion in
// the chat starting state; confirm against the chat view's usage.
export type StartingRequest = {
  // Short display label for the request.
  label: string;
  // Full prompt text submitted when the request is selected.
  prompt: string;
};

193
web/src/utils/chatUtil.ts Normal file
View File

@ -0,0 +1,193 @@
import type { ChatMessage, ToolCall } from "@/types/chat";
/** Callbacks invoked by streamChatCompletion as the stream progresses. */
export type StreamChatCallbacks = {
  /** Update the messages array (e.g. pass to setState). */
  updateMessages: (updater: (prev: ChatMessage[]) => ChatMessage[]) => void;
  /** Called when the stream sends an error or fetch fails. */
  onError: (message: string) => void;
  /** Called when the stream finishes (success or error). */
  onDone: () => void;
  /** Message used when fetch throws and no server error is available. */
  defaultErrorMessage?: string;
};
// One NDJSON line emitted by the chat/completion streaming endpoint:
// a fatal error, the tool calls made by the model, or a content delta.
type StreamChunk =
  | { type: "error"; error: string }
  | { type: "tool_calls"; tool_calls: ToolCall[] }
  | { type: "content"; delta: string };
/**
 * POST to chat/completion with stream: true, parse the NDJSON response
 * stream, and invoke callbacks so the caller can update UI (e.g. React
 * state).
 *
 * Guarantees:
 * - `onDone` fires exactly once, whether the request succeeds, the server
 *   returns an error, or fetch throws (previously it fired twice on the
 *   non-OK-response and missing-body paths).
 * - On any error path, empty assistant placeholder messages are removed.
 *
 * @param url chat/completion endpoint to POST to
 * @param headers request headers (content type, auth, ...)
 * @param apiMessages conversation history to send to the backend
 * @param callbacks UI hooks invoked as the stream progresses
 */
export async function streamChatCompletion(
  url: string,
  headers: Record<string, string>,
  apiMessages: { role: string; content: string }[],
  callbacks: StreamChatCallbacks,
): Promise<void> {
  const {
    updateMessages,
    onError,
    onDone,
    defaultErrorMessage = "Something went wrong. Please try again.",
  } = callbacks;
  try {
    const res = await fetch(url, {
      method: "POST",
      headers,
      body: JSON.stringify({ messages: apiMessages, stream: true }),
    });
    if (!res.ok) {
      const errBody = await res.json().catch(() => ({}));
      const message = (errBody as { error?: string }).error ?? res.statusText;
      // No explicit onDone() here: the finally block fires it exactly once.
      onError(message);
      return;
    }
    const reader = res.body?.getReader();
    const decoder = new TextDecoder();
    if (!reader) {
      onError("No response body");
      return;
    }
    let buffer = "";
    let hadStreamError = false;
    // Apply one parsed NDJSON chunk to the message state. Returns "break"
    // when the server signalled an error and streaming should stop.
    const applyChunk = (data: StreamChunk) => {
      if (data.type === "error") {
        onError(data.error);
        // Drop the empty assistant placeholder so it isn't rendered.
        updateMessages((prev) =>
          prev.filter((m) => !(m.role === "assistant" && m.content === "")),
        );
        return "break";
      }
      if (data.type === "tool_calls" && data.tool_calls?.length) {
        updateMessages((prev) => {
          const next = [...prev];
          const lastMsg = next[next.length - 1];
          if (lastMsg?.role === "assistant")
            next[next.length - 1] = {
              ...lastMsg,
              toolCalls: data.tool_calls,
            };
          return next;
        });
        return "continue";
      }
      if (data.type === "content" && data.delta !== undefined) {
        updateMessages((prev) => {
          const next = [...prev];
          const lastMsg = next[next.length - 1];
          if (lastMsg?.role === "assistant")
            next[next.length - 1] = {
              ...lastMsg,
              content: lastMsg.content + data.delta,
            };
          return next;
        });
        return "continue";
      }
      return "continue";
    };
    for (;;) {
      const { done, value } = await reader.read();
      if (done) break;
      buffer += decoder.decode(value, { stream: true });
      const lines = buffer.split("\n");
      // Keep the trailing partial line in the buffer for the next read.
      buffer = lines.pop() ?? "";
      for (const line of lines) {
        const trimmed = line.trim();
        if (!trimmed) continue;
        try {
          const data = JSON.parse(trimmed) as StreamChunk;
          if (applyChunk(data) === "break") {
            hadStreamError = true;
            break;
          }
        } catch {
          // skip malformed JSON lines
        }
      }
      if (hadStreamError) break;
    }
    if (hadStreamError) {
      // Abandon the rest of the stream so the connection is released.
      await reader.cancel().catch(() => {});
    } else {
      // Flush any bytes the decoder buffered (e.g. a multi-byte character
      // split across reads) before handling the trailing chunk.
      buffer += decoder.decode();
      // Handle a final chunk that wasn't newline-terminated. Reusing
      // applyChunk means trailing tool_calls / error chunks are processed
      // the same way as mid-stream ones (previously only content was).
      const trailing = buffer.trim();
      if (trailing) {
        try {
          const data = JSON.parse(trailing) as StreamChunk;
          if (applyChunk(data) === "break") {
            hadStreamError = true;
          }
        } catch {
          // ignore final malformed chunk
        }
      }
    }
    if (!hadStreamError) {
      // Replace a still-empty assistant message with a single space so the
      // UI stops treating it as a streaming placeholder.
      updateMessages((prev) => {
        const next = [...prev];
        const lastMsg = next[next.length - 1];
        if (lastMsg?.role === "assistant" && lastMsg.content === "")
          next[next.length - 1] = { ...lastMsg, content: " " };
        return next;
      });
    }
  } catch {
    onError(defaultErrorMessage);
    updateMessages((prev) =>
      prev.filter((m) => !(m.role === "assistant" && m.content === "")),
    );
  } finally {
    // Always signal completion exactly once.
    onDone();
  }
}
/**
 * Extract event ids from search_objects tool call responses so the UI can
 * render thumbnail rows for the matched events.
 *
 * Only tool calls named "search_objects" with a non-blank response are
 * considered; responses must be JSON arrays, and only entries carrying a
 * string `id` contribute. Malformed JSON is ignored.
 */
export function getEventIdsFromSearchObjectsToolCalls(
  toolCalls: ToolCall[] | undefined,
): { id: string }[] {
  const ids: { id: string }[] = [];
  for (const call of toolCalls ?? []) {
    if (call.name !== "search_objects" || !call.response?.trim()) {
      continue;
    }
    let payload: unknown;
    try {
      payload = JSON.parse(call.response);
    } catch {
      // ignore parse errors
      continue;
    }
    if (!Array.isArray(payload)) {
      continue;
    }
    for (const entry of payload) {
      const candidate =
        entry && typeof entry === "object" && "id" in entry
          ? (entry as { id: unknown }).id
          : undefined;
      if (typeof candidate === "string") {
        ids.push({ id: candidate });
      }
    }
  }
  return ids;
}

View File

@ -46,6 +46,7 @@ i18n
"components/icons", "components/icons",
"components/player", "components/player",
"views/events", "views/events",
"views/chat",
"views/explore", "views/explore",
"views/live", "views/live",
"views/settings", "views/settings",