From 5fdb56a10632b61dd94e087425753c0305369a9f Mon Sep 17 00:00:00 2001
From: Nicolas Mowen <nickmowen213@gmail.com>
Date: Thu, 22 Jan 2026 12:04:40 -0700
Subject: [PATCH] Add live context tool to LLM (#21754)

* Add live context tool

* Improve handling of images in request

* Improve prompt caching
---
 frigate/api/chat.py                   | 172 +++++++++++++++++++++++++-
 frigate/api/defs/request/chat_body.py |   7 ++
 frigate/genai/__init__.py             |   5 +-
 frigate/genai/llama_cpp.py            |   9 +-
 4 files changed, 188 insertions(+), 5 deletions(-)

diff --git a/frigate/api/chat.py b/frigate/api/chat.py
index eeff3ab6d..444650e13 100644
--- a/frigate/api/chat.py
+++ b/frigate/api/chat.py
@@ -1,10 +1,12 @@
 """Chat and LLM tool calling APIs."""
 
+import base64
 import json
 import logging
 from datetime import datetime, timezone
-from typing import Any, Dict, List
+from typing import Any, Dict, List, Optional
 
+import cv2
 from fastapi import APIRouter, Body, Depends, Request
 from fastapi.responses import JSONResponse
 from pydantic import BaseModel
@@ -87,6 +89,28 @@ def get_tool_definitions() -> List[Dict[str, Any]]:
                 "required": [],
             },
         },
+        {
+            "type": "function",
+            "function": {
+                "name": "get_live_context",
+                "description": (
+                    "Get the current detection information for a camera: objects being tracked, "
+                    "zones, timestamps. Use this to understand what is visible in the live view. "
+                    "Call this when the user has included a live image (via include_live_image) or "
+                    "when answering questions about what is happening right now on a specific camera."
+                ),
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "camera": {
+                            "type": "string",
+                            "description": "Camera name to get live context for.",
+                        },
+                    },
+                    "required": ["camera"],
+                },
+            },
+        },
     ]
 
 
@@ -207,6 +231,98 @@ async def execute_tool(
     )
 
 
+async def _execute_get_live_context(
+    request: Request,
+    camera: str,
+    allowed_cameras: List[str],
+) -> Dict[str, Any]:
+    """
+    Summarize the objects tracked on a camera's current frame.
+
+    Returns the camera name, the current frame time and one entry (label,
+    zones, sub label, stationary flag) for each tracked object whose frame
+    time equals the current frame time. Any failure, including an unknown or
+    disallowed camera, is reported as a dict with a single "error" key.
+    """
+    if camera not in allowed_cameras:
+        return {"error": f"Camera '{camera}' not found or access denied"}
+
+    if camera not in request.app.frigate_config.cameras:
+        return {"error": f"Camera '{camera}' not found"}
+
+    try:
+        state = request.app.detected_frames_processor.camera_states.get(camera)
+
+        if state is None:
+            return {"error": f"Camera '{camera}' state not available"}
+
+        # Copy the tracked objects and frame time while holding current_frame_lock;
+        # the per-object to_dict() calls run after it has been released.
+        with state.current_frame_lock:
+            snapshot = state.tracked_objects.copy()
+            current_time = state.current_frame_time
+
+        detections = []
+        for tracked in snapshot.values():
+            details = tracked.to_dict()
+            if details.get("frame_time") != current_time:
+                continue
+            detections.append(
+                {
+                    "label": details.get("label"),
+                    "zones": details.get("current_zones", []),
+                    "sub_label": details.get("sub_label"),
+                    "stationary": details.get("stationary", False),
+                }
+            )
+
+        return {"camera": camera, "timestamp": current_time, "detections": detections}
+
+    except Exception as e:
+        logger.exception(f"Error executing get_live_context: {e}")
+        return {"error": f"Error getting live context: {str(e)}"}
+
+
+async def _get_live_frame_image_url(
+    request: Request,
+    camera: str,
+    allowed_cameras: List[str],
+) -> Optional[str]:
+    """
+    Fetch the current live frame for a camera as a base64 data URL.
+
+    Returns None if the frame cannot be retrieved or encoded. Used when
+    include_live_image is set to attach the image to the first user message.
+    """
+    if (
+        camera not in allowed_cameras
+        or camera not in request.app.frigate_config.cameras
+    ):
+        return None
+    try:
+        frame_processor = request.app.detected_frames_processor
+        if camera not in frame_processor.camera_states:
+            return None
+        frame = frame_processor.get_current_frame(camera, {})
+        if frame is None:
+            return None
+        height, width = frame.shape[:2]
+        max_dimension = 1024
+        if height > max_dimension or width > max_dimension:
+            scale = max_dimension / max(height, width)
+            # Clamp to 1 px so an extreme aspect ratio never truncates a side to 0.
+            size = (max(1, int(width * scale)), max(1, int(height * scale)))
+            frame = cv2.resize(frame, size, interpolation=cv2.INTER_AREA)
+        ok, img_encoded = cv2.imencode(".jpg", frame, [cv2.IMWRITE_JPEG_QUALITY, 85])
+        if not ok:
+            raise ValueError("JPEG encoding failed")  # logged by the handler below
+        b64 = base64.b64encode(img_encoded.tobytes()).decode("utf-8")
+        return f"data:image/jpeg;base64,{b64}"
+    except Exception as e:
+        logger.debug("Failed to get live frame for %s: %s", camera, e)
+        return None
+
+
 async def _execute_tool_internal(
     tool_name: str,
     arguments: Dict[str, Any],
@@ -231,6 +347,11 @@ async def _execute_tool_internal(
         except (json.JSONDecodeError, AttributeError) as e:
             logger.warning(f"Failed to extract tool result: {e}")
             return {"error": "Failed to parse tool result"}
+    elif tool_name == "get_live_context":
+        camera = arguments.get("camera")
+        if not camera:
+            return {"error": "Camera parameter is required"}
+        return await _execute_get_live_context(request, camera, allowed_cameras)
     else:
         return {"error": f"Unknown tool: {tool_name}"}
 
@@ -277,13 +398,43 @@ async def chat_completion(
     current_datetime = datetime.now(timezone.utc)
     current_date_str = current_datetime.strftime("%Y-%m-%d")
     current_time_str = current_datetime.strftime("%H:%M:%S %Z")
+
+    cameras_info = []
+    config = request.app.frigate_config
+    for camera_id in allowed_cameras:
+        if camera_id not in config.cameras:
+            continue
+        camera_config = config.cameras[camera_id]
+        friendly_name = (
+            camera_config.friendly_name
+            if camera_config.friendly_name
+            else camera_id.replace("_", " ").title()
+        )
+        cameras_info.append(f"  - {friendly_name} (ID: {camera_id})")
+
+    cameras_section = ""
+    if cameras_info:
+        cameras_section = (
+            "\n\nAvailable cameras:\n"
+            + "\n".join(cameras_info)
+            + "\n\nWhen users refer to cameras by their friendly name (e.g., 'Back Deck Camera'), use the corresponding camera ID (e.g., 'back_deck_cam') in tool calls."
+        )
+
+    live_image_note = ""
+    if body.include_live_image:
+        live_image_note = (
+            f"\n\nThe first user message includes a live image from camera "
+            f"'{body.include_live_image}'. Use get_live_context for that camera to get "
+            "current detection details (objects, zones) to aid in understanding the image."
+        )
+
     system_prompt = f"""You are a helpful assistant for Frigate, a security camera NVR system. You help users answer questions about their cameras, detected objects, and events.
 
 Current date and time: {current_date_str} at {current_time_str} (UTC)
 
 When users ask questions about "today", "yesterday", "this week", etc., use the current date above as reference.
 When searching for objects or events, use ISO 8601 format for dates (e.g., {current_date_str}T00:00:00Z for the start of today).
-Always be accurate with time calculations based on the current date provided."""
+Always be accurate with time calculations based on the current date provided.{cameras_section}{live_image_note}"""
 
     conversation.append(
         {
@@ -292,6 +443,7 @@ Always be accurate with time calculations based on the current date provided."""
         }
     )
 
+    first_user_message_seen = False
     for msg in body.messages:
         msg_dict = {
             "role": msg.role,
@@ -301,6 +453,22 @@ Always be accurate with time calculations based on the current date provided."""
             msg_dict["tool_call_id"] = msg.tool_call_id
         if msg.name:
             msg_dict["name"] = msg.name
+
+        if (
+            msg.role == "user"
+            and not first_user_message_seen
+            and body.include_live_image
+        ):
+            first_user_message_seen = True
+            image_url = await _get_live_frame_image_url(
+                request, body.include_live_image, allowed_cameras
+            )
+            if image_url:
+                msg_dict["content"] = [
+                    {"type": "text", "text": msg.content},
+                    {"type": "image_url", "image_url": {"url": image_url}},
+                ]
+
         conversation.append(msg_dict)
 
     tool_iterations = 0
diff --git a/frigate/api/defs/request/chat_body.py b/frigate/api/defs/request/chat_body.py
index 7b327bf5a..fa3c3860a 100644
--- a/frigate/api/defs/request/chat_body.py
+++ b/frigate/api/defs/request/chat_body.py
@@ -32,3 +32,10 @@ class ChatCompletionRequest(BaseModel):
         le=10,
         description="Maximum number of tool call iterations (default: 5)",
     )
+    include_live_image: Optional[str] = Field(
+        default=None,
+        description=(
+            "If set, the current live frame from this camera is attached to the first "
+            "user message as multimodal content. Use with get_live_context for detection info."
+        ),
+    )
diff --git a/frigate/genai/__init__.py b/frigate/genai/__init__.py
index 4be75f418..fa418f101 100644
--- a/frigate/genai/__init__.py
+++ b/frigate/genai/__init__.py
@@ -69,7 +69,7 @@ class GenAIClient:
                 return "\n- (No objects detected)"
 
         context_prompt = f"""
-Your task is to analyze the sequence of images ({len(thumbnails)} total) taken in chronological order from the perspective of the {review_data["camera"]} security camera.
+Your task is to analyze a sequence of images taken in chronological order from a security camera.
 
 ## Normal Activity Patterns for This Property
 
@@ -107,7 +107,8 @@ Your response MUST be a flat JSON object with:
 
 ## Sequence Details
 
-- Frame 1 = earliest, Frame {len(thumbnails)} = latest
+- Camera: {review_data["camera"]}
+- Total frames: {len(thumbnails)} (Frame 1 = earliest, Frame {len(thumbnails)} = latest)
 - Activity started at {review_data["start"]} and lasted {review_data["duration"]} seconds
 - Zones involved: {", ".join(review_data["zones"]) if review_data["zones"] else "None"}
 
diff --git a/frigate/genai/llama_cpp.py b/frigate/genai/llama_cpp.py
index 5523ce389..fafef74ae 100644
--- a/frigate/genai/llama_cpp.py
+++ b/frigate/genai/llama_cpp.py
@@ -216,7 +216,14 @@ class LlamaCppClient(GenAIClient):
                 "finish_reason": "error",
             }
         except requests.exceptions.RequestException as e:
-            logger.warning("llama.cpp returned an error: %s", str(e))
+            error_detail = str(e)
+            if hasattr(e, "response") and e.response is not None:
+                try:
+                    error_body = e.response.text
+                    error_detail = f"{str(e)} - Response: {error_body[:500]}"
+                except Exception:
+                    pass
+            logger.warning("llama.cpp returned an error: %s", error_detail)
             return {
                 "content": None,
                 "tool_calls": None,