mirror of
https://github.com/blakeblackshear/frigate.git
synced 2026-01-22 20:18:30 +03:00
Improve handling of images in request
This commit is contained in:
parent
6569f27c26
commit
7e14b8a42e
@ -4,7 +4,7 @@ import base64
|
||||
import json
|
||||
import logging
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any, Dict, List
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import cv2
|
||||
from fastapi import APIRouter, Body, Depends, Request
|
||||
@ -94,10 +94,10 @@ def get_tool_definitions() -> List[Dict[str, Any]]:
|
||||
"function": {
|
||||
"name": "get_live_context",
|
||||
"description": (
|
||||
"Get the current live view and detection information for a camera. "
|
||||
"Returns the current camera frame as a base64-encoded image along with "
|
||||
"information about objects currently being tracked/detected on the camera. "
|
||||
"Use this to answer questions about what is happening right now on a specific camera."
|
||||
"Get the current detection information for a camera: objects being tracked, "
|
||||
"zones, timestamps. Use this to understand what is visible in the live view. "
|
||||
"Call this when the user has included a live image (via include_live_image) or "
|
||||
"when answering questions about what is happening right now on a specific camera."
|
||||
),
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
@ -255,16 +255,6 @@ async def _execute_get_live_context(
|
||||
"error": f"Camera '{camera}' state not available",
|
||||
}
|
||||
|
||||
frame = frame_processor.get_current_frame(camera, {})
|
||||
if frame is None:
|
||||
return {
|
||||
"error": f"Unable to get current frame for camera '{camera}'",
|
||||
}
|
||||
|
||||
_, img_encoded = cv2.imencode(".jpg", frame, [cv2.IMWRITE_JPEG_QUALITY, 85])
|
||||
image_base64 = base64.b64encode(img_encoded.tobytes()).decode("utf-8")
|
||||
image_data_url = f"data:image/jpeg;base64,{image_base64}"
|
||||
|
||||
tracked_objects_dict = {}
|
||||
with camera_state.current_frame_lock:
|
||||
tracked_objects = camera_state.tracked_objects.copy()
|
||||
@ -283,7 +273,6 @@ async def _execute_get_live_context(
|
||||
return {
|
||||
"camera": camera,
|
||||
"timestamp": frame_time,
|
||||
"image": image_data_url,
|
||||
"detections": list(tracked_objects_dict.values()),
|
||||
}
|
||||
|
||||
@ -294,6 +283,46 @@ async def _execute_get_live_context(
|
||||
}
|
||||
|
||||
|
||||
async def _get_live_frame_image_url(
    request: Request,
    camera: str,
    allowed_cameras: List[str],
) -> Optional[str]:
    """
    Fetch the current live frame for a camera as a base64 JPEG data URL.

    The frame is downscaled (aspect ratio preserved) so that its longest side
    is at most 1024 pixels, then encoded as JPEG at quality 85.

    Args:
        request: Incoming request; ``request.app`` supplies ``frigate_config``
            and ``detected_frames_processor``.
        camera: ID of the camera to read the frame from.
        allowed_cameras: Camera IDs the caller is permitted to access.

    Returns:
        A ``data:image/jpeg;base64,...`` URL, or None if the camera is not
        allowed, not configured, has no camera state or current frame, or the
        frame cannot be resized/encoded. Used when include_live_image is set
        to attach the image to the first user message.
    """
    if (
        camera not in allowed_cameras
        or camera not in request.app.frigate_config.cameras
    ):
        return None

    try:
        frame_processor = request.app.detected_frames_processor
        if camera not in frame_processor.camera_states:
            return None

        frame = frame_processor.get_current_frame(camera, {})
        if frame is None:
            return None

        height, width = frame.shape[:2]
        max_dimension = 1024
        longest_side = max(height, width)
        if longest_side > max_dimension:
            scale = max_dimension / longest_side
            # int() truncates, so a very elongated frame could otherwise end
            # up with a 0-pixel side, which cv2.resize rejects.
            target_size = (
                max(1, int(width * scale)),
                max(1, int(height * scale)),
            )
            frame = cv2.resize(frame, target_size, interpolation=cv2.INTER_AREA)

        encoded_ok, img_encoded = cv2.imencode(
            ".jpg", frame, [cv2.IMWRITE_JPEG_QUALITY, 85]
        )
        if not encoded_ok:
            # Do not hand the model an empty/invalid data URL.
            logger.debug("Failed to encode live frame for %s as JPEG", camera)
            return None

        b64 = base64.b64encode(img_encoded.tobytes()).decode("utf-8")
        return f"data:image/jpeg;base64,{b64}"
    except Exception as e:
        # Best-effort: a missing image must not fail the whole chat request.
        logger.debug("Failed to get live frame for %s: %s", camera, e)
        return None
|
||||
|
||||
|
||||
async def _execute_tool_internal(
|
||||
tool_name: str,
|
||||
arguments: Dict[str, Any],
|
||||
@ -391,13 +420,21 @@ async def chat_completion(
|
||||
+ "\n\nWhen users refer to cameras by their friendly name (e.g., 'Back Deck Camera'), use the corresponding camera ID (e.g., 'back_deck_cam') in tool calls."
|
||||
)
|
||||
|
||||
live_image_note = ""
|
||||
if body.include_live_image:
|
||||
live_image_note = (
|
||||
f"\n\nThe first user message includes a live image from camera "
|
||||
f"'{body.include_live_image}'. Use get_live_context for that camera to get "
|
||||
"current detection details (objects, zones) to aid in understanding the image."
|
||||
)
|
||||
|
||||
system_prompt = f"""You are a helpful assistant for Frigate, a security camera NVR system. You help users answer questions about their cameras, detected objects, and events.
|
||||
|
||||
Current date and time: {current_date_str} at {current_time_str} (UTC)
|
||||
|
||||
When users ask questions about "today", "yesterday", "this week", etc., use the current date above as reference.
|
||||
When searching for objects or events, use ISO 8601 format for dates (e.g., {current_date_str}T00:00:00Z for the start of today).
|
||||
Always be accurate with time calculations based on the current date provided.{cameras_section}"""
|
||||
Always be accurate with time calculations based on the current date provided.{cameras_section}{live_image_note}"""
|
||||
|
||||
conversation.append(
|
||||
{
|
||||
@ -406,6 +443,7 @@ Always be accurate with time calculations based on the current date provided.{ca
|
||||
}
|
||||
)
|
||||
|
||||
first_user_message_seen = False
|
||||
for msg in body.messages:
|
||||
msg_dict = {
|
||||
"role": msg.role,
|
||||
@ -415,6 +453,22 @@ Always be accurate with time calculations based on the current date provided.{ca
|
||||
msg_dict["tool_call_id"] = msg.tool_call_id
|
||||
if msg.name:
|
||||
msg_dict["name"] = msg.name
|
||||
|
||||
if (
|
||||
msg.role == "user"
|
||||
and not first_user_message_seen
|
||||
and body.include_live_image
|
||||
):
|
||||
first_user_message_seen = True
|
||||
image_url = await _get_live_frame_image_url(
|
||||
request, body.include_live_image, allowed_cameras
|
||||
)
|
||||
if image_url:
|
||||
msg_dict["content"] = [
|
||||
{"type": "text", "text": msg.content},
|
||||
{"type": "image_url", "image_url": {"url": image_url}},
|
||||
]
|
||||
|
||||
conversation.append(msg_dict)
|
||||
|
||||
tool_iterations = 0
|
||||
|
||||
@ -32,3 +32,10 @@ class ChatCompletionRequest(BaseModel):
|
||||
le=10,
|
||||
description="Maximum number of tool call iterations (default: 5)",
|
||||
)
|
||||
include_live_image: Optional[str] = Field(
|
||||
default=None,
|
||||
description=(
|
||||
"If set, the current live frame from this camera is attached to the first "
|
||||
"user message as multimodal content. Use with get_live_context for detection info."
|
||||
),
|
||||
)
|
||||
|
||||
@ -216,7 +216,14 @@ class LlamaCppClient(GenAIClient):
|
||||
"finish_reason": "error",
|
||||
}
|
||||
except requests.exceptions.RequestException as e:
|
||||
logger.warning("llama.cpp returned an error: %s", str(e))
|
||||
error_detail = str(e)
|
||||
if hasattr(e, "response") and e.response is not None:
|
||||
try:
|
||||
error_body = e.response.text
|
||||
error_detail = f"{str(e)} - Response: {error_body[:500]}"
|
||||
except Exception:
|
||||
pass
|
||||
logger.warning("llama.cpp returned an error: %s", error_detail)
|
||||
return {
|
||||
"content": None,
|
||||
"tool_calls": None,
|
||||
|
||||
Loading…
Reference in New Issue
Block a user