Migrate prompts to dedicated folder

This commit is contained in:
Nicolas Mowen 2026-05-19 09:00:43 -06:00
parent 9fafcdd6cd
commit f5b7395d69
2 changed files with 232 additions and 148 deletions

View File

@ -1,6 +1,5 @@
"""Generative AI module for Frigate."""
import datetime
import importlib
import json
import logging
@ -9,13 +8,18 @@ import re
from typing import Any, Callable, Optional
import numpy as np
from playhouse.shortcuts import model_to_dict
from pydantic import ValidationError
from frigate.config import CameraConfig, GenAIConfig, GenAIProviderEnum
from frigate.const import CLIPS_DIR
from frigate.data_processing.post.types import ReviewMetadata
from frigate.genai.manager import GenAIClientManager
from frigate.genai.prompts import (
build_object_description_prompt,
build_review_description_prompt,
build_review_description_response_format,
build_review_summary_prompt,
)
from frigate.models import Event
logger = logging.getLogger(__name__)
@ -61,75 +65,14 @@ class GenAIClient:
activity_context_prompt: str,
) -> ReviewMetadata | None:
"""Generate a description for the review item activity."""
context_prompt = build_review_description_prompt(
review_data,
thumbnails,
concerns,
preferred_language,
activity_context_prompt,
)
def get_concern_prompt() -> str:
if concerns:
concern_list = "\n - ".join(concerns)
return f"""- `other_concerns` (list of strings): Include a list of any of the following concerns that are occurring:
- {concern_list}"""
else:
return ""
def get_language_prompt() -> str:
if preferred_language:
return f"Provide your answer in {preferred_language}"
else:
return ""
def get_objects_list() -> str:
if review_data["unified_objects"]:
return "\n- " + "\n- ".join(review_data["unified_objects"])
else:
return "\n- (No objects detected)"
context_prompt = f"""
Your task is to analyze a sequence of images taken in chronological order from a security camera.
## Normal Activity Patterns for This Property
{activity_context_prompt}
## Task Instructions
Describe the scene based on observable actions and movements, evaluate the activity against the Activity Indicators above, and assign a potential_threat_level (0, 1, or 2) by applying the threat level indicators consistently.
## Analysis Guidelines
When forming your description:
- **CRITICAL: Only describe objects explicitly listed in "Objects in Scene" below.** Do not infer or mention additional people, vehicles, or objects not present in this list, even if visual patterns suggest them. If only a car is listed, do not describe a person interacting with it unless "person" is also in the objects list.
- **Only describe actions actually visible in the frames.** Do not assume or infer actions that you don't observe happening. If someone walks toward furniture but you never see them sit, do not say they sat. Stick to what you can see across the sequence.
- Describe what you observe: actions, movements, interactions with objects and the environment. Include any observable environmental changes (e.g., lighting changes triggered by activity).
- Note visible details such as clothing, items being carried or placed, tools or equipment present, and how they interact with the property or objects.
- Consider the full sequence chronologically: what happens from start to finish, how duration and actions relate to the location and objects involved.
- **Use the actual timestamp provided in "Activity started at"** below for time of day contextdo not infer time from image brightness or darkness. Unusual hours (late night/early morning) should increase suspicion when the observable behavior itself appears questionable. However, recognize that some legitimate activities can occur at any hour.
- **Consider duration as a primary factor**: Apply the duration thresholds defined in the activity patterns above. Brief sequences during normal hours with apparent purpose typically indicate normal activity unless explicit suspicious actions are visible.
- **Weigh all evidence holistically**: Match the activity against the normal and suspicious patterns defined above, then evaluate based on the complete context (zone, objects, time, actions, duration). Apply the threat level indicators consistently. Use your judgment for edge cases.
## Response Field Guidelines
Respond with a JSON object matching the provided schema. Field-specific guidance:
- `observations`: Include the very start of the activity for example, a vehicle entering the frame or pulling into the driveway even if it lasts only a few frames and the rest of the clip is dominated by a longer activity. Include each arrival, departure, object handled, and notable change in position or state. Each item is a single concrete fact written as a complete sentence.
- `scene`: Describe how the sequence begins, then the progression of events all significant movements and actions in order. For example, if a vehicle arrives and then a person exits, describe both sequentially. For named subjects (those with a `` separator in "Objects in Scene"), always use their name do not replace them with generic terms. For unnamed objects (e.g., "person", "car"), refer to them naturally with articles (e.g., "a person", "the car"). Your description should align with and support the threat level you assign.
- `title`: Name the primary activity across the observations, together with the location. An activity is what is being done with objects, tools, or surfaces; locomotion through the scene qualifies as the activity only when no other interaction is observed. For named subjects, always use their name. For unnamed objects, refer to them naturally with articles.
- `shortSummary`: Briefly summarize the primary activity across the observations.
- `potential_threat_level`: Must be consistent with your scene description and the activity patterns above.
## Sequence Details
- Camera: {review_data["camera"]}
- Total frames: {len(thumbnails)} (Frame 1 = earliest, Frame {len(thumbnails)} = latest)
- Activity started at {review_data["start"]} and lasted {review_data["duration"]} seconds
- Zones involved: {", ".join(review_data["zones"]) if review_data["zones"] else "None"}
## Objects in Scene
Each line represents a detection state, not necessarily unique individuals. The `` symbol separates a recognized subject's name from their object type — use only the name (before the `←`) in your response, not the type after it. The same subject may appear across multiple lines if detected multiple times.
**Note: Unidentified objects (without names) are NOT indicators of suspicious activitythey simply mean the system hasn't identified that object.**
{get_objects_list()}
{get_language_prompt()}
"""
logger.debug(
f"Sending {len(thumbnails)} images to create review description on {review_data['camera']}"
)
@ -143,25 +86,7 @@ Each line represents a detection state, not necessarily unique individuals. The
) as f:
f.write(context_prompt)
# Build JSON schema for structured output from ReviewMetadata model
schema = ReviewMetadata.model_json_schema()
schema.get("properties", {}).pop("time", None)
if "time" in schema.get("required", []):
schema["required"].remove("time")
if not concerns:
schema.get("properties", {}).pop("other_concerns", None)
if "other_concerns" in schema.get("required", []):
schema["required"].remove("other_concerns")
response_format = {
"type": "json_schema",
"json_schema": {
"name": "review_metadata",
"strict": True,
"schema": schema,
},
}
response_format = build_review_description_response_format(concerns)
response = self._send(context_prompt, thumbnails, response_format)
@ -240,61 +165,9 @@ Each line represents a detection state, not necessarily unique individuals. The
debug_save: bool,
) -> str | None:
"""Generate a summary of review item descriptions over a period of time."""
time_range = f"{datetime.datetime.fromtimestamp(start_ts).strftime('%B %d, %Y at %I:%M %p')} to {datetime.datetime.fromtimestamp(end_ts).strftime('%B %d, %Y at %I:%M %p')}"
timeline_summary_prompt = f"""
You are a security officer writing a concise security report.
Time range: {time_range}
Input format: Each event is a JSON object with:
- "title", "scene", "confidence", "potential_threat_level" (0-2), "other_concerns", "camera", "time", "start_time", "end_time"
- "context": array of related events from other cameras that occurred during overlapping time periods
**Note: Use the "scene" field for event descriptions in the report. Ignore any "shortSummary" field if present.**
Report Structure - Use this EXACT format:
# Security Summary - {time_range}
## Overview
[Write 1-2 sentences summarizing the overall activity pattern during this period.]
---
## Timeline
[Group events by time periods (e.g., "Morning (6:00 AM - 12:00 PM)", "Afternoon (12:00 PM - 5:00 PM)", "Evening (5:00 PM - 9:00 PM)", "Night (9:00 PM - 6:00 AM)"). Use appropriate time blocks based on when events occurred.]
### [Time Block Name]
**HH:MM AM/PM** | [Camera Name] | [Threat Level Indicator]
- [Event title]: [Clear description incorporating contextual information from the "context" array]
- Context: [If context array has items, mention them here, e.g., "Delivery truck present on Front Driveway Cam (HH:MM AM/PM)"]
- Assessment: [Brief assessment incorporating context - if context explains the event, note it here]
[Repeat for each event in chronological order within the time block]
---
## Summary
[One sentence summarizing the period. If all events are normal/explained: "Routine activity observed." If review needed: "Some activity requires review but no security concerns." If security concerns: "Security concerns requiring immediate attention."]
Guidelines:
- List ALL events in chronological order, grouped by time blocks
- Threat level indicators: Normal, Needs review, 🔴 Security concern
- Integrate contextual information naturally - use the "context" array to enrich each event's description
- If context explains the event (e.g., delivery truck explains person at door), describe it accordingly (e.g., "delivery person" not "unidentified person")
- Be concise but informative - focus on what happened and what it means
- If contextual information makes an event clearly normal, reflect that in your assessment
- Only create time blocks that have events - don't create empty sections
"""
timeline_summary_prompt += "\n\nEvents:\n"
for event in events:
timeline_summary_prompt += f"\n{event}\n"
if preferred_language:
timeline_summary_prompt += f"\nProvide your answer in {preferred_language}"
timeline_summary_prompt = build_review_summary_prompt(
start_ts, end_ts, events, preferred_language
)
if debug_save:
with open(
@ -326,10 +199,7 @@ Guidelines:
) -> Optional[str]:
"""Generate a description for the frame."""
try:
prompt = camera_config.objects.genai.object_prompts.get(
str(event.label),
camera_config.objects.genai.prompt,
).format(**model_to_dict(event))
prompt = build_object_description_prompt(camera_config, event)
except KeyError as e:
logger.error(f"Invalid key in GenAI prompt: {e}")
return None

214
frigate/genai/prompts.py Normal file
View File

@ -0,0 +1,214 @@
"""Prompt and response-format builders for GenAI features.
Centralizes the per-feature prompt framing and structured-output schema
shaping so provider clients in :mod:`frigate.genai.plugins` only handle
transport.
"""
import datetime
from typing import Any
from playhouse.shortcuts import model_to_dict
from frigate.config import CameraConfig
from frigate.data_processing.post.types import ReviewMetadata
from frigate.models import Event
def build_review_description_prompt(
review_data: dict[str, Any],
thumbnails: list[bytes],
concerns: list[str],
preferred_language: str | None,
activity_context_prompt: str,
) -> str:
"""Build the prompt for review activity description generation."""
def get_concern_prompt() -> str:
if concerns:
concern_list = "\n - ".join(concerns)
return (
"\n- `other_concerns` (list of strings): Include a list of any of "
"the following concerns that are occurring:\n"
f" - {concern_list}"
)
else:
return ""
def get_language_prompt() -> str:
if preferred_language:
return f"Provide your answer in {preferred_language}"
else:
return ""
def get_objects_list() -> str:
if review_data["unified_objects"]:
return "\n- " + "\n- ".join(review_data["unified_objects"])
else:
return "\n- (No objects detected)"
return f"""
Your task is to analyze a sequence of images taken in chronological order from a security camera.
## Normal Activity Patterns for This Property
{activity_context_prompt}
## Task Instructions
Describe the scene based on observable actions and movements, evaluate the activity against the Activity Indicators above, and assign a potential_threat_level (0, 1, or 2) by applying the threat level indicators consistently.
## Analysis Guidelines
When forming your description:
- **CRITICAL: Only describe objects explicitly listed in "Objects in Scene" below.** Do not infer or mention additional people, vehicles, or objects not present in this list, even if visual patterns suggest them. If only a car is listed, do not describe a person interacting with it unless "person" is also in the objects list.
- **Only describe actions actually visible in the frames.** Do not assume or infer actions that you don't observe happening. If someone walks toward furniture but you never see them sit, do not say they sat. Stick to what you can see across the sequence.
- Describe what you observe: actions, movements, interactions with objects and the environment. Include any observable environmental changes (e.g., lighting changes triggered by activity).
- Note visible details such as clothing, items being carried or placed, tools or equipment present, and how they interact with the property or objects.
- Consider the full sequence chronologically: what happens from start to finish, how duration and actions relate to the location and objects involved.
- **Use the actual timestamp provided in "Activity started at"** below for time of day contextdo not infer time from image brightness or darkness. Unusual hours (late night/early morning) should increase suspicion when the observable behavior itself appears questionable. However, recognize that some legitimate activities can occur at any hour.
- **Consider duration as a primary factor**: Apply the duration thresholds defined in the activity patterns above. Brief sequences during normal hours with apparent purpose typically indicate normal activity unless explicit suspicious actions are visible.
- **Weigh all evidence holistically**: Match the activity against the normal and suspicious patterns defined above, then evaluate based on the complete context (zone, objects, time, actions, duration). Apply the threat level indicators consistently. Use your judgment for edge cases.
## Response Field Guidelines
Respond with a JSON object matching the provided schema. Field-specific guidance:
- `observations`: Include the very start of the activity for example, a vehicle entering the frame or pulling into the driveway even if it lasts only a few frames and the rest of the clip is dominated by a longer activity. Include each arrival, departure, object handled, and notable change in position or state. Each item is a single concrete fact written as a complete sentence.
- `scene`: Describe how the sequence begins, then the progression of events all significant movements and actions in order. For example, if a vehicle arrives and then a person exits, describe both sequentially. For named subjects (those with a `` separator in "Objects in Scene"), always use their name do not replace them with generic terms. For unnamed objects (e.g., "person", "car"), refer to them naturally with articles (e.g., "a person", "the car"). Your description should align with and support the threat level you assign.
- `title`: Name the primary activity across the observations, together with the location. An activity is what is being done with objects, tools, or surfaces; locomotion through the scene qualifies as the activity only when no other interaction is observed. For named subjects, always use their name. For unnamed objects, refer to them naturally with articles.
- `shortSummary`: Briefly summarize the primary activity across the observations.
- `potential_threat_level`: Must be consistent with your scene description and the activity patterns above.
{get_concern_prompt()}
## Sequence Details
- Camera: {review_data["camera"]}
- Total frames: {len(thumbnails)} (Frame 1 = earliest, Frame {len(thumbnails)} = latest)
- Activity started at {review_data["start"]} and lasted {review_data["duration"]} seconds
- Zones involved: {", ".join(review_data["zones"]) if review_data["zones"] else "None"}
## Objects in Scene
Each line represents a detection state, not necessarily unique individuals. The `` symbol separates a recognized subject's name from their object type — use only the name (before the `←`) in your response, not the type after it. The same subject may appear across multiple lines if detected multiple times.
**Note: Unidentified objects (without names) are NOT indicators of suspicious activitythey simply mean the system hasn't identified that object.**
{get_objects_list()}
{get_language_prompt()}
"""
def build_review_description_response_format(concerns: list[str]) -> dict[str, Any]:
"""Build the structured-output JSON schema for review descriptions.
Strips the `time` field (populated server-side) and drops
`other_concerns` when no concerns are configured.
"""
schema = ReviewMetadata.model_json_schema()
schema.get("properties", {}).pop("time", None)
if "time" in schema.get("required", []):
schema["required"].remove("time")
if not concerns:
schema.get("properties", {}).pop("other_concerns", None)
if "other_concerns" in schema.get("required", []):
schema["required"].remove("other_concerns")
return {
"type": "json_schema",
"json_schema": {
"name": "review_metadata",
"strict": True,
"schema": schema,
},
}
def build_review_summary_prompt(
start_ts: float,
end_ts: float,
events: list[dict[str, Any]],
preferred_language: str | None,
) -> str:
"""Build the prompt for a multi-event review summary."""
time_range = (
f"{datetime.datetime.fromtimestamp(start_ts).strftime('%B %d, %Y at %I:%M %p')}"
f" to "
f"{datetime.datetime.fromtimestamp(end_ts).strftime('%B %d, %Y at %I:%M %p')}"
)
prompt = f"""
You are a security officer writing a concise security report.
Time range: {time_range}
Input format: Each event is a JSON object with:
- "title", "scene", "confidence", "potential_threat_level" (0-2), "other_concerns", "camera", "time", "start_time", "end_time"
- "context": array of related events from other cameras that occurred during overlapping time periods
**Note: Use the "scene" field for event descriptions in the report. Ignore any "shortSummary" field if present.**
Report Structure - Use this EXACT format:
# Security Summary - {time_range}
## Overview
[Write 1-2 sentences summarizing the overall activity pattern during this period.]
---
## Timeline
[Group events by time periods (e.g., "Morning (6:00 AM - 12:00 PM)", "Afternoon (12:00 PM - 5:00 PM)", "Evening (5:00 PM - 9:00 PM)", "Night (9:00 PM - 6:00 AM)"). Use appropriate time blocks based on when events occurred.]
### [Time Block Name]
**HH:MM AM/PM** | [Camera Name] | [Threat Level Indicator]
- [Event title]: [Clear description incorporating contextual information from the "context" array]
- Context: [If context array has items, mention them here, e.g., "Delivery truck present on Front Driveway Cam (HH:MM AM/PM)"]
- Assessment: [Brief assessment incorporating context - if context explains the event, note it here]
[Repeat for each event in chronological order within the time block]
---
## Summary
[One sentence summarizing the period. If all events are normal/explained: "Routine activity observed." If review needed: "Some activity requires review but no security concerns." If security concerns: "Security concerns requiring immediate attention."]
Guidelines:
- List ALL events in chronological order, grouped by time blocks
- Threat level indicators: Normal, Needs review, 🔴 Security concern
- Integrate contextual information naturally - use the "context" array to enrich each event's description
- If context explains the event (e.g., delivery truck explains person at door), describe it accordingly (e.g., "delivery person" not "unidentified person")
- Be concise but informative - focus on what happened and what it means
- If contextual information makes an event clearly normal, reflect that in your assessment
- Only create time blocks that have events - don't create empty sections
"""
prompt += "\n\nEvents:\n"
for event in events:
prompt += f"\n{event}\n"
if preferred_language:
prompt += f"\nProvide your answer in {preferred_language}"
return prompt
def build_object_description_prompt(
camera_config: CameraConfig,
event: Event,
) -> str:
"""Build the prompt for a per-object description.
Pulls the per-label override from `objects.genai.object_prompts`, falling
back to the camera default, and interpolates event fields.
Raises:
KeyError: if the user-defined prompt template references an unknown
event field.
"""
template = camera_config.objects.genai.object_prompts.get(
str(event.label),
camera_config.objects.genai.prompt,
)
return template.format(**model_to_dict(event))