Migrate prompts to dedicated folder

2026-06-21 03:41:55 +03:00 · 2026-05-19 09:00:43 -06:00 · 2026-05-19 09:00:43 -06:00 · f5b7395d69
commit f5b7395d69
parent 9fafcdd6cd
2 changed files with 232 additions and 148 deletions
--- a/frigate/genai/__init__.py
+++ b/frigate/genai/__init__.py
@@ -1,6 +1,5 @@
 """Generative AI module for Frigate."""

-import datetime
 import importlib
 import json
 import logging
@@ -9,13 +8,18 @@ import re
 from typing import Any, Callable, Optional

 import numpy as np
-from playhouse.shortcuts import model_to_dict
 from pydantic import ValidationError

 from frigate.config import CameraConfig, GenAIConfig, GenAIProviderEnum
 from frigate.const import CLIPS_DIR
 from frigate.data_processing.post.types import ReviewMetadata
 from frigate.genai.manager import GenAIClientManager
+from frigate.genai.prompts import (
+    build_object_description_prompt,
+    build_review_description_prompt,
+    build_review_description_response_format,
+    build_review_summary_prompt,
+)
 from frigate.models import Event

 logger = logging.getLogger(__name__)
@@ -61,75 +65,14 @@ class GenAIClient:
        activity_context_prompt: str,
    ) -> ReviewMetadata | None:
        """Generate a description for the review item activity."""
+        context_prompt = build_review_description_prompt(
+            review_data,
+            thumbnails,
+            concerns,
+            preferred_language,
+            activity_context_prompt,
+        )

-        def get_concern_prompt() -> str:
-            if concerns:
-                concern_list = "\n    - ".join(concerns)
-                return f"""- `other_concerns` (list of strings): Include a list of any of the following concerns that are occurring:
-    - {concern_list}"""
-            else:
-                return ""
-
-        def get_language_prompt() -> str:
-            if preferred_language:
-                return f"Provide your answer in {preferred_language}"
-            else:
-                return ""
-
-        def get_objects_list() -> str:
-            if review_data["unified_objects"]:
-                return "\n- " + "\n- ".join(review_data["unified_objects"])
-            else:
-                return "\n- (No objects detected)"
-
-        context_prompt = f"""
-Your task is to analyze a sequence of images taken in chronological order from a security camera.
-
-## Normal Activity Patterns for This Property
-
-{activity_context_prompt}
-
-## Task Instructions
-
-Describe the scene based on observable actions and movements, evaluate the activity against the Activity Indicators above, and assign a potential_threat_level (0, 1, or 2) by applying the threat level indicators consistently.
-
-## Analysis Guidelines
-
-When forming your description:
-- **CRITICAL: Only describe objects explicitly listed in "Objects in Scene" below.** Do not infer or mention additional people, vehicles, or objects not present in this list, even if visual patterns suggest them. If only a car is listed, do not describe a person interacting with it unless "person" is also in the objects list.
-- **Only describe actions actually visible in the frames.** Do not assume or infer actions that you don't observe happening. If someone walks toward furniture but you never see them sit, do not say they sat. Stick to what you can see across the sequence.
-- Describe what you observe: actions, movements, interactions with objects and the environment. Include any observable environmental changes (e.g., lighting changes triggered by activity).
-- Note visible details such as clothing, items being carried or placed, tools or equipment present, and how they interact with the property or objects.
-- Consider the full sequence chronologically: what happens from start to finish, how duration and actions relate to the location and objects involved.
-- **Use the actual timestamp provided in "Activity started at"** below for time of day context—do not infer time from image brightness or darkness. Unusual hours (late night/early morning) should increase suspicion when the observable behavior itself appears questionable. However, recognize that some legitimate activities can occur at any hour.
-- **Consider duration as a primary factor**: Apply the duration thresholds defined in the activity patterns above. Brief sequences during normal hours with apparent purpose typically indicate normal activity unless explicit suspicious actions are visible.
-- **Weigh all evidence holistically**: Match the activity against the normal and suspicious patterns defined above, then evaluate based on the complete context (zone, objects, time, actions, duration). Apply the threat level indicators consistently. Use your judgment for edge cases.
-
-## Response Field Guidelines
-
-Respond with a JSON object matching the provided schema. Field-specific guidance:
-- `observations`: Include the very start of the activity — for example, a vehicle entering the frame or pulling into the driveway — even if it lasts only a few frames and the rest of the clip is dominated by a longer activity. Include each arrival, departure, object handled, and notable change in position or state. Each item is a single concrete fact written as a complete sentence.
-- `scene`: Describe how the sequence begins, then the progression of events — all significant movements and actions in order. For example, if a vehicle arrives and then a person exits, describe both sequentially. For named subjects (those with a `←` separator in "Objects in Scene"), always use their name — do not replace them with generic terms. For unnamed objects (e.g., "person", "car"), refer to them naturally with articles (e.g., "a person", "the car"). Your description should align with and support the threat level you assign.
-- `title`: Name the primary activity across the observations, together with the location. An activity is what is being done with objects, tools, or surfaces; locomotion through the scene qualifies as the activity only when no other interaction is observed. For named subjects, always use their name. For unnamed objects, refer to them naturally with articles.
-- `shortSummary`: Briefly summarize the primary activity across the observations.
-- `potential_threat_level`: Must be consistent with your scene description and the activity patterns above.
-
-## Sequence Details
-
-- Camera: {review_data["camera"]}
-- Total frames: {len(thumbnails)} (Frame 1 = earliest, Frame {len(thumbnails)} = latest)
-- Activity started at {review_data["start"]} and lasted {review_data["duration"]} seconds
-- Zones involved: {", ".join(review_data["zones"]) if review_data["zones"] else "None"}
-
-## Objects in Scene
-
-Each line represents a detection state, not necessarily unique individuals. The `←` symbol separates a recognized subject's name from their object type — use only the name (before the `←`) in your response, not the type after it. The same subject may appear across multiple lines if detected multiple times.
-
-**Note: Unidentified objects (without names) are NOT indicators of suspicious activity—they simply mean the system hasn't identified that object.**
-{get_objects_list()}
-
-{get_language_prompt()}
-"""
        logger.debug(
            f"Sending {len(thumbnails)} images to create review description on {review_data['camera']}"
        )
@@ -143,25 +86,7 @@ Each line represents a detection state, not necessarily unique individuals. The
            ) as f:
                f.write(context_prompt)

-        # Build JSON schema for structured output from ReviewMetadata model
-        schema = ReviewMetadata.model_json_schema()
-        schema.get("properties", {}).pop("time", None)
-
-        if "time" in schema.get("required", []):
-            schema["required"].remove("time")
-        if not concerns:
-            schema.get("properties", {}).pop("other_concerns", None)
-            if "other_concerns" in schema.get("required", []):
-                schema["required"].remove("other_concerns")
-
-        response_format = {
-            "type": "json_schema",
-            "json_schema": {
-                "name": "review_metadata",
-                "strict": True,
-                "schema": schema,
-            },
-        }
+        response_format = build_review_description_response_format(concerns)

        response = self._send(context_prompt, thumbnails, response_format)

@@ -240,61 +165,9 @@ Each line represents a detection state, not necessarily unique individuals. The
        debug_save: bool,
    ) -> str | None:
        """Generate a summary of review item descriptions over a period of time."""
-        time_range = f"{datetime.datetime.fromtimestamp(start_ts).strftime('%B %d, %Y at %I:%M %p')} to {datetime.datetime.fromtimestamp(end_ts).strftime('%B %d, %Y at %I:%M %p')}"
-        timeline_summary_prompt = f"""
-You are a security officer writing a concise security report.
-
-Time range: {time_range}
-
-Input format: Each event is a JSON object with:
-- "title", "scene", "confidence", "potential_threat_level" (0-2), "other_concerns", "camera", "time", "start_time", "end_time"
-- "context": array of related events from other cameras that occurred during overlapping time periods
-
-**Note: Use the "scene" field for event descriptions in the report. Ignore any "shortSummary" field if present.**
-
-Report Structure - Use this EXACT format:
-
-# Security Summary - {time_range}
-
-## Overview
-[Write 1-2 sentences summarizing the overall activity pattern during this period.]
-
----
-
-## Timeline
-
-[Group events by time periods (e.g., "Morning (6:00 AM - 12:00 PM)", "Afternoon (12:00 PM - 5:00 PM)", "Evening (5:00 PM - 9:00 PM)", "Night (9:00 PM - 6:00 AM)"). Use appropriate time blocks based on when events occurred.]
-
-### [Time Block Name]
-
-**HH:MM AM/PM** | [Camera Name] | [Threat Level Indicator]
-- [Event title]: [Clear description incorporating contextual information from the "context" array]
-- Context: [If context array has items, mention them here, e.g., "Delivery truck present on Front Driveway Cam (HH:MM AM/PM)"]
-- Assessment: [Brief assessment incorporating context - if context explains the event, note it here]
-
-[Repeat for each event in chronological order within the time block]
-
----
-
-## Summary
-[One sentence summarizing the period. If all events are normal/explained: "Routine activity observed." If review needed: "Some activity requires review but no security concerns." If security concerns: "Security concerns requiring immediate attention."]
-
-Guidelines:
-- List ALL events in chronological order, grouped by time blocks
-- Threat level indicators: ✓ Normal, ⚠️ Needs review, 🔴 Security concern
-- Integrate contextual information naturally - use the "context" array to enrich each event's description
-- If context explains the event (e.g., delivery truck explains person at door), describe it accordingly (e.g., "delivery person" not "unidentified person")
-- Be concise but informative - focus on what happened and what it means
-- If contextual information makes an event clearly normal, reflect that in your assessment
-- Only create time blocks that have events - don't create empty sections
-"""
-
-        timeline_summary_prompt += "\n\nEvents:\n"
-        for event in events:
-            timeline_summary_prompt += f"\n{event}\n"
-
-        if preferred_language:
-            timeline_summary_prompt += f"\nProvide your answer in {preferred_language}"
+        timeline_summary_prompt = build_review_summary_prompt(
+            start_ts, end_ts, events, preferred_language
+        )

        if debug_save:
            with open(
@@ -326,10 +199,7 @@ Guidelines:
    ) -> Optional[str]:
        """Generate a description for the frame."""
        try:
-            prompt = camera_config.objects.genai.object_prompts.get(
-                str(event.label),
-                camera_config.objects.genai.prompt,
-            ).format(**model_to_dict(event))
+            prompt = build_object_description_prompt(camera_config, event)
        except KeyError as e:
            logger.error(f"Invalid key in GenAI prompt: {e}")
            return None
--- a/frigate/genai/prompts.py
+++ b/frigate/genai/prompts.py
@@ -0,0 +1,214 @@
+"""Prompt and response-format builders for GenAI features.
+
+Centralizes the per-feature prompt framing and structured-output schema
+shaping so provider clients in :mod:`frigate.genai.plugins` only handle
+transport.
+"""
+
+import datetime
+from typing import Any
+
+from playhouse.shortcuts import model_to_dict
+
+from frigate.config import CameraConfig
+from frigate.data_processing.post.types import ReviewMetadata
+from frigate.models import Event
+
+
+def build_review_description_prompt(
+    review_data: dict[str, Any],
+    thumbnails: list[bytes],
+    concerns: list[str],
+    preferred_language: str | None,
+    activity_context_prompt: str,
+) -> str:
+    """Build the prompt for review activity description generation."""
+
+    def get_concern_prompt() -> str:
+        if concerns:
+            concern_list = "\n    - ".join(concerns)
+            return (
+                "\n- `other_concerns` (list of strings): Include a list of any of "
+                "the following concerns that are occurring:\n"
+                f"    - {concern_list}"
+            )
+        else:
+            return ""
+
+    def get_language_prompt() -> str:
+        if preferred_language:
+            return f"Provide your answer in {preferred_language}"
+        else:
+            return ""
+
+    def get_objects_list() -> str:
+        if review_data["unified_objects"]:
+            return "\n- " + "\n- ".join(review_data["unified_objects"])
+        else:
+            return "\n- (No objects detected)"
+
+    return f"""
+Your task is to analyze a sequence of images taken in chronological order from a security camera.
+
+## Normal Activity Patterns for This Property
+
+{activity_context_prompt}
+
+## Task Instructions
+
+Describe the scene based on observable actions and movements, evaluate the activity against the Activity Indicators above, and assign a potential_threat_level (0, 1, or 2) by applying the threat level indicators consistently.
+
+## Analysis Guidelines
+
+When forming your description:
+- **CRITICAL: Only describe objects explicitly listed in "Objects in Scene" below.** Do not infer or mention additional people, vehicles, or objects not present in this list, even if visual patterns suggest them. If only a car is listed, do not describe a person interacting with it unless "person" is also in the objects list.
+- **Only describe actions actually visible in the frames.** Do not assume or infer actions that you don't observe happening. If someone walks toward furniture but you never see them sit, do not say they sat. Stick to what you can see across the sequence.
+- Describe what you observe: actions, movements, interactions with objects and the environment. Include any observable environmental changes (e.g., lighting changes triggered by activity).
+- Note visible details such as clothing, items being carried or placed, tools or equipment present, and how they interact with the property or objects.
+- Consider the full sequence chronologically: what happens from start to finish, how duration and actions relate to the location and objects involved.
+- **Use the actual timestamp provided in "Activity started at"** below for time of day context—do not infer time from image brightness or darkness. Unusual hours (late night/early morning) should increase suspicion when the observable behavior itself appears questionable. However, recognize that some legitimate activities can occur at any hour.
+- **Consider duration as a primary factor**: Apply the duration thresholds defined in the activity patterns above. Brief sequences during normal hours with apparent purpose typically indicate normal activity unless explicit suspicious actions are visible.
+- **Weigh all evidence holistically**: Match the activity against the normal and suspicious patterns defined above, then evaluate based on the complete context (zone, objects, time, actions, duration). Apply the threat level indicators consistently. Use your judgment for edge cases.
+
+## Response Field Guidelines
+
+Respond with a JSON object matching the provided schema. Field-specific guidance:
+- `observations`: Include the very start of the activity — for example, a vehicle entering the frame or pulling into the driveway — even if it lasts only a few frames and the rest of the clip is dominated by a longer activity. Include each arrival, departure, object handled, and notable change in position or state. Each item is a single concrete fact written as a complete sentence.
+- `scene`: Describe how the sequence begins, then the progression of events — all significant movements and actions in order. For example, if a vehicle arrives and then a person exits, describe both sequentially. For named subjects (those with a `←` separator in "Objects in Scene"), always use their name — do not replace them with generic terms. For unnamed objects (e.g., "person", "car"), refer to them naturally with articles (e.g., "a person", "the car"). Your description should align with and support the threat level you assign.
+- `title`: Name the primary activity across the observations, together with the location. An activity is what is being done with objects, tools, or surfaces; locomotion through the scene qualifies as the activity only when no other interaction is observed. For named subjects, always use their name. For unnamed objects, refer to them naturally with articles.
+- `shortSummary`: Briefly summarize the primary activity across the observations.
+- `potential_threat_level`: Must be consistent with your scene description and the activity patterns above.
+{get_concern_prompt()}
+
+## Sequence Details
+
+- Camera: {review_data["camera"]}
+- Total frames: {len(thumbnails)} (Frame 1 = earliest, Frame {len(thumbnails)} = latest)
+- Activity started at {review_data["start"]} and lasted {review_data["duration"]} seconds
+- Zones involved: {", ".join(review_data["zones"]) if review_data["zones"] else "None"}
+
+## Objects in Scene
+
+Each line represents a detection state, not necessarily unique individuals. The `←` symbol separates a recognized subject's name from their object type — use only the name (before the `←`) in your response, not the type after it. The same subject may appear across multiple lines if detected multiple times.
+
+**Note: Unidentified objects (without names) are NOT indicators of suspicious activity—they simply mean the system hasn't identified that object.**
+{get_objects_list()}
+
+{get_language_prompt()}
+"""
+
+
+def build_review_description_response_format(concerns: list[str]) -> dict[str, Any]:
+    """Build the structured-output JSON schema for review descriptions.
+
+    Strips the `time` field (populated server-side) and drops
+    `other_concerns` when no concerns are configured.
+    """
+    schema = ReviewMetadata.model_json_schema()
+    schema.get("properties", {}).pop("time", None)
+
+    if "time" in schema.get("required", []):
+        schema["required"].remove("time")
+    if not concerns:
+        schema.get("properties", {}).pop("other_concerns", None)
+        if "other_concerns" in schema.get("required", []):
+            schema["required"].remove("other_concerns")
+
+    return {
+        "type": "json_schema",
+        "json_schema": {
+            "name": "review_metadata",
+            "strict": True,
+            "schema": schema,
+        },
+    }
+
+
+def build_review_summary_prompt(
+    start_ts: float,
+    end_ts: float,
+    events: list[dict[str, Any]],
+    preferred_language: str | None,
+) -> str:
+    """Build the prompt for a multi-event review summary."""
+    time_range = (
+        f"{datetime.datetime.fromtimestamp(start_ts).strftime('%B %d, %Y at %I:%M %p')}"
+        f" to "
+        f"{datetime.datetime.fromtimestamp(end_ts).strftime('%B %d, %Y at %I:%M %p')}"
+    )
+    prompt = f"""
+You are a security officer writing a concise security report.
+
+Time range: {time_range}
+
+Input format: Each event is a JSON object with:
+- "title", "scene", "confidence", "potential_threat_level" (0-2), "other_concerns", "camera", "time", "start_time", "end_time"
+- "context": array of related events from other cameras that occurred during overlapping time periods
+
+**Note: Use the "scene" field for event descriptions in the report. Ignore any "shortSummary" field if present.**
+
+Report Structure - Use this EXACT format:
+
+# Security Summary - {time_range}
+
+## Overview
+[Write 1-2 sentences summarizing the overall activity pattern during this period.]
+
+---
+
+## Timeline
+
+[Group events by time periods (e.g., "Morning (6:00 AM - 12:00 PM)", "Afternoon (12:00 PM - 5:00 PM)", "Evening (5:00 PM - 9:00 PM)", "Night (9:00 PM - 6:00 AM)"). Use appropriate time blocks based on when events occurred.]
+
+### [Time Block Name]
+
+**HH:MM AM/PM** | [Camera Name] | [Threat Level Indicator]
+- [Event title]: [Clear description incorporating contextual information from the "context" array]
+- Context: [If context array has items, mention them here, e.g., "Delivery truck present on Front Driveway Cam (HH:MM AM/PM)"]
+- Assessment: [Brief assessment incorporating context - if context explains the event, note it here]
+
+[Repeat for each event in chronological order within the time block]
+
+---
+
+## Summary
+[One sentence summarizing the period. If all events are normal/explained: "Routine activity observed." If review needed: "Some activity requires review but no security concerns." If security concerns: "Security concerns requiring immediate attention."]
+
+Guidelines:
+- List ALL events in chronological order, grouped by time blocks
+- Threat level indicators: ✓ Normal, ⚠️ Needs review, 🔴 Security concern
+- Integrate contextual information naturally - use the "context" array to enrich each event's description
+- If context explains the event (e.g., delivery truck explains person at door), describe it accordingly (e.g., "delivery person" not "unidentified person")
+- Be concise but informative - focus on what happened and what it means
+- If contextual information makes an event clearly normal, reflect that in your assessment
+- Only create time blocks that have events - don't create empty sections
+"""
+
+    prompt += "\n\nEvents:\n"
+    for event in events:
+        prompt += f"\n{event}\n"
+
+    if preferred_language:
+        prompt += f"\nProvide your answer in {preferred_language}"
+
+    return prompt
+
+
+def build_object_description_prompt(
+    camera_config: CameraConfig,
+    event: Event,
+) -> str:
+    """Build the prompt for a per-object description.
+
+    Pulls the per-label override from `objects.genai.object_prompts`, falling
+    back to the camera default, and interpolates event fields.
+
+    Raises:
+        KeyError: if the user-defined prompt template references an unknown
+            event field.
+    """
+    template = camera_config.objects.genai.object_prompts.get(
+        str(event.label),
+        camera_config.objects.genai.prompt,
+    )
+    return template.format(**model_to_dict(event))