mirror of
https://github.com/blakeblackshear/frigate.git
synced 2026-03-11 02:47:37 +03:00
Refactor Review GenAI Prompt (#22353)
Some checks are pending
CI / AMD64 Build (push) Waiting to run
CI / Assemble and push default build (push) Blocked by required conditions
CI / ARM Build (push) Waiting to run
CI / Jetson Jetpack 6 (push) Waiting to run
CI / AMD64 Extra Build (push) Blocked by required conditions
CI / ARM Extra Build (push) Blocked by required conditions
CI / Synaptics Build (push) Blocked by required conditions
Some checks are pending
CI / AMD64 Build (push) Waiting to run
CI / Assemble and push default build (push) Blocked by required conditions
CI / ARM Build (push) Waiting to run
CI / Jetson Jetpack 6 (push) Waiting to run
CI / AMD64 Extra Build (push) Blocked by required conditions
CI / ARM Extra Build (push) Blocked by required conditions
CI / Synaptics Build (push) Blocked by required conditions
* Improve title to better capture activity
* Improve efficiency of prompt
* Use json format for llama.cpp
* Cleanup prompt
* Add output format for other LLMs
This commit is contained in:
parent
e75b8ca6cc
commit
5254bfd00e
@ -4,20 +4,24 @@ from pydantic import BaseModel, ConfigDict, Field
|
||||
class ReviewMetadata(BaseModel):
|
||||
model_config = ConfigDict(extra="ignore", protected_namespaces=())
|
||||
|
||||
title: str = Field(description="A concise title for the activity.")
|
||||
title: str = Field(
|
||||
description="A short title characterizing what took place and where, under 10 words."
|
||||
)
|
||||
scene: str = Field(
|
||||
description="A comprehensive description of the setting and entities, including relevant context and plausible inferences if supported by visual evidence."
|
||||
description="A chronological narrative of what happens from start to finish."
|
||||
)
|
||||
shortSummary: str = Field(
|
||||
description="A brief 2-sentence summary of the scene, suitable for notifications. Should capture the key activity and context without full detail."
|
||||
description="A brief 2-sentence summary of the scene, suitable for notifications."
|
||||
)
|
||||
confidence: float = Field(
|
||||
description="A float between 0 and 1 representing your overall confidence in this analysis."
|
||||
ge=0.0,
|
||||
le=1.0,
|
||||
description="Confidence in the analysis, from 0 to 1.",
|
||||
)
|
||||
potential_threat_level: int = Field(
|
||||
ge=0,
|
||||
le=3,
|
||||
description="An integer representing the potential threat level (1-3). 1: Minor anomaly. 2: Moderate concern. 3: High threat. Only include this field if a clear security concern is observable; otherwise, omit it.",
|
||||
le=2,
|
||||
description="Threat level: 0 = normal, 1 = suspicious, 2 = critical threat.",
|
||||
)
|
||||
other_concerns: list[str] | None = Field(
|
||||
default=None,
|
||||
|
||||
@ -89,12 +89,7 @@ Your task is to analyze a sequence of images taken in chronological order from a
|
||||
|
||||
## Task Instructions
|
||||
|
||||
Your task is to provide a clear, accurate description of the scene that:
|
||||
1. States exactly what is happening based on observable actions and movements.
|
||||
2. Evaluates the activity against the Normal and Suspicious Activity Indicators above.
|
||||
3. Assigns a potential_threat_level (0, 1, or 2) based on the threat level indicators defined above, applying them consistently.
|
||||
|
||||
**Use the activity patterns above as guidance to calibrate your assessment. Match the activity against both normal and suspicious indicators, then use your judgment based on the complete context.**
|
||||
Describe the scene based on observable actions and movements, evaluate the activity against the Activity Indicators above, and assign a potential_threat_level (0, 1, or 2) by applying the threat level indicators consistently.
|
||||
|
||||
## Analysis Guidelines
|
||||
|
||||
@ -108,14 +103,12 @@ When forming your description:
|
||||
- **Consider duration as a primary factor**: Apply the duration thresholds defined in the activity patterns above. Brief sequences during normal hours with apparent purpose typically indicate normal activity unless explicit suspicious actions are visible.
|
||||
- **Weigh all evidence holistically**: Match the activity against the normal and suspicious patterns defined above, then evaluate based on the complete context (zone, objects, time, actions, duration). Apply the threat level indicators consistently. Use your judgment for edge cases.
|
||||
|
||||
## Response Format
|
||||
## Response Field Guidelines
|
||||
|
||||
Your response MUST be a flat JSON object with:
|
||||
- `scene` (string): A narrative description of what happens across the sequence from start to finish, in chronological order. Start by describing how the sequence begins, then describe the progression of events. **Describe all significant movements and actions in the order they occur.** For example, if a vehicle arrives and then a person exits, describe both actions sequentially. **Only describe actions you can actually observe happening in the frames provided.** Do not infer or assume actions that aren't visible (e.g., if you see someone walking but never see them sit, don't say they sat down). Include setting, detected objects, and their observable actions. Avoid speculation or filling in assumed behaviors. Your description should align with and support the threat level you assign.
|
||||
- `title` (string): A concise, grammatically complete title in the format "[Subject] [action verb] [context]" that matches your scene description. Use names from "Objects in Scene" when you visually observe them.
|
||||
- `shortSummary` (string): A brief 2-sentence summary of the scene, suitable for notifications. Should capture the key activity and context without full detail. This should be a condensed version of the scene description above.
|
||||
- `confidence` (float): 0-1 confidence in your analysis. Higher confidence when objects/actions are clearly visible and context is unambiguous. Lower confidence when the sequence is unclear, objects are partially obscured, or context is ambiguous.
|
||||
- `potential_threat_level` (integer): 0, 1, or 2 as defined in "Normal Activity Patterns for This Property" above. Your threat level must be consistent with your scene description and the guidance above.
|
||||
Respond with a JSON object matching the provided schema. Field-specific guidance:
|
||||
- `scene`: Describe how the sequence begins, then the progression of events — all significant movements and actions in order. For example, if a vehicle arrives and then a person exits, describe both sequentially. Your description should align with and support the threat level you assign.
|
||||
- `title`: Characterize **what took place and where** — interpret the overall purpose or outcome, do not simply compress the scene description into fewer words. Include the relevant location (zone, area, or entry point). Always include subject names from "Objects in Scene" — do not replace named subjects with generic terms. No editorial qualifiers like "routine" or "suspicious."
|
||||
- `potential_threat_level`: Must be consistent with your scene description and the activity patterns above.
|
||||
{get_concern_prompt()}
|
||||
|
||||
## Sequence Details
|
||||
@ -134,10 +127,6 @@ Each line represents a detection state, not necessarily unique individuals. Pare
|
||||
**Note: Unidentified objects (without names) are NOT indicators of suspicious activity—they simply mean the system hasn't identified that object.**
|
||||
{get_objects_list()}
|
||||
|
||||
## Important Notes
|
||||
- Values must be plain strings, floats, or integers — no nested objects, no extra commentary.
|
||||
- Only describe objects from the "Objects in Scene" list above. Do not hallucinate additional objects.
|
||||
- When describing people or vehicles, use the exact names provided.
|
||||
{get_language_prompt()}
|
||||
"""
|
||||
logger.debug(
|
||||
@ -153,7 +142,27 @@ Each line represents a detection state, not necessarily unique individuals. Pare
|
||||
) as f:
|
||||
f.write(context_prompt)
|
||||
|
||||
response = self._send(context_prompt, thumbnails)
|
||||
# Build JSON schema for structured output from ReviewMetadata model
|
||||
schema = ReviewMetadata.model_json_schema()
|
||||
schema.get("properties", {}).pop("time", None)
|
||||
|
||||
if "time" in schema.get("required", []):
|
||||
schema["required"].remove("time")
|
||||
if not concerns:
|
||||
schema.get("properties", {}).pop("other_concerns", None)
|
||||
if "other_concerns" in schema.get("required", []):
|
||||
schema["required"].remove("other_concerns")
|
||||
|
||||
response_format = {
|
||||
"type": "json_schema",
|
||||
"json_schema": {
|
||||
"name": "review_metadata",
|
||||
"strict": True,
|
||||
"schema": schema,
|
||||
},
|
||||
}
|
||||
|
||||
response = self._send(context_prompt, thumbnails, response_format)
|
||||
|
||||
if debug_save and response:
|
||||
with open(
|
||||
@ -297,7 +306,12 @@ Guidelines:
|
||||
"""Initialize the client."""
|
||||
return None
|
||||
|
||||
def _send(self, prompt: str, images: list[bytes]) -> Optional[str]:
|
||||
def _send(
|
||||
self,
|
||||
prompt: str,
|
||||
images: list[bytes],
|
||||
response_format: Optional[dict] = None,
|
||||
) -> Optional[str]:
|
||||
"""Submit a request to the provider."""
|
||||
return None
|
||||
|
||||
|
||||
@ -42,13 +42,18 @@ class OpenAIClient(GenAIClient):
|
||||
azure_endpoint=azure_endpoint,
|
||||
)
|
||||
|
||||
def _send(self, prompt: str, images: list[bytes]) -> Optional[str]:
|
||||
def _send(
|
||||
self,
|
||||
prompt: str,
|
||||
images: list[bytes],
|
||||
response_format: Optional[dict] = None,
|
||||
) -> Optional[str]:
|
||||
"""Submit a request to Azure OpenAI."""
|
||||
encoded_images = [base64.b64encode(image).decode("utf-8") for image in images]
|
||||
try:
|
||||
result = self.provider.chat.completions.create(
|
||||
model=self.genai_config.model,
|
||||
messages=[
|
||||
request_params = {
|
||||
"model": self.genai_config.model,
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": [{"type": "text", "text": prompt}]
|
||||
@ -64,9 +69,12 @@ class OpenAIClient(GenAIClient):
|
||||
],
|
||||
},
|
||||
],
|
||||
timeout=self.timeout,
|
||||
"timeout": self.timeout,
|
||||
**self.genai_config.runtime_options,
|
||||
)
|
||||
}
|
||||
if response_format:
|
||||
request_params["response_format"] = response_format
|
||||
result = self.provider.chat.completions.create(**request_params)
|
||||
except Exception as e:
|
||||
logger.warning("Azure OpenAI returned an error: %s", str(e))
|
||||
return None
|
||||
|
||||
@ -42,7 +42,12 @@ class GeminiClient(GenAIClient):
|
||||
http_options=types.HttpOptions(**http_options_dict),
|
||||
)
|
||||
|
||||
def _send(self, prompt: str, images: list[bytes]) -> Optional[str]:
|
||||
def _send(
|
||||
self,
|
||||
prompt: str,
|
||||
images: list[bytes],
|
||||
response_format: Optional[dict] = None,
|
||||
) -> Optional[str]:
|
||||
"""Submit a request to Gemini."""
|
||||
contents = [
|
||||
types.Part.from_bytes(data=img, mime_type="image/jpeg") for img in images
|
||||
@ -52,6 +57,12 @@ class GeminiClient(GenAIClient):
|
||||
generation_config_dict = {"candidate_count": 1}
|
||||
generation_config_dict.update(self.genai_config.runtime_options)
|
||||
|
||||
if response_format and response_format.get("type") == "json_schema":
|
||||
generation_config_dict["response_mime_type"] = "application/json"
|
||||
schema = response_format.get("json_schema", {}).get("schema")
|
||||
if schema:
|
||||
generation_config_dict["response_schema"] = schema
|
||||
|
||||
response = self.provider.models.generate_content(
|
||||
model=self.genai_config.model,
|
||||
contents=contents,
|
||||
|
||||
@ -57,7 +57,12 @@ class LlamaCppClient(GenAIClient):
|
||||
else None
|
||||
)
|
||||
|
||||
def _send(self, prompt: str, images: list[bytes]) -> Optional[str]:
|
||||
def _send(
|
||||
self,
|
||||
prompt: str,
|
||||
images: list[bytes],
|
||||
response_format: Optional[dict] = None,
|
||||
) -> Optional[str]:
|
||||
"""Submit a request to llama.cpp server."""
|
||||
if self.provider is None:
|
||||
logger.warning(
|
||||
@ -96,6 +101,9 @@ class LlamaCppClient(GenAIClient):
|
||||
**self.provider_options,
|
||||
}
|
||||
|
||||
if response_format:
|
||||
payload["response_format"] = response_format
|
||||
|
||||
response = requests.post(
|
||||
f"{self.provider}/v1/chat/completions",
|
||||
json=payload,
|
||||
|
||||
@ -53,7 +53,12 @@ class OllamaClient(GenAIClient):
|
||||
logger.warning("Error initializing Ollama: %s", str(e))
|
||||
return None
|
||||
|
||||
def _send(self, prompt: str, images: list[bytes]) -> Optional[str]:
|
||||
def _send(
|
||||
self,
|
||||
prompt: str,
|
||||
images: list[bytes],
|
||||
response_format: Optional[dict] = None,
|
||||
) -> Optional[str]:
|
||||
"""Submit a request to Ollama"""
|
||||
if self.provider is None:
|
||||
logger.warning(
|
||||
@ -65,6 +70,10 @@ class OllamaClient(GenAIClient):
|
||||
**self.provider_options,
|
||||
**self.genai_config.runtime_options,
|
||||
}
|
||||
if response_format and response_format.get("type") == "json_schema":
|
||||
schema = response_format.get("json_schema", {}).get("schema")
|
||||
if schema:
|
||||
ollama_options["format"] = schema
|
||||
result = self.provider.generate(
|
||||
self.genai_config.model,
|
||||
prompt,
|
||||
|
||||
@ -36,7 +36,12 @@ class OpenAIClient(GenAIClient):
|
||||
|
||||
return OpenAI(api_key=self.genai_config.api_key, **provider_opts)
|
||||
|
||||
def _send(self, prompt: str, images: list[bytes]) -> Optional[str]:
|
||||
def _send(
|
||||
self,
|
||||
prompt: str,
|
||||
images: list[bytes],
|
||||
response_format: Optional[dict] = None,
|
||||
) -> Optional[str]:
|
||||
"""Submit a request to OpenAI."""
|
||||
encoded_images = [base64.b64encode(image).decode("utf-8") for image in images]
|
||||
messages_content = []
|
||||
@ -57,17 +62,20 @@ class OpenAIClient(GenAIClient):
|
||||
}
|
||||
)
|
||||
try:
|
||||
result = self.provider.chat.completions.create(
|
||||
model=self.genai_config.model,
|
||||
messages=[
|
||||
request_params = {
|
||||
"model": self.genai_config.model,
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": messages_content,
|
||||
},
|
||||
],
|
||||
timeout=self.timeout,
|
||||
"timeout": self.timeout,
|
||||
**self.genai_config.runtime_options,
|
||||
)
|
||||
}
|
||||
if response_format:
|
||||
request_params["response_format"] = response_format
|
||||
result = self.provider.chat.completions.create(**request_params)
|
||||
if (
|
||||
result is not None
|
||||
and hasattr(result, "choices")
|
||||
|
||||
Loading…
Reference in New Issue
Block a user