diff --git a/frigate/genai/__init__.py b/frigate/genai/__init__.py index d5004a18d..0d3f75a40 100644 --- a/frigate/genai/__init__.py +++ b/frigate/genai/__init__.py @@ -39,25 +39,35 @@ class GenAIClient: ) -> None: """Generate a description for the review item activity.""" context_prompt = f""" - Please analyze the image(s), which are in chronological order, strictly from the perspective of the {review_data['camera'].replace('_', ' ')} security camera. - Your task is to provide a **neutral, factual, and objective description** of the scene and the objects interacting with it. - Focus solely on observable actions, visible entities, and the environment. + Please analyze the image(s), which are in chronological order, strictly from the perspective of the {review_data["camera"].replace("_", " ")} security camera. + Your task is to provide a **neutral, factual, and objective description** of the scene, while also including **reasonable, evidence-based inferences** about the likely context or activity — but do not make unfounded assumptions. + + When forming your description: + - Base all statements on visible details in the images. + - You may deduce plausible intent or context only if supported by clear, observable evidence (e.g., someone carrying tools toward a car may indicate vehicle maintenance). + - Avoid implying hostility, criminal intent, or other strong judgments unless the visual evidence is unambiguous. + - Distinguish between what is certain (facts) and what appears likely (reasonable inference). Here is some information we already know: - - the following activity occurred at {review_data['timestamp'].strftime('%I:%M %p')} - - the following objects were detected: {review_data['objects']} - - the following recognized objects were detected: {review_data['recognized_objects']} - - the activity happened in the following zones: {review_data['zones']} + - The activity occurred at {review_data["timestamp"].strftime("%I:%M %p")} + - The following objects were detected: {review_data["objects"]} + - The following recognized objects were detected: {review_data["recognized_objects"]} + - The activity happened in the following zones: {review_data["zones"]} Your response **MUST** be a flat JSON object with the following fields: - - `scene` (string): A single, comprehensive description of the entire visual scene. - - `action` (string): A single description of any key actions or movements. - - `potential_threat_level` (integer, optional): An integer from 0 to 3. Only include if a clear security concern is visible. Omit if no threat. + - `scene` (string): A comprehensive description of the setting and entities, including relevant context and plausible inferences if supported by visual evidence. + - `action` (string): A concise description of the main action(s) or movement(s) taking place. + - `potential_threat_level` (integer, optional): 0–3 scale. Only include if a clear security concern is visible: + - 0 = No concern + - 1 = Unusual but not overtly threatening + - 2 = Suspicious or potentially harmful + - 3 = Clear and immediate threat + Omit this field entirely if there is no observable security concern. - **IMPORTANT:** The value for each field (e.g., "scene", "action") must be a plain string or integer, NOT another JSON object or a description of the field itself. - - Provide the response in the exact JSON format specified by this schema: - {ReviewMetadata.model_json_schema()['properties']} + **IMPORTANT:** + - Values for each field must be plain strings or integers — no nested objects or explanatory text. + - The JSON must strictly match this structure: + {ReviewMetadata.model_json_schema()["properties"]} """ logger.info(f"processing {review_data}") logger.info(f"Got GenAI review: {self._send(context_prompt, thumbnails)}")