From 5900c4936b0c0197c1ba124eeca70737c08d851c Mon Sep 17 00:00:00 2001
From: Nicolas Mowen <nickmowen213@gmail.com>
Date: Sun, 26 Apr 2026 08:05:06 -0600
Subject: [PATCH] Use pydantic but don't fail if some constraints are not met.

---
 frigate/data_processing/post/types.py | 38 ++++++++++---
 frigate/genai/__init__.py             | 81 +++++++++++----------------
 2 files changed, 64 insertions(+), 55 deletions(-)

diff --git a/frigate/data_processing/post/types.py b/frigate/data_processing/post/types.py
index b4deb1ddd..0b2e6b108 100644
--- a/frigate/data_processing/post/types.py
+++ b/frigate/data_processing/post/types.py
@@ -1,25 +1,47 @@
-from pydantic import BaseModel, ConfigDict, Field
+from typing import Annotated
+
+from pydantic import BaseModel, ConfigDict, Field, StringConstraints
+
+ObservationItem = Annotated[str, StringConstraints(min_length=20, max_length=160)]
 
 
 class ReviewMetadata(BaseModel):
     model_config = ConfigDict(extra="ignore", protected_namespaces=())
 
-    observations: list[str] = Field(
-        default_factory=list,
-        description="Chronological list of significant observations from the frames, written before the scene narrative is composed.",
+    observations: list[ObservationItem] = Field(
+        ...,
+        min_length=3,
+        max_length=15,
+        description=(
+            "Enumerate the significant observations across all frames, in "
+            "chronological order, BEFORE composing the scene narrative. "
+            "Include the very start of the activity — for example, a vehicle "
+            "entering the frame or pulling into the driveway — even if it "
+            "lasts only a few frames and the rest of the clip is dominated "
+            "by a longer activity. Include each arrival, departure, motion "
+            "event, object handled, and notable change in position or state. "
+            "Each item is a single concrete fact written as a complete "
+            "sentence. Do not summarize, interpret, or assign meaning here — "
+            "that belongs in the scene field."
+        ),
     )
     title: str = Field(
-        description="A short title characterizing what took place and where, under 10 words."
+        max_length=80,
+        description="A short title characterizing what took place and where, under 10 words.",
     )
     scene: str = Field(
-        description="A chronological narrative of what happens from start to finish.",
+        min_length=120,
+        max_length=600,
+        description="A chronological narrative of what happens from start to finish, drawing directly from the items in observations.",
     )
     shortSummary: str = Field(
-        description="A brief 2-sentence summary of the scene, suitable for notifications."
+        min_length=70,
+        max_length=100,
+        description="A brief 2-sentence summary of the scene, suitable for notifications.",
     )
     confidence: float = Field(
         ge=0.0,
-        description="Confidence in the analysis, from 0 to 1.",
+        description="Confidence in the analysis as a decimal between 0.0 and 1.0, where 0.0 means no confidence and 1.0 means complete confidence. Express ONLY as a decimal.",
     )
     potential_threat_level: int = Field(
         ge=0,
diff --git a/frigate/genai/__init__.py b/frigate/genai/__init__.py
index 203619dea..dd61f1194 100644
--- a/frigate/genai/__init__.py
+++ b/frigate/genai/__init__.py
@@ -2,6 +2,7 @@
 
 import datetime
 import importlib
+import json
 import logging
 import os
 import re
@@ -9,6 +10,7 @@ from typing import Any, Callable, Optional
 
 import numpy as np
 from playhouse.shortcuts import model_to_dict
+from pydantic import ValidationError
 
 from frigate.config import CameraConfig, GenAIConfig, GenAIProviderEnum
 from frigate.const import CLIPS_DIR
@@ -151,50 +153,6 @@ Each line represents a detection state, not necessarily unique individuals. The
             if "other_concerns" in schema.get("required", []):
                 schema["required"].remove("other_concerns")
 
-        # Length hints injected into the schema as suggestions to the model
-        # (enforced by grammar-based providers like llama.cpp) but kept off the
-        # Pydantic model so a non-compliant response does not fail validation.
-        length_hints = {
-            "scene": {"minLength": 120, "maxLength": 600},
-            "shortSummary": {"minLength": 70, "maxLength": 100},
-        }
-        for field, hints in length_hints.items():
-            prop = schema.get("properties", {}).get(field)
-            if prop is not None:
-                prop.update(hints)
-
-        # observations is a chain-of-thought-by-schema field: forcing the model
-        # to enumerate concrete facts before writing scene/title surfaces details
-        # the narrative would otherwise gloss past (e.g. brief vehicle arrivals
-        # overshadowed by a longer activity). The minItems floor scales with
-        # event duration so longer clips get more observations.
-        observations_prop = schema.get("properties", {}).get("observations")
-        if observations_prop is not None:
-            duration_seconds = float(review_data.get("duration") or 0)
-            min_observations = max(3, round(duration_seconds / 5))
-            max_observations = min_observations + 8
-            observations_prop["description"] = (
-                "Enumerate the significant observations across all frames, in "
-                "chronological order, BEFORE composing the scene narrative. "
-                "Include the very start of the activity — for example, a "
-                "vehicle entering the frame or pulling into the driveway — "
-                "even if it lasts only a few frames and the rest of the clip "
-                "is dominated by a longer activity. Include each arrival, "
-                "departure, motion event, object handled, and notable change "
-                "in position or state. Each item is a single concrete fact "
-                "written as a complete sentence (e.g., 'A blue sedan turns "
-                "from the street into the driveway', 'Nick exits the driver "
-                "side carrying a plant pot'). Do not summarize, interpret, or "
-                "assign meaning here — that belongs in the scene field."
-            )
-            observations_prop["minItems"] = min_observations
-            observations_prop["maxItems"] = max_observations
-            observations_prop["items"] = {"type": "string", "minLength": 20}
-
-            required = schema.setdefault("required", [])
-            if "observations" not in required:
-                required.append("observations")
-
         # OpenAI strict mode requires additionalProperties: false on all objects
         schema["additionalProperties"] = False
 
@@ -225,7 +183,37 @@ Each line represents a detection state, not necessarily unique individuals. The
 
             try:
                 metadata = ReviewMetadata.model_validate_json(clean_json)
+            except ValidationError as ve:
+                # Every validation failure — constraint violations (length, item
+                # count, ranges) as well as wrong types or missing fields — is
+                # logged at debug and the response is kept anyway: an off-spec
+                # answer is still usable, and dropping it loses the narrative.
+                for err in ve.errors():
+                    loc = ".".join(str(p) for p in err["loc"]) or "<root>"
+                    logger.debug(
+                        "Review metadata soft validation: %s — %s (input: %r)",
+                        loc,
+                        err["msg"],
+                        err.get("input"),
+                    )
+                try:
+                    raw = json.loads(clean_json)
+                except json.JSONDecodeError as je:
+                    logger.error(
+                        "Failed to parse review description JSON: %s", je
+                    )
+                    return None
+                # observations is required on the model; default it to [] when
+                # omitted so attribute access stays safe (assumes raw is a dict).
+                raw.setdefault("observations", [])
+                metadata = ReviewMetadata.model_construct(**raw)
+            except Exception as e:
+                logger.error(
+                    f"Failed to parse review description as the response did not match expected format. {e}"
+                )
+                return None
 
+            try:
                 # Normalize confidence if model returned a percentage (e.g. 85 instead of 0.85)
                 if metadata.confidence > 1.0:
                     metadata.confidence = min(metadata.confidence / 100.0, 1.0)
@@ -238,9 +226,8 @@ Each line represents a detection state, not necessarily unique individuals. The
                 metadata.time = review_data["start"]
                 return metadata
             except Exception as e:
-                # rarely LLMs can fail to follow directions on output format
-                logger.warning(
-                    f"Failed to parse review description as the response did not match expected format. {e}"
+                logger.error(
+                    f"Failed to post-process review metadata: {e}"
                 )
                 return None
         else: