diff --git a/frigate/api/chat.py b/frigate/api/chat.py
index 0543d5f8a..939512dd5 100644
--- a/frigate/api/chat.py
+++ b/frigate/api/chat.py
@@ -36,6 +36,7 @@ from frigate.api.defs.response.chat_response import (
 )
 from frigate.api.defs.tags import Tags
 from frigate.api.event import events
+from frigate.config import FrigateConfig
 from frigate.genai.utils import build_assistant_message_for_conversation
 from frigate.jobs.vlm_watch import (
     get_vlm_watch_job,
@@ -401,9 +402,38 @@ def get_tools() -> JSONResponse:
     return JSONResponse(content={"tools": tools})
 
 
+def _resolve_zones(
+    zones: List[str],
+    config: FrigateConfig,
+    target_cameras: List[str],
+) -> List[str]:
+    """Map zone names to their canonical config keys, case-insensitively.
+
+    LLMs frequently echo a user's casing ("Front_Yard") instead of the
+    configured key ("front_yard"). The downstream zone filter is a SQLite GLOB
+    over the JSON-encoded zones column, which is case-sensitive, so an
+    unnormalized name silently returns zero matches. Build a lookup over the
+    relevant cameras' configured zones and substitute when we find a match;
+    unknown names pass through so behavior matches what the model asked for.
+    """
+    if not zones:
+        return zones
+
+    lookup: Dict[str, str] = {}
+    for camera_id in target_cameras:
+        camera_config = config.cameras.get(camera_id)
+        if camera_config is None:
+            continue
+        for zone_name in camera_config.zones.keys():
+            lookup.setdefault(zone_name.lower(), zone_name)
+
+    return [lookup.get(z.lower(), z) for z in zones]
+
+
 async def _execute_search_objects(
     arguments: Dict[str, Any],
     allowed_cameras: List[str],
+    config: FrigateConfig,
 ) -> JSONResponse:
     """
     Execute the search_objects tool.
@@ -437,6 +467,11 @@ async def _execute_search_objects(
     # Convert zones array to comma-separated string if provided
     zones = arguments.get("zones")
     if isinstance(zones, list):
+        camera_arg = arguments.get("camera")
+        target_cameras = (
+            [camera_arg] if camera_arg and camera_arg != "all" else allowed_cameras
+        )
+        zones = _resolve_zones(zones, config, target_cameras)
         zones = ",".join(zones)
     elif zones is None:
         zones = "all"
@@ -528,6 +563,11 @@ async def _execute_find_similar_objects(
     sub_labels = arguments.get("sub_labels")
     zones = arguments.get("zones")
 
+    if zones:
+        zones = _resolve_zones(
+            zones, request.app.frigate_config, cameras or list(allowed_cameras)
+        )
+
     similarity_mode = arguments.get("similarity_mode", "fused")
     if similarity_mode not in ("visual", "semantic", "fused"):
         similarity_mode = "fused"
@@ -655,7 +695,9 @@ async def execute_tool(
     logger.debug(f"Executing tool: {tool_name} with arguments: {arguments}")
 
     if tool_name == "search_objects":
-        return await _execute_search_objects(arguments, allowed_cameras)
+        return await _execute_search_objects(
+            arguments, allowed_cameras, request.app.frigate_config
+        )
 
     if tool_name == "find_similar_objects":
         result = await _execute_find_similar_objects(
@@ -835,7 +877,9 @@ async def _execute_tool_internal(
     This is used by the chat completion endpoint to execute tools.
     """
     if tool_name == "search_objects":
-        response = await _execute_search_objects(arguments, allowed_cameras)
+        response = await _execute_search_objects(
+            arguments, allowed_cameras, request.app.frigate_config
+        )
         try:
             if hasattr(response, "body"):
                 body_str = response.body.decode("utf-8")
@@ -899,6 +943,9 @@ async def _execute_start_camera_watch(
 
     await require_camera_access(camera, request=request)
 
+    if zones:
+        zones = _resolve_zones(zones, config, [camera])
+
     genai_manager = request.app.genai_manager
     chat_client = genai_manager.chat_client
     if chat_client is None or not chat_client.supports_vision:
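For reference, a minimal standalone sketch of the lookup behavior `_resolve_zones` implements, with a plain dict standing in for `FrigateConfig.cameras` (the camera and zone names below are hypothetical):

```python
# Stand-in for FrigateConfig.cameras: camera id -> configured zone keys.
def resolve_zones(zones, cameras, target_cameras):
    lookup = {}
    for camera_id in target_cameras:
        for zone_name in cameras.get(camera_id, []):
            # First camera to define a name wins, mirroring setdefault above.
            lookup.setdefault(zone_name.lower(), zone_name)
    return [lookup.get(z.lower(), z) for z in zones]

cameras = {"front": ["front_yard", "porch"], "back": ["back_yard"]}
assert resolve_zones(["Front_Yard", "PORCH"], cameras, ["front"]) == ["front_yard", "porch"]
# Unknown names pass through unchanged rather than being dropped:
assert resolve_zones(["driveway"], cameras, ["front"]) == ["driveway"]
```

Note that only casing is normalized: a name like "Front Yard" (space instead of the configured underscore) would still miss, since the lookup lowercases but does not rewrite separators.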
""" if tool_name == "search_objects": - response = await _execute_search_objects(arguments, allowed_cameras) + response = await _execute_search_objects( + arguments, allowed_cameras, request.app.frigate_config + ) try: if hasattr(response, "body"): body_str = response.body.decode("utf-8") @@ -899,6 +943,9 @@ async def _execute_start_camera_watch( await require_camera_access(camera, request=request) + if zones: + zones = _resolve_zones(zones, config, [camera]) + genai_manager = request.app.genai_manager chat_client = genai_manager.chat_client if chat_client is None or not chat_client.supports_vision: diff --git a/frigate/data_processing/post/review_descriptions.py b/frigate/data_processing/post/review_descriptions.py index 536b57f3c..7f1f65658 100644 --- a/frigate/data_processing/post/review_descriptions.py +++ b/frigate/data_processing/post/review_descriptions.py @@ -39,6 +39,8 @@ logger = logging.getLogger(__name__) RECORDING_BUFFER_EXTENSION_PERCENT = 0.10 MIN_RECORDING_DURATION = 10 +MAX_IMAGE_TOKENS = 24000 +MAX_FRAMES_PER_SECOND = 2 class ReviewDescriptionProcessor(PostProcessorApi): @@ -60,14 +62,22 @@ class ReviewDescriptionProcessor(PostProcessorApi): def calculate_frame_count( self, camera: str, + duration: float, image_source: ImageSourceEnum = ImageSourceEnum.preview, height: int = 480, ) -> int: - """Calculate optimal number of frames based on context size, image source, and resolution. + """Calculate optimal number of frames based on event duration, context size, + image source, and resolution. - Token usage varies by resolution: larger images (ultra-wide aspect ratios) use more tokens. - Estimates ~1 token per 1250 pixels. Targets 98% context utilization with safety margin. - Capped at 20 frames. + Per-image token cost is asked of the GenAI provider so providers that know + their model's true cost (e.g. llama.cpp can probe the loaded mmproj) can + diverge from the default ~1-token-per-1250-pixels heuristic. 
diff --git a/frigate/data_processing/post/types.py b/frigate/data_processing/post/types.py
index 02d27ccce..b4deb1ddd 100644
--- a/frigate/data_processing/post/types.py
+++ b/frigate/data_processing/post/types.py
@@ -4,6 +4,10 @@ from pydantic import BaseModel, ConfigDict, Field
 class ReviewMetadata(BaseModel):
     model_config = ConfigDict(extra="ignore", protected_namespaces=())
 
+    observations: list[str] = Field(
+        default_factory=list,
+        description="Chronological list of significant observations from the frames, written before the scene narrative is composed.",
+    )
     title: str = Field(
         description="A short title characterizing what took place and where, under 10 words."
     )
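Because the field uses `default_factory=list`, previously stored metadata that lacks `observations` still validates. A quick sketch, with the model trimmed to the two fields visible in this diff:

```python
from pydantic import BaseModel, ConfigDict, Field

class ReviewMetadata(BaseModel):  # trimmed: the real model has more fields
    model_config = ConfigDict(extra="ignore", protected_namespaces=())
    observations: list[str] = Field(default_factory=list)
    title: str

# An older payload without "observations" parses, defaulting to []:
meta = ReviewMetadata.model_validate({"title": "Person at the front door"})
assert meta.observations == []
```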
diff --git a/frigate/genai/__init__.py b/frigate/genai/__init__.py
index a38f10de5..203619dea 100644
--- a/frigate/genai/__init__.py
+++ b/frigate/genai/__init__.py
@@ -163,6 +163,38 @@ Each line represents a detection state, not necessarily unique individuals. The
             if prop is not None:
                 prop.update(hints)
 
+        # observations is a chain-of-thought-by-schema field: forcing the model
+        # to enumerate concrete facts before writing scene/title surfaces details
+        # the narrative would otherwise gloss over (e.g. brief vehicle arrivals
+        # overshadowed by a longer activity). The minItems floor scales with
+        # event duration so longer clips get more observations.
+        observations_prop = schema.get("properties", {}).get("observations")
+        if observations_prop is not None:
+            duration_seconds = float(review_data.get("duration") or 0)
+            min_observations = max(3, round(duration_seconds / 5))
+            max_observations = min_observations + 8
+            observations_prop["description"] = (
+                "Enumerate the significant observations across all frames, in "
+                "chronological order, BEFORE composing the scene narrative. "
+                "Include the very start of the activity — for example, a "
+                "vehicle entering the frame or pulling into the driveway — "
+                "even if it lasts only a few frames and the rest of the clip "
+                "is dominated by a longer activity. Include each arrival, "
+                "departure, motion event, object handled, and notable change "
+                "in position or state. Each item is a single concrete fact "
+                "written as a complete sentence (e.g., 'A blue sedan turns "
+                "from the street into the driveway', 'Nick exits the driver "
+                "side carrying a plant pot'). Do not summarize, interpret, or "
+                "assign meaning here — that belongs in the scene field."
+            )
+            observations_prop["minItems"] = min_observations
+            observations_prop["maxItems"] = max_observations
+            observations_prop["items"] = {"type": "string", "minLength": 20}
+
+            required = schema.setdefault("required", [])
+            if "observations" not in required:
+                required.append("observations")
+
         # OpenAI strict mode requires additionalProperties: false on all objects
         schema["additionalProperties"] = False
@@ -356,6 +388,14 @@ Guidelines:
         """Get the context window size for this provider in tokens."""
         return 4096
 
+    def estimate_image_tokens(self, width: int, height: int) -> float:
+        """Estimate prompt tokens consumed by a single image of the given dimensions.
+
+        Default heuristic: ~1 token per 1250 pixels. Providers that can measure or
+        know their model's exact image-token cost should override.
+        """
+        return (width * height) / 1250
+
     def embed(
         self,
         texts: list[str] | None = None,
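For intuition about the minItems/maxItems scaling: roughly one required observation per five seconds of event, with a floor of 3 and a fixed headroom of 8 above the floor. A minimal sketch of the arithmetic as computed in this diff:

```python
def observation_bounds(duration_seconds: float) -> tuple[int, int]:
    min_observations = max(3, round(duration_seconds / 5))
    return min_observations, min_observations + 8

print(observation_bounds(10))   # (3, 11): short clips keep the floor
print(observation_bounds(60))   # (12, 20)
print(observation_bounds(300))  # (60, 68): long clips demand many facts
```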
+ """ + if self.provider is None: + return super().estimate_image_tokens(width, height) + + cached = self._image_token_cache.get((width, height)) + + if cached is not None: + return cached + + try: + baseline = self._probe_baseline_tokens() + with_image = self._probe_image_prompt_tokens(width, height) + tokens = max(1, with_image - baseline) + except Exception as e: + logger.debug( + "llama.cpp image-token probe failed for %dx%d (%s); using heuristic", + width, + height, + e, + ) + return super().estimate_image_tokens(width, height) + + self._image_token_cache[(width, height)] = tokens + logger.debug( + "llama.cpp model '%s' uses ~%d tokens for %dx%d images", + self.genai_config.model, + tokens, + width, + height, + ) + return tokens + + def _probe_baseline_tokens(self) -> int: + """Return prompt_tokens for a minimal text-only request. Cached after first call.""" + if self._text_baseline_tokens is not None: + return self._text_baseline_tokens + + self._text_baseline_tokens = self._probe_prompt_tokens( + [{"type": "text", "text": "."}] + ) + return self._text_baseline_tokens + + def _probe_image_prompt_tokens(self, width: int, height: int) -> int: + """Return prompt_tokens for a single synthetic image plus minimal text.""" + img = Image.new("RGB", (width, height), (128, 128, 128)) + buf = io.BytesIO() + img.save(buf, format="JPEG", quality=60) + encoded = base64.b64encode(buf.getvalue()).decode("utf-8") + return self._probe_prompt_tokens( + [ + {"type": "text", "text": "."}, + { + "type": "image_url", + "image_url": {"url": f"data:image/jpeg;base64,{encoded}"}, + }, + ] + ) + + def _probe_prompt_tokens(self, content: list[dict[str, Any]]) -> int: + """POST a 1-token chat completion and return reported prompt_tokens. + + Uses a generous timeout to absorb a cold model load on the first probe + when the server lazily loads models on demand (e.g. llama-swap). + """ + payload = { + "model": self.genai_config.model, + "messages": [{"role": "user", "content": content}], + "max_tokens": 1, + } + response = requests.post( + f"{self.provider}/v1/chat/completions", + json=payload, + timeout=60, + ) + response.raise_for_status() + return int(response.json()["usage"]["prompt_tokens"]) + def _build_payload( self, messages: list[dict[str, Any]], diff --git a/web/src/components/chat/ChatMessage.tsx b/web/src/components/chat/ChatMessage.tsx index c5f92b5f4..6478b48fc 100644 --- a/web/src/components/chat/ChatMessage.tsx +++ b/web/src/components/chat/ChatMessage.tsx @@ -155,14 +155,40 @@ export function MessageBubble({ ) : (
diff --git a/web/src/components/chat/ChatMessage.tsx b/web/src/components/chat/ChatMessage.tsx
index c5f92b5f4..6478b48fc 100644
--- a/web/src/components/chat/ChatMessage.tsx
+++ b/web/src/components/chat/ChatMessage.tsx
@@ -155,14 +155,40 @@ export function MessageBubble({
           ) : (
+            ),
+            table: ({ node: _n, ...props }) => (