From 1a015132233c7850cd2122af0d2e0e703b63eaa1 Mon Sep 17 00:00:00 2001
From: Nicolas Mowen <nickmowen213@gmail.com>
Date: Fri, 27 Mar 2026 07:48:50 -0600
Subject: [PATCH] Improve chat features (#22663)

* Improve notification messaging

* Improve wake behavior when a zone is not specified

* Fix prompt ordering for generate calls
---
 frigate/comms/webpush.py  |  4 +--
 frigate/genai/gemini.py   |  4 +--
 frigate/genai/openai.py   | 13 ++++-----
 frigate/jobs/vlm_watch.py | 61 +++++++++++++++++++++++++++++++--------
 4 files changed, 59 insertions(+), 23 deletions(-)

diff --git a/frigate/comms/webpush.py b/frigate/comms/webpush.py
index 53c223e8d..e4ed83268 100644
--- a/frigate/comms/webpush.py
+++ b/frigate/comms/webpush.py
@@ -542,9 +542,9 @@ class WebPushClient(Communicator):
 
         self.check_registrations()
 
-        reasoning: str = payload.get("reasoning", "")
+        text: str = payload.get("message") or payload.get("reasoning", "")
         title = f"{camera_name}: Monitoring Alert"
-        message = (reasoning[:197] + "...") if len(reasoning) > 200 else reasoning
+        message = (text[:197] + "...") if len(text) > 200 else text
 
         logger.debug(f"Sending camera monitoring push notification for {camera_name}")
 
diff --git a/frigate/genai/gemini.py b/frigate/genai/gemini.py
index e0c0c7698..d9644950d 100644
--- a/frigate/genai/gemini.py
+++ b/frigate/genai/gemini.py
@@ -50,9 +50,9 @@ class GeminiClient(GenAIClient):
         response_format: Optional[dict] = None,
     ) -> Optional[str]:
         """Submit a request to Gemini."""
-        contents = [
+        contents = [prompt] + [
             types.Part.from_bytes(data=img, mime_type="image/jpeg") for img in images
-        ] + [prompt]
+        ]
         try:
             # Merge runtime_options into generation_config if provided
             generation_config_dict: dict[str, Any] = {"candidate_count": 1}
diff --git a/frigate/genai/openai.py b/frigate/genai/openai.py
index 02ad301fa..55fb21cc1 100644
--- a/frigate/genai/openai.py
+++ b/frigate/genai/openai.py
@@ -44,7 +44,12 @@ class OpenAIClient(GenAIClient):
     ) -> Optional[str]:
         """Submit a request to OpenAI."""
         encoded_images = [base64.b64encode(image).decode("utf-8") for image in images]
-        messages_content = []
+        messages_content: list[dict] = [
+            {
+                "type": "text",
+                "text": prompt,
+            }
+        ]
         for image in encoded_images:
             messages_content.append(
                 {
@@ -55,12 +60,6 @@ class OpenAIClient(GenAIClient):
                     },
                 }
             )
-        messages_content.append(
-            {
-                "type": "text",
-                "text": prompt,
-            }
-        )
         try:
             request_params = {
                 "model": self.genai_config.model,
diff --git a/frigate/jobs/vlm_watch.py b/frigate/jobs/vlm_watch.py
index a66f60dfc..7884d8637 100644
--- a/frigate/jobs/vlm_watch.py
+++ b/frigate/jobs/vlm_watch.py
@@ -25,6 +25,9 @@ logger = logging.getLogger(__name__)
 _MIN_INTERVAL = 1
 _MAX_INTERVAL = 300
 
+# Minimum seconds between VLM iterations when woken by detections (no zone filter)
+_DETECTION_COOLDOWN_WITHOUT_ZONE = 10
+
 # Max user/assistant turn pairs to keep in conversation history
 _MAX_HISTORY = 10
 
@@ -40,6 +43,7 @@ class VLMWatchJob(Job):
     labels: list = field(default_factory=list)
     zones: list = field(default_factory=list)
     last_reasoning: str = ""
+    notification_message: str = ""
     iteration_count: int = 0
 
     def to_dict(self) -> dict[str, Any]:
@@ -196,6 +200,7 @@ class VLMWatchRunner(threading.Thread):
                 min(_MAX_INTERVAL, int(parsed.get("next_run_in", 30))),
             )
             reasoning = str(parsed.get("reasoning", ""))
+            notification_message = str(parsed.get("notification_message", ""))
         except (json.JSONDecodeError, ValueError, TypeError) as e:
             logger.warning(
                 "VLM watch job %s: failed to parse VLM response: %s", self.job.id, e
@@ -203,6 +208,7 @@ class VLMWatchRunner(threading.Thread):
             return 30
 
         self.job.last_reasoning = reasoning
+        self.job.notification_message = notification_message
         self.job.iteration_count += 1
         self._broadcast_status()
 
@@ -213,19 +219,35 @@ class VLMWatchRunner(threading.Thread):
                 self.job.camera,
                 reasoning,
             )
-            self._send_notification(reasoning)
+            self._send_notification(notification_message or reasoning)
             self.job.status = JobStatusTypesEnum.success
             return 0
 
         return next_run_in
 
     def _wait_for_trigger(self, max_wait: float) -> None:
-        """Wait up to max_wait seconds, returning early if a relevant detection fires on the target camera."""
-        deadline = time.time() + max_wait
+        """Wait up to max_wait seconds, returning early if a relevant detection fires on the target camera.
+
+        With zones configured, a matching detection wakes immediately (events
+        are already filtered).  Without zones, detections are frequent so a
+        cooldown is enforced: messages are continuously drained to prevent
+        queue backup, but the loop only exits once a match has been seen
+        *and* the cooldown period has elapsed.
+        """
+        now = time.time()
+        deadline = now + max_wait
+        use_cooldown = not self.job.zones
+        earliest_wake = now + _DETECTION_COOLDOWN_WITHOUT_ZONE if use_cooldown else 0
+        triggered = False
+
         while not self.cancel_event.is_set():
             remaining = deadline - time.time()
             if remaining <= 0:
                 break
+
+            if triggered and time.time() >= earliest_wake:
+                break
+
             result = self.detection_subscriber.check_for_update(
                 timeout=min(1.0, remaining)
             )
@@ -250,12 +272,22 @@ class VLMWatchRunner(threading.Thread):
             if cam != self.job.camera or not tracked_objects:
                 continue
             if self._detection_matches_filters(tracked_objects):
-                logger.debug(
-                    "VLM watch job %s: woken early by detection event on %s",
-                    self.job.id,
-                    self.job.camera,
-                )
-                break
+                if not use_cooldown:
+                    logger.debug(
+                        "VLM watch job %s: woken early by detection event on %s",
+                        self.job.id,
+                        self.job.camera,
+                    )
+                    break
+
+                if not triggered:
+                    logger.debug(
+                        "VLM watch job %s: detection match on %s, draining for %.0fs",
+                        self.job.id,
+                        self.job.camera,
+                        max(0, earliest_wake - time.time()),
+                    )
+                    triggered = True
 
     def _detection_matches_filters(self, tracked_objects: list) -> bool:
         """Return True if any tracked object passes the label and zone filters."""
@@ -284,7 +316,11 @@ class VLMWatchRunner(threading.Thread):
             f"You will receive a sequence of frames over time. Use the conversation history to understand "
             f"what is stationary vs. actively changing.\n\n"
             f"For each frame respond with JSON only:\n"
-            f'{{"condition_met": <true/false>, "next_run_in": <integer seconds 1-300>, "reasoning": "<brief explanation>"}}\n\n'
+            f'{{"condition_met": <true/false>, "next_run_in": <integer seconds 1-300>, "reasoning": "<brief explanation>", "notification_message": "<natural language notification>"}}\n\n'
+            f"Guidelines for notification_message:\n"
+            f"- Only required when condition_met is true.\n"
+            f"- Write a short, natural notification a user would want to receive on their phone.\n"
+            f'- Example: "Your package has been delivered to the front porch."\n\n'
             f"Guidelines for next_run_in:\n"
             f"- Scene is empty / nothing of interest visible: 60-300.\n"
             f"- Relevant object(s) visible anywhere in frame (even outside the target zone): 3-10. "
@@ -294,12 +330,13 @@ class VLMWatchRunner(threading.Thread):
             f"- Keep reasoning to 1-2 sentences."
         )
 
-    def _send_notification(self, reasoning: str) -> None:
+    def _send_notification(self, message: str) -> None:
         """Publish a camera_monitoring event so downstream handlers (web push, MQTT) can notify users."""
         payload = {
             "camera": self.job.camera,
             "condition": self.job.condition,
-            "reasoning": reasoning,
+            "message": message,
+            "reasoning": self.job.last_reasoning,
             "job_id": self.job.id,
         }