From 1a015132233c7850cd2122af0d2e0e703b63eaa1 Mon Sep 17 00:00:00 2001 From: Nicolas Mowen Date: Fri, 27 Mar 2026 07:48:50 -0600 Subject: [PATCH] Improve chat features (#22663) * Improve notification messaging * Improve wake behavior when a zone is not specified * Fix prompt ordering for generate calls --- frigate/comms/webpush.py | 4 +-- frigate/genai/gemini.py | 4 +-- frigate/genai/openai.py | 13 ++++----- frigate/jobs/vlm_watch.py | 61 +++++++++++++++++++++++++++++++-------- 4 files changed, 59 insertions(+), 23 deletions(-) diff --git a/frigate/comms/webpush.py b/frigate/comms/webpush.py index 53c223e8d..e4ed83268 100644 --- a/frigate/comms/webpush.py +++ b/frigate/comms/webpush.py @@ -542,9 +542,9 @@ class WebPushClient(Communicator): self.check_registrations() - reasoning: str = payload.get("reasoning", "") + text: str = payload.get("message") or payload.get("reasoning", "") title = f"{camera_name}: Monitoring Alert" - message = (reasoning[:197] + "...") if len(reasoning) > 200 else reasoning + message = (text[:197] + "...") if len(text) > 200 else text logger.debug(f"Sending camera monitoring push notification for {camera_name}") diff --git a/frigate/genai/gemini.py b/frigate/genai/gemini.py index e0c0c7698..d9644950d 100644 --- a/frigate/genai/gemini.py +++ b/frigate/genai/gemini.py @@ -50,9 +50,9 @@ class GeminiClient(GenAIClient): response_format: Optional[dict] = None, ) -> Optional[str]: """Submit a request to Gemini.""" - contents = [ + contents = [prompt] + [ types.Part.from_bytes(data=img, mime_type="image/jpeg") for img in images - ] + [prompt] + ] try: # Merge runtime_options into generation_config if provided generation_config_dict: dict[str, Any] = {"candidate_count": 1} diff --git a/frigate/genai/openai.py b/frigate/genai/openai.py index 02ad301fa..55fb21cc1 100644 --- a/frigate/genai/openai.py +++ b/frigate/genai/openai.py @@ -44,7 +44,12 @@ class OpenAIClient(GenAIClient): ) -> Optional[str]: """Submit a request to OpenAI.""" encoded_images = [base64.b64encode(image).decode("utf-8") for image in images] - messages_content = [] + messages_content: list[dict] = [ + { + "type": "text", + "text": prompt, + } + ] for image in encoded_images: messages_content.append( { @@ -55,12 +60,6 @@ class OpenAIClient(GenAIClient): }, } ) - messages_content.append( - { - "type": "text", - "text": prompt, - } - ) try: request_params = { "model": self.genai_config.model, diff --git a/frigate/jobs/vlm_watch.py b/frigate/jobs/vlm_watch.py index a66f60dfc..7884d8637 100644 --- a/frigate/jobs/vlm_watch.py +++ b/frigate/jobs/vlm_watch.py @@ -25,6 +25,9 @@ logger = logging.getLogger(__name__) _MIN_INTERVAL = 1 _MAX_INTERVAL = 300 +# Minimum seconds between VLM iterations when woken by detections (no zone filter) +_DETECTION_COOLDOWN_WITHOUT_ZONE = 10 + # Max user/assistant turn pairs to keep in conversation history _MAX_HISTORY = 10 @@ -40,6 +43,7 @@ class VLMWatchJob(Job): labels: list = field(default_factory=list) zones: list = field(default_factory=list) last_reasoning: str = "" + notification_message: str = "" iteration_count: int = 0 def to_dict(self) -> dict[str, Any]: @@ -196,6 +200,7 @@ class VLMWatchRunner(threading.Thread): min(_MAX_INTERVAL, int(parsed.get("next_run_in", 30))), ) reasoning = str(parsed.get("reasoning", "")) + notification_message = str(parsed.get("notification_message", "")) except (json.JSONDecodeError, ValueError, TypeError) as e: logger.warning( "VLM watch job %s: failed to parse VLM response: %s", self.job.id, e @@ -203,6 +208,7 @@ class VLMWatchRunner(threading.Thread): return 30 self.job.last_reasoning = reasoning + self.job.notification_message = notification_message self.job.iteration_count += 1 self._broadcast_status() @@ -213,19 +219,35 @@ class VLMWatchRunner(threading.Thread): self.job.camera, reasoning, ) - self._send_notification(reasoning) + self._send_notification(notification_message or reasoning) self.job.status = JobStatusTypesEnum.success return 0 return next_run_in def _wait_for_trigger(self, max_wait: float) -> None: - """Wait up to max_wait seconds, returning early if a relevant detection fires on the target camera.""" - deadline = time.time() + max_wait + """Wait up to max_wait seconds, returning early if a relevant detection fires on the target camera. + + With zones configured, a matching detection wakes immediately (events + are already filtered). Without zones, detections are frequent so a + cooldown is enforced: messages are continuously drained to prevent + queue backup, but the loop only exits once a match has been seen + *and* the cooldown period has elapsed. + """ + now = time.time() + deadline = now + max_wait + use_cooldown = not self.job.zones + earliest_wake = now + _DETECTION_COOLDOWN_WITHOUT_ZONE if use_cooldown else 0 + triggered = False + while not self.cancel_event.is_set(): remaining = deadline - time.time() if remaining <= 0: break + + if triggered and time.time() >= earliest_wake: + break + result = self.detection_subscriber.check_for_update( timeout=min(1.0, remaining) ) @@ -250,12 +272,22 @@ class VLMWatchRunner(threading.Thread): if cam != self.job.camera or not tracked_objects: continue if self._detection_matches_filters(tracked_objects): - logger.debug( - "VLM watch job %s: woken early by detection event on %s", - self.job.id, - self.job.camera, - ) - break + if not use_cooldown: + logger.debug( + "VLM watch job %s: woken early by detection event on %s", + self.job.id, + self.job.camera, + ) + break + + if not triggered: + logger.debug( + "VLM watch job %s: detection match on %s, draining for %.0fs", + self.job.id, + self.job.camera, + max(0, earliest_wake - time.time()), + ) + triggered = True def _detection_matches_filters(self, tracked_objects: list) -> bool: """Return True if any tracked object passes the label and zone filters.""" @@ -284,7 +316,11 @@ class VLMWatchRunner(threading.Thread): f"You will receive a sequence of frames over time. Use the conversation history to understand " f"what is stationary vs. actively changing.\n\n" f"For each frame respond with JSON only:\n" - f'{{"condition_met": , "next_run_in": , "reasoning": ""}}\n\n' + f'{{"condition_met": , "next_run_in": , "reasoning": "", "notification_message": ""}}\n\n' + f"Guidelines for notification_message:\n" + f"- Only required when condition_met is true.\n" + f"- Write a short, natural notification a user would want to receive on their phone.\n" + f'- Example: "Your package has been delivered to the front porch."\n\n' f"Guidelines for next_run_in:\n" f"- Scene is empty / nothing of interest visible: 60-300.\n" f"- Relevant object(s) visible anywhere in frame (even outside the target zone): 3-10. " @@ -294,12 +330,13 @@ class VLMWatchRunner(threading.Thread): f"- Keep reasoning to 1-2 sentences." ) - def _send_notification(self, reasoning: str) -> None: + def _send_notification(self, message: str) -> None: """Publish a camera_monitoring event so downstream handlers (web push, MQTT) can notify users.""" payload = { "camera": self.job.camera, "condition": self.job.condition, - "reasoning": reasoning, + "message": message, + "reasoning": self.job.last_reasoning, "job_id": self.job.id, }