Improve chat features (#22663)

* Improve notification messaging

* Improve wake behavior when a zone is not specified

* Fix prompt ordering for generate calls
This commit is contained in:
Nicolas Mowen 2026-03-27 07:48:50 -06:00 committed by GitHub
parent 06ad72860c
commit 1a01513223
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 59 additions and 23 deletions

View File

@ -542,9 +542,9 @@ class WebPushClient(Communicator):
self.check_registrations() self.check_registrations()
reasoning: str = payload.get("reasoning", "") text: str = payload.get("message") or payload.get("reasoning", "")
title = f"{camera_name}: Monitoring Alert" title = f"{camera_name}: Monitoring Alert"
message = (reasoning[:197] + "...") if len(reasoning) > 200 else reasoning message = (text[:197] + "...") if len(text) > 200 else text
logger.debug(f"Sending camera monitoring push notification for {camera_name}") logger.debug(f"Sending camera monitoring push notification for {camera_name}")

View File

@ -50,9 +50,9 @@ class GeminiClient(GenAIClient):
response_format: Optional[dict] = None, response_format: Optional[dict] = None,
) -> Optional[str]: ) -> Optional[str]:
"""Submit a request to Gemini.""" """Submit a request to Gemini."""
contents = [ contents = [prompt] + [
types.Part.from_bytes(data=img, mime_type="image/jpeg") for img in images types.Part.from_bytes(data=img, mime_type="image/jpeg") for img in images
] + [prompt] ]
try: try:
# Merge runtime_options into generation_config if provided # Merge runtime_options into generation_config if provided
generation_config_dict: dict[str, Any] = {"candidate_count": 1} generation_config_dict: dict[str, Any] = {"candidate_count": 1}

View File

@ -44,7 +44,12 @@ class OpenAIClient(GenAIClient):
) -> Optional[str]: ) -> Optional[str]:
"""Submit a request to OpenAI.""" """Submit a request to OpenAI."""
encoded_images = [base64.b64encode(image).decode("utf-8") for image in images] encoded_images = [base64.b64encode(image).decode("utf-8") for image in images]
messages_content = [] messages_content: list[dict] = [
{
"type": "text",
"text": prompt,
}
]
for image in encoded_images: for image in encoded_images:
messages_content.append( messages_content.append(
{ {
@ -55,12 +60,6 @@ class OpenAIClient(GenAIClient):
}, },
} }
) )
messages_content.append(
{
"type": "text",
"text": prompt,
}
)
try: try:
request_params = { request_params = {
"model": self.genai_config.model, "model": self.genai_config.model,

View File

@ -25,6 +25,9 @@ logger = logging.getLogger(__name__)
_MIN_INTERVAL = 1 _MIN_INTERVAL = 1
_MAX_INTERVAL = 300 _MAX_INTERVAL = 300
# Minimum seconds between VLM iterations when woken by detections (no zone filter)
_DETECTION_COOLDOWN_WITHOUT_ZONE = 10
# Max user/assistant turn pairs to keep in conversation history # Max user/assistant turn pairs to keep in conversation history
_MAX_HISTORY = 10 _MAX_HISTORY = 10
@ -40,6 +43,7 @@ class VLMWatchJob(Job):
labels: list = field(default_factory=list) labels: list = field(default_factory=list)
zones: list = field(default_factory=list) zones: list = field(default_factory=list)
last_reasoning: str = "" last_reasoning: str = ""
notification_message: str = ""
iteration_count: int = 0 iteration_count: int = 0
def to_dict(self) -> dict[str, Any]: def to_dict(self) -> dict[str, Any]:
@ -196,6 +200,7 @@ class VLMWatchRunner(threading.Thread):
min(_MAX_INTERVAL, int(parsed.get("next_run_in", 30))), min(_MAX_INTERVAL, int(parsed.get("next_run_in", 30))),
) )
reasoning = str(parsed.get("reasoning", "")) reasoning = str(parsed.get("reasoning", ""))
notification_message = str(parsed.get("notification_message", ""))
except (json.JSONDecodeError, ValueError, TypeError) as e: except (json.JSONDecodeError, ValueError, TypeError) as e:
logger.warning( logger.warning(
"VLM watch job %s: failed to parse VLM response: %s", self.job.id, e "VLM watch job %s: failed to parse VLM response: %s", self.job.id, e
@ -203,6 +208,7 @@ class VLMWatchRunner(threading.Thread):
return 30 return 30
self.job.last_reasoning = reasoning self.job.last_reasoning = reasoning
self.job.notification_message = notification_message
self.job.iteration_count += 1 self.job.iteration_count += 1
self._broadcast_status() self._broadcast_status()
@ -213,19 +219,35 @@ class VLMWatchRunner(threading.Thread):
self.job.camera, self.job.camera,
reasoning, reasoning,
) )
self._send_notification(reasoning) self._send_notification(notification_message or reasoning)
self.job.status = JobStatusTypesEnum.success self.job.status = JobStatusTypesEnum.success
return 0 return 0
return next_run_in return next_run_in
def _wait_for_trigger(self, max_wait: float) -> None: def _wait_for_trigger(self, max_wait: float) -> None:
"""Wait up to max_wait seconds, returning early if a relevant detection fires on the target camera.""" """Wait up to max_wait seconds, returning early if a relevant detection fires on the target camera.
deadline = time.time() + max_wait
With zones configured, a matching detection wakes immediately (events
are already filtered). Without zones, detections are frequent so a
cooldown is enforced: messages are continuously drained to prevent
queue backup, and the loop only exits early once a match has been seen
*and* the cooldown period has elapsed. Cancellation and the max_wait
deadline still end the wait in either mode.
"""
now = time.time()
deadline = now + max_wait
use_cooldown = not self.job.zones
earliest_wake = now + _DETECTION_COOLDOWN_WITHOUT_ZONE if use_cooldown else 0
triggered = False
while not self.cancel_event.is_set(): while not self.cancel_event.is_set():
remaining = deadline - time.time() remaining = deadline - time.time()
if remaining <= 0: if remaining <= 0:
break break
if triggered and time.time() >= earliest_wake:
break
result = self.detection_subscriber.check_for_update( result = self.detection_subscriber.check_for_update(
timeout=min(1.0, remaining) timeout=min(1.0, remaining)
) )
@ -250,12 +272,22 @@ class VLMWatchRunner(threading.Thread):
if cam != self.job.camera or not tracked_objects: if cam != self.job.camera or not tracked_objects:
continue continue
if self._detection_matches_filters(tracked_objects): if self._detection_matches_filters(tracked_objects):
logger.debug( if not use_cooldown:
"VLM watch job %s: woken early by detection event on %s", logger.debug(
self.job.id, "VLM watch job %s: woken early by detection event on %s",
self.job.camera, self.job.id,
) self.job.camera,
break )
break
if not triggered:
logger.debug(
"VLM watch job %s: detection match on %s, draining for %.0fs",
self.job.id,
self.job.camera,
max(0, earliest_wake - time.time()),
)
triggered = True
def _detection_matches_filters(self, tracked_objects: list) -> bool: def _detection_matches_filters(self, tracked_objects: list) -> bool:
"""Return True if any tracked object passes the label and zone filters.""" """Return True if any tracked object passes the label and zone filters."""
@ -284,7 +316,11 @@ class VLMWatchRunner(threading.Thread):
f"You will receive a sequence of frames over time. Use the conversation history to understand " f"You will receive a sequence of frames over time. Use the conversation history to understand "
f"what is stationary vs. actively changing.\n\n" f"what is stationary vs. actively changing.\n\n"
f"For each frame respond with JSON only:\n" f"For each frame respond with JSON only:\n"
f'{{"condition_met": <true/false>, "next_run_in": <integer seconds 1-300>, "reasoning": "<brief explanation>"}}\n\n' f'{{"condition_met": <true/false>, "next_run_in": <integer seconds 1-300>, "reasoning": "<brief explanation>", "notification_message": "<natural language notification>"}}\n\n'
f"Guidelines for notification_message:\n"
f"- Only required when condition_met is true.\n"
f"- Write a short, natural notification a user would want to receive on their phone.\n"
f'- Example: "Your package has been delivered to the front porch."\n\n'
f"Guidelines for next_run_in:\n" f"Guidelines for next_run_in:\n"
f"- Scene is empty / nothing of interest visible: 60-300.\n" f"- Scene is empty / nothing of interest visible: 60-300.\n"
f"- Relevant object(s) visible anywhere in frame (even outside the target zone): 3-10. " f"- Relevant object(s) visible anywhere in frame (even outside the target zone): 3-10. "
@ -294,12 +330,13 @@ class VLMWatchRunner(threading.Thread):
f"- Keep reasoning to 1-2 sentences." f"- Keep reasoning to 1-2 sentences."
) )
def _send_notification(self, reasoning: str) -> None: def _send_notification(self, message: str) -> None:
"""Publish a camera_monitoring event so downstream handlers (web push, MQTT) can notify users.""" """Publish a camera_monitoring event so downstream handlers (web push, MQTT) can notify users."""
payload = { payload = {
"camera": self.job.camera, "camera": self.job.camera,
"condition": self.job.condition, "condition": self.job.condition,
"reasoning": reasoning, "message": message,
"reasoning": self.job.last_reasoning,
"job_id": self.job.id, "job_id": self.job.id,
} }