diff --git a/frigate/genai/__init__.py b/frigate/genai/__init__.py
index cf51550b47..bca5e6d691 100644
--- a/frigate/genai/__init__.py
+++ b/frigate/genai/__init__.py
@@ -5,7 +5,7 @@ import json
 import logging
 import os
 import re
-from typing import Any, Callable, Optional
+from typing import Any, AsyncGenerator, Callable, Optional
 
 import numpy as np
 from pydantic import ValidationError
@@ -359,6 +359,41 @@ class GenAIClient:
             "finish_reason": "error",
         }
 
+    async def chat_with_tools_stream(
+        self,
+        messages: list[dict[str, Any]],
+        tools: Optional[list[dict[str, Any]]] = None,
+        tool_choice: Optional[str] = "auto",
+        enable_thinking: Optional[bool] = None,
+    ) -> AsyncGenerator[tuple[str, Any], None]:
+        """Streaming counterpart to `chat_with_tools`.
+
+        Yields ``(kind, value)`` tuples where ``kind`` is one of:
+        - 'content_delta': value is a string fragment of the answer
+        - 'reasoning_delta': value is a string fragment of the reasoning
+          trace (emitted before content for thinking models)
+        - 'stats': value is a usage stats dict
+        - 'message': value is the final dict shape described in
+          `chat_with_tools`
+
+        Argument semantics — including ``enable_thinking`` — match
+        `chat_with_tools`. Providers that support streaming should
+        override this; this default only yields an error 'message' event.
+        """
+        logger.warning(
+            f"{self.__class__.__name__} does not support chat_with_tools_stream. "
+            "This method should be overridden by the provider implementation."
+        )
+        yield (
+            "message",
+            {
+                "content": None,
+                "reasoning": None,
+                "tool_calls": None,
+                "finish_reason": "error",
+            },
+        )
+
 
 def load_providers() -> None:
     plugins_dir = os.path.join(os.path.dirname(__file__), "plugins")
diff --git a/frigate/genai/plugins/gemini.py b/frigate/genai/plugins/gemini.py
index 6e4b9283fb..8c05e0b1ad 100644
--- a/frigate/genai/plugins/gemini.py
+++ b/frigate/genai/plugins/gemini.py
@@ -369,11 +369,14 @@ class GeminiClient(GenAIClient):
         messages: list[dict[str, Any]],
         tools: Optional[list[dict[str, Any]]] = None,
         tool_choice: Optional[str] = "auto",
+        enable_thinking: Optional[bool] = None,
     ) -> AsyncGenerator[tuple[str, Any], None]:
         """
         Stream chat with tools; yields content deltas then final message.
 
         Implements streaming function calling/tool usage for Gemini models.
+        ``enable_thinking`` is accepted for interface parity; Gemini configures
+        thinking at the model level, so it is ignored here.
         """
         try:
             # Convert messages to Gemini format
diff --git a/frigate/genai/plugins/openai.py b/frigate/genai/plugins/openai.py
index 8d422bfb31..3e862f8fd5 100644
--- a/frigate/genai/plugins/openai.py
+++ b/frigate/genai/plugins/openai.py
@@ -309,11 +309,15 @@ class OpenAIClient(GenAIClient):
         messages: list[dict[str, Any]],
         tools: Optional[list[dict[str, Any]]] = None,
         tool_choice: Optional[str] = "auto",
+        enable_thinking: Optional[bool] = None,
     ) -> AsyncGenerator[tuple[str, Any], None]:
         """
         Stream chat with tools; yields content deltas then final message.
 
         Implements streaming function calling/tool usage for OpenAI models.
+        The OpenAI chat completions API does not expose a per-request thinking
+        toggle, so ``enable_thinking`` is accepted for interface parity and
+        ignored.
         """
         try:
             openai_tool_choice = None