diff --git a/frigate/genai/__init__.py b/frigate/genai/__init__.py
index cf51550b47..bca5e6d691 100644
--- a/frigate/genai/__init__.py
+++ b/frigate/genai/__init__.py
@@ -5,7 +5,7 @@ import json
 import logging
 import os
 import re
-from typing import Any, Callable, Optional
+from typing import Any, AsyncGenerator, Callable, Optional
 
 import numpy as np
 from pydantic import ValidationError
@@ -359,6 +359,41 @@ class GenAIClient:
             "finish_reason": "error",
         }
 
+    async def chat_with_tools_stream(
+        self,
+        messages: list[dict[str, Any]],
+        tools: Optional[list[dict[str, Any]]] = None,
+        tool_choice: Optional[str] = "auto",
+        enable_thinking: Optional[bool] = None,
+    ) -> AsyncGenerator[tuple[str, Any], None]:
+        """Streaming counterpart to `chat_with_tools`.
+
+        Yields ``(kind, value)`` tuples where ``kind`` is one of:
+            - 'content_delta': value is a string fragment of the answer
+            - 'reasoning_delta': value is a string fragment of the reasoning
+              trace (emitted before content for thinking models)
+            - 'stats': value is a usage stats dict
+            - 'message': value is the final dict shape described in
+              `chat_with_tools`
+
+        Argument semantics — including ``enable_thinking`` — match
+        `chat_with_tools`. This base fallback logs a warning and yields a single
+        error 'message' event; providers that support streaming should override it.
+        """
+        logger.warning(
+            f"{self.__class__.__name__} does not support chat_with_tools_stream. "
+            "This method should be overridden by the provider implementation."
+        )
+        yield (
+            "message",
+            {
+                "content": None,
+                "reasoning": None,
+                "tool_calls": None,
+                "finish_reason": "error",
+            },
+        )
+
 
 def load_providers() -> None:
     plugins_dir = os.path.join(os.path.dirname(__file__), "plugins")
diff --git a/frigate/genai/plugins/gemini.py b/frigate/genai/plugins/gemini.py
index 6e4b9283fb..8c05e0b1ad 100644
--- a/frigate/genai/plugins/gemini.py
+++ b/frigate/genai/plugins/gemini.py
@@ -369,11 +369,14 @@ class GeminiClient(GenAIClient):
         messages: list[dict[str, Any]],
         tools: Optional[list[dict[str, Any]]] = None,
         tool_choice: Optional[str] = "auto",
+        enable_thinking: Optional[bool] = None,
     ) -> AsyncGenerator[tuple[str, Any], None]:
         """
         Stream chat with tools; yields content deltas then final message.
 
         Implements streaming function calling/tool usage for Gemini models.
+        ``enable_thinking`` is accepted for interface parity; Gemini configures
+        thinking at the model level, so it is ignored here.
         """
         try:
             # Convert messages to Gemini format
diff --git a/frigate/genai/plugins/openai.py b/frigate/genai/plugins/openai.py
index 8d422bfb31..3e862f8fd5 100644
--- a/frigate/genai/plugins/openai.py
+++ b/frigate/genai/plugins/openai.py
@@ -309,11 +309,15 @@ class OpenAIClient(GenAIClient):
         messages: list[dict[str, Any]],
         tools: Optional[list[dict[str, Any]]] = None,
         tool_choice: Optional[str] = "auto",
+        enable_thinking: Optional[bool] = None,
     ) -> AsyncGenerator[tuple[str, Any], None]:
         """
         Stream chat with tools; yields content deltas then final message.
 
         Implements streaming function calling/tool usage for OpenAI models.
+        The OpenAI chat completions API does not expose a per-request thinking
+        toggle, so ``enable_thinking`` is accepted for interface parity and
+        ignored.
         """
         try:
             openai_tool_choice = None