From 44ec45ab41588a9cb44c026e70cebd687c0448b7 Mon Sep 17 00:00:00 2001 From: Nicolas Mowen Date: Thu, 21 May 2026 09:23:06 -0600 Subject: [PATCH] Disable thinking for descriptions automatically --- frigate/genai/__init__.py | 9 ++++++++- frigate/genai/plugins/gemini.py | 1 + frigate/genai/plugins/llama_cpp.py | 6 +++++- frigate/genai/plugins/ollama.py | 3 +++ frigate/genai/plugins/openai.py | 1 + 5 files changed, 18 insertions(+), 2 deletions(-) diff --git a/frigate/genai/__init__.py b/frigate/genai/__init__.py index 45f8c73449..cf51550b47 100644 --- a/frigate/genai/__init__.py +++ b/frigate/genai/__init__.py @@ -222,8 +222,15 @@ class GenAIClient: prompt: str, images: list[bytes], response_format: Optional[dict] = None, + enable_thinking: bool = False, ) -> Optional[str]: - """Submit a request to the provider.""" + """Submit a request to the provider. + + ``enable_thinking`` is honored only by providers that report + ``supports_toggleable_thinking``. Description-style callers leave it + at the default (off) since synthesis tasks don't benefit from + reasoning traces. + """ return None @property diff --git a/frigate/genai/plugins/gemini.py b/frigate/genai/plugins/gemini.py index bcac09d0e3..c1211aa16f 100644 --- a/frigate/genai/plugins/gemini.py +++ b/frigate/genai/plugins/gemini.py @@ -62,6 +62,7 @@ class GeminiClient(GenAIClient): prompt: str, images: list[bytes], response_format: Optional[dict] = None, + enable_thinking: bool = False, ) -> Optional[str]: """Submit a request to Gemini.""" contents = [prompt] + [ diff --git a/frigate/genai/plugins/llama_cpp.py b/frigate/genai/plugins/llama_cpp.py index f48de917f4..d5458cf8f9 100644 --- a/frigate/genai/plugins/llama_cpp.py +++ b/frigate/genai/plugins/llama_cpp.py @@ -312,6 +312,7 @@ class LlamaCppClient(GenAIClient): prompt: str, images: list[bytes], response_format: Optional[dict] = None, + enable_thinking: bool = False, ) -> Optional[str]: """Submit a request to llama.cpp server.""" if self.provider is None: @@ -339,7 +340,7 @@ class LlamaCppClient(GenAIClient): ) # Build request payload with llama.cpp native options - payload = { + payload: dict[str, Any] = { "model": self.genai_config.model, "messages": [ { @@ -353,6 +354,9 @@ class LlamaCppClient(GenAIClient): if response_format: payload["response_format"] = response_format + if self.supports_toggleable_thinking: + payload["chat_template_kwargs"] = {"enable_thinking": enable_thinking} + response = requests.post( f"{self.provider}/v1/chat/completions", json=payload, diff --git a/frigate/genai/plugins/ollama.py b/frigate/genai/plugins/ollama.py index e92cdaff02..08176f524b 100644 --- a/frigate/genai/plugins/ollama.py +++ b/frigate/genai/plugins/ollama.py @@ -194,6 +194,7 @@ class OllamaClient(GenAIClient): prompt: str, images: list[bytes], response_format: Optional[dict] = None, + enable_thinking: bool = False, ) -> Optional[str]: """Submit a request to Ollama""" if self.provider is None: @@ -210,6 +211,8 @@ class OllamaClient(GenAIClient): schema = response_format.get("json_schema", {}).get("schema") if schema: ollama_options["format"] = self._clean_schema_for_ollama(schema) + if self.supports_toggleable_thinking: + ollama_options["think"] = enable_thinking logger.debug( "Ollama generate request: model=%s, prompt_len=%s, image_count=%s, " "has_format=%s, options=%s", diff --git a/frigate/genai/plugins/openai.py b/frigate/genai/plugins/openai.py index f07e83b5dc..9ab884b3d1 100644 --- a/frigate/genai/plugins/openai.py +++ b/frigate/genai/plugins/openai.py @@ -61,6 +61,7 @@ class OpenAIClient(GenAIClient): prompt: str, images: list[bytes], response_format: Optional[dict] = None, + enable_thinking: bool = False, ) -> Optional[str]: """Submit a request to OpenAI.""" encoded_images = [base64.b64encode(image).decode("utf-8") for image in images]