Disable thinking for descriptions automatically

2026-06-21 03:41:55 +03:00 · 2026-05-21 09:23:06 -06:00 · 2026-05-21 09:23:06 -06:00 · 44ec45ab41
commit 44ec45ab41
parent 07c2872b08
5 changed files with 18 additions and 2 deletions
--- a/frigate/genai/__init__.py
+++ b/frigate/genai/__init__.py
@@ -222,8 +222,15 @@ class GenAIClient:
        prompt: str,
        images: list[bytes],
        response_format: Optional[dict] = None,
+        enable_thinking: bool = False,
    ) -> Optional[str]:
-        """Submit a request to the provider."""
+        """Submit a request to the provider.
+
+        ``enable_thinking`` is honored only by providers that report
+        ``supports_toggleable_thinking``. Description-style callers leave it
+        at the default (off) since synthesis tasks don't benefit from
+        reasoning traces.
+        """
        return None

    @property
--- a/frigate/genai/plugins/gemini.py
+++ b/frigate/genai/plugins/gemini.py
@@ -62,6 +62,7 @@ class GeminiClient(GenAIClient):
        prompt: str,
        images: list[bytes],
        response_format: Optional[dict] = None,
+        enable_thinking: bool = False,
    ) -> Optional[str]:
        """Submit a request to Gemini."""
        contents = [prompt] + [
--- a/frigate/genai/plugins/llama_cpp.py
+++ b/frigate/genai/plugins/llama_cpp.py
@@ -312,6 +312,7 @@ class LlamaCppClient(GenAIClient):
        prompt: str,
        images: list[bytes],
        response_format: Optional[dict] = None,
+        enable_thinking: bool = False,
    ) -> Optional[str]:
        """Submit a request to llama.cpp server."""
        if self.provider is None:
@@ -339,7 +340,7 @@ class LlamaCppClient(GenAIClient):
                )

            # Build request payload with llama.cpp native options
-            payload = {
+            payload: dict[str, Any] = {
                "model": self.genai_config.model,
                "messages": [
                    {
@@ -353,6 +354,9 @@ class LlamaCppClient(GenAIClient):
            if response_format:
                payload["response_format"] = response_format

+            if self.supports_toggleable_thinking:
+                payload["chat_template_kwargs"] = {"enable_thinking": enable_thinking}
+
            response = requests.post(
                f"{self.provider}/v1/chat/completions",
                json=payload,
--- a/frigate/genai/plugins/ollama.py
+++ b/frigate/genai/plugins/ollama.py
@@ -194,6 +194,7 @@ class OllamaClient(GenAIClient):
        prompt: str,
        images: list[bytes],
        response_format: Optional[dict] = None,
+        enable_thinking: bool = False,
    ) -> Optional[str]:
        """Submit a request to Ollama"""
        if self.provider is None:
@@ -210,6 +211,8 @@ class OllamaClient(GenAIClient):
                schema = response_format.get("json_schema", {}).get("schema")
                if schema:
                    ollama_options["format"] = self._clean_schema_for_ollama(schema)
+            if self.supports_toggleable_thinking:
+                ollama_options["think"] = enable_thinking
            logger.debug(
                "Ollama generate request: model=%s, prompt_len=%s, image_count=%s, "
                "has_format=%s, options=%s",
--- a/frigate/genai/plugins/openai.py
+++ b/frigate/genai/plugins/openai.py
@@ -61,6 +61,7 @@ class OpenAIClient(GenAIClient):
        prompt: str,
        images: list[bytes],
        response_format: Optional[dict] = None,
+        enable_thinking: bool = False,
    ) -> Optional[str]:
        """Submit a request to OpenAI."""
        encoded_images = [base64.b64encode(image).decode("utf-8") for image in images]