Disable thinking for descriptions automatically

2026-07-05 03:21:16 +03:00 · 2026-05-21 09:23:06 -06:00 · 2026-05-21 09:23:06 -06:00 · 44ec45ab41
commit 44ec45ab41
parent 07c2872b08
5 changed files with 18 additions and 2 deletions
--- a/frigate/genai/__init__.py
+++ b/frigate/genai/__init__.py
@@ -222,8 +222,15 @@ class GenAIClient:
        prompt: str,
        images: list[bytes],
        response_format: Optional[dict] = None,
        enable_thinking: bool = False,
    ) -> Optional[str]:
-        """Submit a request to the provider."""
+        """Submit a request to the provider.
        ``enable_thinking`` is honored only by providers that report
        ``supports_toggleable_thinking``. Description-style callers leave it
        at the default (off) since synthesis tasks don't benefit from
        reasoning traces.
        """
        return None
    @property
--- a/frigate/genai/plugins/gemini.py
+++ b/frigate/genai/plugins/gemini.py
@@ -62,6 +62,7 @@ class GeminiClient(GenAIClient):
        prompt: str,
        images: list[bytes],
        response_format: Optional[dict] = None,
        enable_thinking: bool = False,
    ) -> Optional[str]:
        """Submit a request to Gemini."""
        contents = [prompt] + [
--- a/frigate/genai/plugins/llama_cpp.py
+++ b/frigate/genai/plugins/llama_cpp.py
@@ -312,6 +312,7 @@ class LlamaCppClient(GenAIClient):
        prompt: str,
        images: list[bytes],
        response_format: Optional[dict] = None,
        enable_thinking: bool = False,
    ) -> Optional[str]:
        """Submit a request to llama.cpp server."""
        if self.provider is None:
@@ -339,7 +340,7 @@ class LlamaCppClient(GenAIClient):
                )
            # Build request payload with llama.cpp native options
-            payload = {
+            payload: dict[str, Any] = {
                "model": self.genai_config.model,
                "messages": [
                    {
@@ -353,6 +354,9 @@ class LlamaCppClient(GenAIClient):
            if response_format:
                payload["response_format"] = response_format
            if self.supports_toggleable_thinking:
                payload["chat_template_kwargs"] = {"enable_thinking": enable_thinking}
            response = requests.post(
                f"{self.provider}/v1/chat/completions",
                json=payload,
--- a/frigate/genai/plugins/ollama.py
+++ b/frigate/genai/plugins/ollama.py
@@ -194,6 +194,7 @@ class OllamaClient(GenAIClient):
        prompt: str,
        images: list[bytes],
        response_format: Optional[dict] = None,
        enable_thinking: bool = False,
    ) -> Optional[str]:
        """Submit a request to Ollama"""
        if self.provider is None:
@@ -210,6 +211,8 @@ class OllamaClient(GenAIClient):
                schema = response_format.get("json_schema", {}).get("schema")
                if schema:
                    ollama_options["format"] = self._clean_schema_for_ollama(schema)
            if self.supports_toggleable_thinking:
                ollama_options["think"] = enable_thinking
            logger.debug(
                "Ollama generate request: model=%s, prompt_len=%s, image_count=%s, "
                "has_format=%s, options=%s",
--- a/frigate/genai/plugins/openai.py
+++ b/frigate/genai/plugins/openai.py
@@ -61,6 +61,7 @@ class OpenAIClient(GenAIClient):
        prompt: str,
        images: list[bytes],
        response_format: Optional[dict] = None,
        enable_thinking: bool = False,
    ) -> Optional[str]:
        """Submit a request to OpenAI."""
        encoded_images = [base64.b64encode(image).decode("utf-8") for image in images]