Auto llama.cpp context (#22737)

* Add model probing * Include aliases * Pull correctly * Correctly query specific model props * Debug log * Update model list
2026-07-13 15:31:14 +03:00 · 2026-04-02 19:13:34 -06:00 · 2026-04-02 19:13:34 -06:00 · 68dfb157ea
commit 68dfb157ea
parent 520d9eeb7f
2 changed files with 124 additions and 6 deletions
--- a/docs/docs/configuration/genai/config.md
+++ b/docs/docs/configuration/genai/config.md
@@ -29,11 +29,11 @@ You must use a vision-capable model with Frigate. The following models are recom

 | Model         | Notes                                                                                                                                                                |
 | ------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `qwen3-vl`    | Strong visual and situational understanding, strong ability to identify smaller objects and interactions with object.                                                |
+| `qwen3-vl`    | Strong visual and situational understanding, enhanced ability to identify smaller objects and interactions with objects.                                             |
 | `qwen3.5`     | Strong situational understanding, but missing DeepStack from qwen3-vl leading to worse performance for identifying objects in people's hands and other small details. |
+| `gemma4`      | Strong situational understanding, sometimes resorts to more vague terms like 'interacts' instead of assigning a specific action.                                     |
 | `Intern3.5VL` | Relatively fast with good vision comprehension                                                                                                                       |
 | `gemma3`      | Slower model with good vision and temporal understanding                                                                                                             |
-| `qwen2.5-vl`  | Fast but capable model with good vision comprehension                                                                                                                |

 :::info

--- a/frigate/genai/llama_cpp.py
+++ b/frigate/genai/llama_cpp.py
@@ -38,18 +38,111 @@ class LlamaCppClient(GenAIClient):

    provider: str | None  # base_url
    provider_options: dict[str, Any]
+    _context_size: int | None
+    _supports_vision: bool
+    _supports_audio: bool
+    _supports_tools: bool

    def _init_provider(self) -> str | None:
-        """Initialize the client."""
+        """Initialize the client and query model metadata from the server."""
        self.provider_options = {
            **self.genai_config.provider_options,
        }
-        return (
+        self._context_size = None
+        self._supports_vision = False
+        self._supports_audio = False
+        self._supports_tools = False
+
+        base_url = (
            self.genai_config.base_url.rstrip("/")
            if self.genai_config.base_url
            else None
        )

+        if base_url is None:
+            return None
+
+        configured_model = self.genai_config.model
+
+        # Query /v1/models to validate the configured model exists
+        try:
+            response = requests.get(
+                f"{base_url}/v1/models",
+                timeout=10,
+            )
+            response.raise_for_status()
+            models_data = response.json()
+
+            model_found = False
+            for model in models_data.get("data", []):
+                model_ids = {model.get("id")}
+                for alias in model.get("aliases", []):
+                    model_ids.add(alias)
+                if configured_model in model_ids:
+                    model_found = True
+                    break
+
+            if not model_found:
+                available = []
+                for m in models_data.get("data", []):
+                    available.append(m.get("id", "unknown"))
+                    for alias in m.get("aliases", []):
+                        available.append(alias)
+                logger.error(
+                    "Model '%s' not found on llama.cpp server. Available models: %s",
+                    configured_model,
+                    available,
+                )
+                return None
+        except Exception as e:
+            logger.warning(
+                "Failed to query llama.cpp /v1/models endpoint: %s. "
+                "Model validation skipped.",
+                e,
+            )
+
+        # Query /props for context size, modalities, and tool support
+        try:
+            response = requests.get(
+                f"{base_url}/props",
+                params={"model": configured_model},
+                timeout=10,
+            )
+            response.raise_for_status()
+            props = response.json()
+
+            # Context size from server runtime config
+            default_settings = props.get("default_generation_settings", {})
+            n_ctx = default_settings.get("n_ctx")
+            if n_ctx:
+                self._context_size = int(n_ctx)
+
+            # Modalities (vision, audio)
+            modalities = props.get("modalities", {})
+            self._supports_vision = modalities.get("vision", False)
+            self._supports_audio = modalities.get("audio", False)
+
+            # Tool support from chat template capabilities
+            chat_caps = props.get("chat_template_caps", {})
+            self._supports_tools = chat_caps.get("supports_tools", False)
+
+            logger.debug(
+                "llama.cpp model '%s' initialized — context: %s, vision: %s, audio: %s, tools: %s",
+                configured_model,
+                self._context_size or "unknown",
+                self._supports_vision,
+                self._supports_audio,
+                self._supports_tools,
+            )
+        except Exception as e:
+            logger.warning(
+                "Failed to query llama.cpp /props endpoint: %s. "
+                "Using defaults for context size and capabilities.",
+                e,
+            )
+
+        return base_url
+
    def _send(
        self,
        prompt: str,
@@ -117,9 +210,34 @@ class LlamaCppClient(GenAIClient):
            logger.warning("llama.cpp returned an error: %s", str(e))
            return None

+    @property
+    def supports_vision(self) -> bool:
+        """Whether the loaded model supports vision/image input."""
+        return self._supports_vision
+
+    @property
+    def supports_audio(self) -> bool:
+        """Whether the loaded model supports audio input."""
+        return self._supports_audio
+
+    @property
+    def supports_tools(self) -> bool:
+        """Whether the loaded model supports tool/function calling."""
+        return self._supports_tools
+
    def get_context_size(self) -> int:
-        """Get the context window size for llama.cpp."""
-        return int(self.provider_options.get("context_size", 4096))
+        """Get the context window size for llama.cpp.
+
+        Resolution order:
+        1. provider_options["context_size"] (user override)
+        2. Value queried from llama.cpp server at init
+        3. Default fallback of 4096
+        """
+        if "context_size" in self.provider_options:
+            return int(self.provider_options["context_size"])
+        if self._context_size is not None:
+            return self._context_size
+        return 4096

    def _build_payload(
        self,