Auto llama.cpp context (#22737)
Some checks are pending
CI / AMD64 Build (push) Waiting to run
CI / ARM Build (push) Waiting to run
CI / Jetson Jetpack 6 (push) Waiting to run
CI / AMD64 Extra Build (push) Blocked by required conditions
CI / ARM Extra Build (push) Blocked by required conditions
CI / Synaptics Build (push) Blocked by required conditions
CI / Assemble and push default build (push) Blocked by required conditions

* Add model probing

* Include aliases

* Pull correctly

* Correctly query specific model props

* Debug log

* Update model list
This commit is contained in:
Nicolas Mowen 2026-04-02 19:13:34 -06:00 committed by GitHub
parent 520d9eeb7f
commit 68dfb157ea
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 124 additions and 6 deletions

View File

@ -29,11 +29,11 @@ You must use a vision-capable model with Frigate. The following models are recom
| Model | Notes | | Model | Notes |
| ------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | ------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `qwen3-vl` | Strong visual and situational understanding, strong ability to identify smaller objects and interactions with objects. | | `qwen3-vl` | Strong visual and situational understanding, enhanced ability to identify smaller objects and interactions with objects. |
| `qwen3.5` | Strong situational understanding, but missing DeepStack from qwen3-vl leading to worse performance for identifying objects in people's hands and other small details. | | `qwen3.5` | Strong situational understanding, but missing DeepStack from qwen3-vl leading to worse performance for identifying objects in people's hands and other small details. |
| `gemma4` | Strong situational understanding, sometimes resorts to more vague terms like 'interacts' instead of assigning a specific action. |
| `Intern3.5VL` | Relatively fast with good vision comprehension | | `Intern3.5VL` | Relatively fast with good vision comprehension |
| `gemma3` | Slower model with good vision and temporal understanding | | `gemma3` | Slower model with good vision and temporal understanding |
| `qwen2.5-vl` | Fast but capable model with good vision comprehension |
:::info :::info

View File

@ -38,18 +38,111 @@ class LlamaCppClient(GenAIClient):
provider: str | None # base_url provider: str | None # base_url
provider_options: dict[str, Any] provider_options: dict[str, Any]
_context_size: int | None
_supports_vision: bool
_supports_audio: bool
_supports_tools: bool
def _init_provider(self) -> str | None:
    """Initialize the client and query model metadata from the server.

    Probes the llama.cpp server twice: once against /v1/models to verify
    the configured model (or one of its aliases) is actually loaded, and
    once against /props to discover the context size and the model's
    vision/audio/tool capabilities. Both probes are best-effort — a
    network or parse failure is logged and defaults are kept.

    Returns the normalized base URL on success, or None when no base URL
    is configured or the configured model is not present on the server.
    """
    self.provider_options = {**self.genai_config.provider_options}

    # Conservative defaults until the server tells us otherwise.
    self._context_size = None
    self._supports_vision = False
    self._supports_audio = False
    self._supports_tools = False

    base_url = (
        self.genai_config.base_url.rstrip("/")
        if self.genai_config.base_url
        else None
    )
    if base_url is None:
        return None

    configured_model = self.genai_config.model

    # Query /v1/models to validate the configured model exists
    try:
        resp = requests.get(f"{base_url}/v1/models", timeout=10)
        resp.raise_for_status()
        catalog = resp.json()
        entries = catalog.get("data", [])

        # A model matches on its canonical id or any advertised alias.
        found = any(
            configured_model in {entry.get("id"), *entry.get("aliases", [])}
            for entry in entries
        )

        if not found:
            known: list[str] = []
            for entry in entries:
                known.append(entry.get("id", "unknown"))
                known.extend(entry.get("aliases", []))
            logger.error(
                "Model '%s' not found on llama.cpp server. Available models: %s",
                configured_model,
                known,
            )
            return None
    except Exception as e:
        # Best-effort validation only: an unreachable or older server
        # without this endpoint should not block initialization.
        logger.warning(
            "Failed to query llama.cpp /v1/models endpoint: %s. "
            "Model validation skipped.",
            e,
        )

    # Query /props for context size, modalities, and tool support
    try:
        resp = requests.get(
            f"{base_url}/props",
            params={"model": configured_model},
            timeout=10,
        )
        resp.raise_for_status()
        props = resp.json()

        # Context size comes from the server's runtime generation settings.
        n_ctx = props.get("default_generation_settings", {}).get("n_ctx")
        if n_ctx:
            self._context_size = int(n_ctx)

        # Input modalities (vision/audio) as reported by the server.
        modalities = props.get("modalities", {})
        self._supports_vision = modalities.get("vision", False)
        self._supports_audio = modalities.get("audio", False)

        # Tool/function-calling support advertised by the chat template.
        self._supports_tools = props.get("chat_template_caps", {}).get(
            "supports_tools", False
        )

        logger.debug(
            "llama.cpp model '%s' initialized — context: %s, vision: %s, audio: %s, tools: %s",
            configured_model,
            self._context_size or "unknown",
            self._supports_vision,
            self._supports_audio,
            self._supports_tools,
        )
    except Exception as e:
        logger.warning(
            "Failed to query llama.cpp /props endpoint: %s. "
            "Using defaults for context size and capabilities.",
            e,
        )

    return base_url
def _send( def _send(
self, self,
prompt: str, prompt: str,
@ -117,9 +210,34 @@ class LlamaCppClient(GenAIClient):
logger.warning("llama.cpp returned an error: %s", str(e)) logger.warning("llama.cpp returned an error: %s", str(e))
return None return None
@property
def supports_vision(self) -> bool:
    """Whether the loaded model supports vision/image input.

    Reflects the `modalities.vision` flag queried from the llama.cpp
    server's /props endpoint at init; False until/unless probed.
    """
    return self._supports_vision
@property
def supports_audio(self) -> bool:
    """Whether the loaded model supports audio input.

    Reflects the `modalities.audio` flag queried from the llama.cpp
    server's /props endpoint at init; False until/unless probed.
    """
    return self._supports_audio
@property
def supports_tools(self) -> bool:
    """Whether the loaded model supports tool/function calling.

    Reflects `chat_template_caps.supports_tools` queried from the
    llama.cpp server's /props endpoint at init; False until/unless probed.
    """
    return self._supports_tools
def get_context_size(self) -> int:
    """Get the context window size for llama.cpp.

    Resolution order:
    1. provider_options["context_size"] (user override)
    2. Value queried from llama.cpp server at init
    3. Default fallback of 4096
    """
    # User override wins when present (EAFP: a single dict lookup).
    try:
        return int(self.provider_options["context_size"])
    except KeyError:
        pass
    # Next, the value probed from the server during init; else the default.
    probed = self._context_size
    return probed if probed is not None else 4096
def _build_payload( def _build_payload(
self, self,