diff --git a/docs/docs/configuration/genai/config.md b/docs/docs/configuration/genai/config.md index 56e8e2788..e39f88a02 100644 --- a/docs/docs/configuration/genai/config.md +++ b/docs/docs/configuration/genai/config.md @@ -29,11 +29,11 @@ You must use a vision-capable model with Frigate. The following models are recom | Model | Notes | | ------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `qwen3-vl` | Strong visual and situational understanding, strong ability to identify smaller objects and interactions with object. | +| `qwen3-vl` | Strong visual and situational understanding, enhanced ability to identify smaller objects and interactions with objects. | | `qwen3.5` | Strong situational understanding, but missing DeepStack from qwen3-vl leading to worse performance for identifying objects in people's hand and other small details. | +| `gemma4` | Strong situational understanding, sometimes resorts to more vague terms like 'interacts' instead of assigning a specific action. 
| | `Intern3.5VL` | Relatively fast with good vision comprehension | | `gemma3` | Slower model with good vision and temporal understanding | -| `qwen2.5-vl` | Fast but capable model with good vision comprehension | :::info diff --git a/frigate/genai/llama_cpp.py b/frigate/genai/llama_cpp.py index fbb1b45df..03bd2e963 100644 --- a/frigate/genai/llama_cpp.py +++ b/frigate/genai/llama_cpp.py @@ -38,18 +38,111 @@ class LlamaCppClient(GenAIClient): provider: str | None # base_url provider_options: dict[str, Any] + _context_size: int | None + _supports_vision: bool + _supports_audio: bool + _supports_tools: bool def _init_provider(self) -> str | None: - """Initialize the client.""" + """Initialize the client and query model metadata from the server.""" self.provider_options = { **self.genai_config.provider_options, } - return ( + self._context_size = None + self._supports_vision = False + self._supports_audio = False + self._supports_tools = False + + base_url = ( self.genai_config.base_url.rstrip("/") if self.genai_config.base_url else None ) + if base_url is None: + return None + + configured_model = self.genai_config.model + + # Query /v1/models to validate the configured model exists + try: + response = requests.get( + f"{base_url}/v1/models", + timeout=10, + ) + response.raise_for_status() + models_data = response.json() + + model_found = False + for model in models_data.get("data", []): + model_ids = {model.get("id")} + for alias in model.get("aliases", []): + model_ids.add(alias) + if configured_model in model_ids: + model_found = True + break + + if not model_found: + available = [] + for m in models_data.get("data", []): + available.append(m.get("id", "unknown")) + for alias in m.get("aliases", []): + available.append(alias) + logger.error( + "Model '%s' not found on llama.cpp server. Available models: %s", + configured_model, + available, + ) + return None + except Exception as e: + logger.warning( + "Failed to query llama.cpp /v1/models endpoint: %s. 
" + "Model validation skipped.", + e, + ) + + # Query /props for context size, modalities, and tool support + try: + response = requests.get( + f"{base_url}/props", + params={"model": configured_model}, + timeout=10, + ) + response.raise_for_status() + props = response.json() + + # Context size from server runtime config + default_settings = props.get("default_generation_settings", {}) + n_ctx = default_settings.get("n_ctx") + if n_ctx: + self._context_size = int(n_ctx) + + # Modalities (vision, audio) + modalities = props.get("modalities", {}) + self._supports_vision = modalities.get("vision", False) + self._supports_audio = modalities.get("audio", False) + + # Tool support from chat template capabilities + chat_caps = props.get("chat_template_caps", {}) + self._supports_tools = chat_caps.get("supports_tools", False) + + logger.debug( + "llama.cpp model '%s' initialized — context: %s, vision: %s, audio: %s, tools: %s", + configured_model, + self._context_size or "unknown", + self._supports_vision, + self._supports_audio, + self._supports_tools, + ) + except Exception as e: + logger.warning( + "Failed to query llama.cpp /props endpoint: %s. 
" + "Using defaults for context size and capabilities.", + e, + ) + + return base_url + def _send( self, prompt: str, @@ -117,9 +210,34 @@ class LlamaCppClient(GenAIClient): logger.warning("llama.cpp returned an error: %s", str(e)) return None + @property + def supports_vision(self) -> bool: + """Whether the loaded model supports vision/image input.""" + return self._supports_vision + + @property + def supports_audio(self) -> bool: + """Whether the loaded model supports audio input.""" + return self._supports_audio + + @property + def supports_tools(self) -> bool: + """Whether the loaded model supports tool/function calling.""" + return self._supports_tools + def get_context_size(self) -> int: - """Get the context window size for llama.cpp.""" - return int(self.provider_options.get("context_size", 4096)) + """Get the context window size for llama.cpp. + + Resolution order: + 1. provider_options["context_size"] (user override) + 2. Value queried from llama.cpp server at init + 3. Default fallback of 4096 + """ + if "context_size" in self.provider_options: + return int(self.provider_options["context_size"]) + if self._context_size is not None: + return self._context_size + return 4096 def _build_payload( self,