Use new models endpoint info to determine modalities

This commit is contained in:
Nicolas Mowen 2026-05-12 17:13:12 -06:00
parent ed4b2cab78
commit 2099e1555e

View File

@ -18,6 +18,17 @@ from frigate.genai.utils import parse_tool_calls_from_message
logger = logging.getLogger(__name__)
def _parse_launch_arg(args: list[str], flag: str) -> str | None:
"""Return the value following `flag` in a positional argv list, or None."""
try:
idx = args.index(flag)
except ValueError:
return None
if idx + 1 >= len(args):
return None
return args[idx + 1]
def _to_jpeg(img_bytes: bytes) -> bytes | None:
"""Convert image bytes to JPEG. llama.cpp/STB does not support WebP."""
try:
@ -71,26 +82,69 @@ class LlamaCppClient(GenAIClient):
base_url = base_url.replace("/v1", "") # Strip /v1 if included in base_url
configured_model = self.genai_config.model
info = self._get_model_info(base_url, configured_model)
# Query /v1/models to validate the configured model exists
if info is None:
return None
self._context_size = info["context_size"]
self._supports_vision = info["supports_vision"]
self._supports_audio = info["supports_audio"]
self._supports_tools = info["supports_tools"]
self._media_marker = info["media_marker"]
logger.info(
"llama.cpp model '%s' initialized — context: %s, vision: %s, audio: %s, tools: %s",
configured_model,
self._context_size or "unknown",
self._supports_vision,
self._supports_audio,
self._supports_tools,
)
return base_url
def _get_model_info(
self, base_url: str, configured_model: str
) -> dict[str, Any] | None:
"""Resolve model metadata from /v1/models with /props fallback.
Returns a dict of capability fields, or None if the server's model
registry was reachable and reported the configured model as missing.
A reachable-but-unparseable /v1/models is treated as soft-pass and
falls through to /props, matching prior behavior.
After ggml-org/llama.cpp#22952, /v1/models exposes per-model
`architecture.input_modalities` (text/image/audio) the primary
source. When proxied through llama-swap, the same entry carries
`status.args` (server launch argv) and, for the loaded model,
`meta.n_ctx`. /props remains the only source for `media_marker`,
which the server randomizes per startup unless LLAMA_MEDIA_MARKER
is set.
"""
info: dict[str, Any] = {
"context_size": None,
"supports_vision": False,
"supports_audio": False,
"supports_tools": False,
"media_marker": "<__media__>",
}
model_entry: dict[str, Any] | None = None
try:
response = requests.get(
f"{base_url}/v1/models",
timeout=10,
)
response = requests.get(f"{base_url}/v1/models", timeout=10)
response.raise_for_status()
models_data = response.json()
model_found = False
for model in models_data.get("data", []):
model_ids = {model.get("id")}
for alias in model.get("aliases", []):
model_ids.add(alias)
if configured_model in model_ids:
model_found = True
model_entry = model
break
if not model_found:
if model_entry is None:
available = []
for m in models_data.get("data", []):
available.append(m.get("id", "unknown"))
@ -109,10 +163,35 @@ class LlamaCppClient(GenAIClient):
e,
)
# Query /props for context size, modalities, and tool support.
# The standard /props?model=<name> endpoint works with llama-server.
# If it fails, try the llama-swap per-model passthrough endpoint which
# returns props for a specific model without requiring it to be loaded.
if model_entry is not None:
architecture = model_entry.get("architecture") or {}
input_modalities = architecture.get("input_modalities") or []
if isinstance(input_modalities, list):
info["supports_vision"] = "image" in input_modalities
info["supports_audio"] = "audio" in input_modalities
status = model_entry.get("status") or {}
launch_args = status.get("args") if isinstance(status, dict) else None
if not isinstance(launch_args, list):
launch_args = []
meta = model_entry.get("meta") if isinstance(model_entry, dict) else None
n_ctx = meta.get("n_ctx") if isinstance(meta, dict) else None
if not n_ctx:
n_ctx = _parse_launch_arg(launch_args, "--ctx-size")
if n_ctx:
try:
info["context_size"] = int(n_ctx)
except (TypeError, ValueError):
pass
# Tool calling on llama-server requires --jinja.
if "--jinja" in launch_args:
info["supports_tools"] = True
try:
try:
response = requests.get(
@ -130,44 +209,32 @@ class LlamaCppClient(GenAIClient):
response.raise_for_status()
props = response.json()
# Context size from server runtime config
default_settings = props.get("default_generation_settings", {})
n_ctx = default_settings.get("n_ctx")
if n_ctx:
self._context_size = int(n_ctx)
if info["context_size"] is None:
default_settings = props.get("default_generation_settings", {})
n_ctx = default_settings.get("n_ctx")
if n_ctx:
info["context_size"] = int(n_ctx)
# Modalities (vision, audio)
modalities = props.get("modalities", {})
self._supports_vision = modalities.get("vision", False)
self._supports_audio = modalities.get("audio", False)
if not (info["supports_vision"] or info["supports_audio"]):
modalities = props.get("modalities", {})
info["supports_vision"] = bool(modalities.get("vision", False))
info["supports_audio"] = bool(modalities.get("audio", False))
# Tool support from chat template capabilities
chat_caps = props.get("chat_template_caps", {})
self._supports_tools = chat_caps.get("supports_tools", False)
if not info["supports_tools"]:
chat_caps = props.get("chat_template_caps", {})
info["supports_tools"] = bool(chat_caps.get("supports_tools", False))
# Media marker for multimodal embeddings; the server randomizes this
# per startup unless LLAMA_MEDIA_MARKER is set, so we must read it
# from /props rather than hardcoding "<__media__>".
media_marker = props.get("media_marker")
if isinstance(media_marker, str) and media_marker:
self._media_marker = media_marker
logger.info(
"llama.cpp model '%s' initialized — context: %s, vision: %s, audio: %s, tools: %s",
configured_model,
self._context_size or "unknown",
self._supports_vision,
self._supports_audio,
self._supports_tools,
)
info["media_marker"] = media_marker
except Exception as e:
logger.warning(
"Failed to query llama.cpp /props endpoint: %s. "
"Using defaults for context size and capabilities.",
"Image embeddings may fail if the server randomized its media marker.",
e,
)
return base_url
return info
def _send(
self,