mirror of
https://github.com/blakeblackshear/frigate.git
synced 2026-04-03 22:04:53 +03:00
Auto llama.cpp context (#22737)
Some checks are pending
CI / AMD64 Build (push) Waiting to run
CI / ARM Build (push) Waiting to run
CI / Jetson Jetpack 6 (push) Waiting to run
CI / AMD64 Extra Build (push) Blocked by required conditions
CI / ARM Extra Build (push) Blocked by required conditions
CI / Synaptics Build (push) Blocked by required conditions
CI / Assemble and push default build (push) Blocked by required conditions
Some checks are pending
CI / AMD64 Build (push) Waiting to run
CI / ARM Build (push) Waiting to run
CI / Jetson Jetpack 6 (push) Waiting to run
CI / AMD64 Extra Build (push) Blocked by required conditions
CI / ARM Extra Build (push) Blocked by required conditions
CI / Synaptics Build (push) Blocked by required conditions
CI / Assemble and push default build (push) Blocked by required conditions
* Add model probing * Include aliases * Pull correctly * Correctly query specific model props * Debug log * Update model list
This commit is contained in:
parent
520d9eeb7f
commit
68dfb157ea
@ -29,11 +29,11 @@ You must use a vision-capable model with Frigate. The following models are recom
|
||||
|
||||
| Model | Notes |
|
||||
| ------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `qwen3-vl` | Strong visual and situational understanding, strong ability to identify smaller objects and interactions with object. |
|
||||
| `qwen3-vl` | Strong visual and situational understanding, enhanced ability to identify smaller objects and interactions with object. |
|
||||
| `qwen3.5` | Strong situational understanding, but missing DeepStack from qwen3-vl leading to worse performance for identifying objects in people's hand and other small details. |
|
||||
| `gemma4` | Strong situational understanding, sometimes resorts to more vague terms like 'interacts' instead of assigning a specific action. |
|
||||
| `Intern3.5VL` | Relatively fast with good vision comprehension |
|
||||
| `gemma3` | Slower model with good vision and temporal understanding |
|
||||
| `qwen2.5-vl` | Fast but capable model with good vision comprehension |
|
||||
|
||||
:::info
|
||||
|
||||
|
||||
@ -38,18 +38,111 @@ class LlamaCppClient(GenAIClient):
|
||||
|
||||
provider: str | None # base_url
|
||||
provider_options: dict[str, Any]
|
||||
_context_size: int | None
|
||||
_supports_vision: bool
|
||||
_supports_audio: bool
|
||||
_supports_tools: bool
|
||||
|
||||
def _init_provider(self) -> str | None:
|
||||
"""Initialize the client."""
|
||||
"""Initialize the client and query model metadata from the server."""
|
||||
self.provider_options = {
|
||||
**self.genai_config.provider_options,
|
||||
}
|
||||
return (
|
||||
self._context_size = None
|
||||
self._supports_vision = False
|
||||
self._supports_audio = False
|
||||
self._supports_tools = False
|
||||
|
||||
base_url = (
|
||||
self.genai_config.base_url.rstrip("/")
|
||||
if self.genai_config.base_url
|
||||
else None
|
||||
)
|
||||
|
||||
if base_url is None:
|
||||
return None
|
||||
|
||||
configured_model = self.genai_config.model
|
||||
|
||||
# Query /v1/models to validate the configured model exists
|
||||
try:
|
||||
response = requests.get(
|
||||
f"{base_url}/v1/models",
|
||||
timeout=10,
|
||||
)
|
||||
response.raise_for_status()
|
||||
models_data = response.json()
|
||||
|
||||
model_found = False
|
||||
for model in models_data.get("data", []):
|
||||
model_ids = {model.get("id")}
|
||||
for alias in model.get("aliases", []):
|
||||
model_ids.add(alias)
|
||||
if configured_model in model_ids:
|
||||
model_found = True
|
||||
break
|
||||
|
||||
if not model_found:
|
||||
available = []
|
||||
for m in models_data.get("data", []):
|
||||
available.append(m.get("id", "unknown"))
|
||||
for alias in m.get("aliases", []):
|
||||
available.append(alias)
|
||||
logger.error(
|
||||
"Model '%s' not found on llama.cpp server. Available models: %s",
|
||||
configured_model,
|
||||
available,
|
||||
)
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"Failed to query llama.cpp /v1/models endpoint: %s. "
|
||||
"Model validation skipped.",
|
||||
e,
|
||||
)
|
||||
|
||||
# Query /props for context size, modalities, and tool support
|
||||
try:
|
||||
response = requests.get(
|
||||
f"{base_url}/props",
|
||||
params={"model": configured_model},
|
||||
timeout=10,
|
||||
)
|
||||
response.raise_for_status()
|
||||
props = response.json()
|
||||
|
||||
# Context size from server runtime config
|
||||
default_settings = props.get("default_generation_settings", {})
|
||||
n_ctx = default_settings.get("n_ctx")
|
||||
if n_ctx:
|
||||
self._context_size = int(n_ctx)
|
||||
|
||||
# Modalities (vision, audio)
|
||||
modalities = props.get("modalities", {})
|
||||
self._supports_vision = modalities.get("vision", False)
|
||||
self._supports_audio = modalities.get("audio", False)
|
||||
|
||||
# Tool support from chat template capabilities
|
||||
chat_caps = props.get("chat_template_caps", {})
|
||||
self._supports_tools = chat_caps.get("supports_tools", False)
|
||||
|
||||
logger.debug(
|
||||
"llama.cpp model '%s' initialized — context: %s, vision: %s, audio: %s, tools: %s",
|
||||
configured_model,
|
||||
self._context_size or "unknown",
|
||||
self._supports_vision,
|
||||
self._supports_audio,
|
||||
self._supports_tools,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"Failed to query llama.cpp /props endpoint: %s. "
|
||||
"Using defaults for context size and capabilities.",
|
||||
e,
|
||||
)
|
||||
|
||||
return base_url
|
||||
|
||||
def _send(
|
||||
self,
|
||||
prompt: str,
|
||||
@ -117,9 +210,34 @@ class LlamaCppClient(GenAIClient):
|
||||
logger.warning("llama.cpp returned an error: %s", str(e))
|
||||
return None
|
||||
|
||||
@property
|
||||
def supports_vision(self) -> bool:
|
||||
"""Whether the loaded model supports vision/image input."""
|
||||
return self._supports_vision
|
||||
|
||||
@property
|
||||
def supports_audio(self) -> bool:
|
||||
"""Whether the loaded model supports audio input."""
|
||||
return self._supports_audio
|
||||
|
||||
@property
|
||||
def supports_tools(self) -> bool:
|
||||
"""Whether the loaded model supports tool/function calling."""
|
||||
return self._supports_tools
|
||||
|
||||
def get_context_size(self) -> int:
|
||||
"""Get the context window size for llama.cpp."""
|
||||
return int(self.provider_options.get("context_size", 4096))
|
||||
"""Get the context window size for llama.cpp.
|
||||
|
||||
Resolution order:
|
||||
1. provider_options["context_size"] (user override)
|
||||
2. Value queried from llama.cpp server at init
|
||||
3. Default fallback of 4096
|
||||
"""
|
||||
if "context_size" in self.provider_options:
|
||||
return int(self.provider_options["context_size"])
|
||||
if self._context_size is not None:
|
||||
return self._context_size
|
||||
return 4096
|
||||
|
||||
def _build_payload(
|
||||
self,
|
||||
|
||||
Loading…
Reference in New Issue
Block a user