mirror of
https://github.com/blakeblackshear/frigate.git
synced 2026-04-03 22:04:53 +03:00
Auto llama.cpp context (#22737)
Some checks are pending
CI / AMD64 Build (push) Waiting to run
CI / ARM Build (push) Waiting to run
CI / Jetson Jetpack 6 (push) Waiting to run
CI / AMD64 Extra Build (push) Blocked by required conditions
CI / ARM Extra Build (push) Blocked by required conditions
CI / Synaptics Build (push) Blocked by required conditions
CI / Assemble and push default build (push) Blocked by required conditions
Some checks are pending
CI / AMD64 Build (push) Waiting to run
CI / ARM Build (push) Waiting to run
CI / Jetson Jetpack 6 (push) Waiting to run
CI / AMD64 Extra Build (push) Blocked by required conditions
CI / ARM Extra Build (push) Blocked by required conditions
CI / Synaptics Build (push) Blocked by required conditions
CI / Assemble and push default build (push) Blocked by required conditions
* Add model probing * Include aliases * Pull correctly * Correctly query specific model props * Debug log * Update model list
This commit is contained in:
parent
520d9eeb7f
commit
68dfb157ea
@ -29,11 +29,11 @@ You must use a vision-capable model with Frigate. The following models are recom
|
|||||||
|
|
||||||
| Model | Notes |
|
| Model | Notes |
|
||||||
| ------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
| ------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
| `qwen3-vl`    | Strong visual and situational understanding, strong ability to identify smaller objects and interactions with objects.                                               |
|
| `qwen3-vl`    | Strong visual and situational understanding, enhanced ability to identify smaller objects and interactions with objects.                                             |
|
||||||
| `qwen3.5`     | Strong situational understanding, but lacks DeepStack from qwen3-vl, leading to worse performance for identifying objects in people's hands and other small details. |
|
| `qwen3.5`     | Strong situational understanding, but lacks DeepStack from qwen3-vl, leading to worse performance for identifying objects in people's hands and other small details. |
|
||||||
|
| `gemma4` | Strong situational understanding, sometimes resorts to more vague terms like 'interacts' instead of assigning a specific action. |
|
||||||
| `Intern3.5VL` | Relatively fast with good vision comprehension |
|
| `Intern3.5VL` | Relatively fast with good vision comprehension |
|
||||||
| `gemma3` | Slower model with good vision and temporal understanding |
|
| `gemma3` | Slower model with good vision and temporal understanding |
|
||||||
| `qwen2.5-vl` | Fast but capable model with good vision comprehension |
|
|
||||||
|
|
||||||
:::info
|
:::info
|
||||||
|
|
||||||
|
|||||||
@ -38,18 +38,111 @@ class LlamaCppClient(GenAIClient):
|
|||||||
|
|
||||||
provider: str | None # base_url
|
provider: str | None # base_url
|
||||||
provider_options: dict[str, Any]
|
provider_options: dict[str, Any]
|
||||||
|
_context_size: int | None
|
||||||
|
_supports_vision: bool
|
||||||
|
_supports_audio: bool
|
||||||
|
_supports_tools: bool
|
||||||
|
|
||||||
def _init_provider(self) -> str | None:
|
def _init_provider(self) -> str | None:
|
||||||
"""Initialize the client."""
|
"""Initialize the client and query model metadata from the server."""
|
||||||
self.provider_options = {
|
self.provider_options = {
|
||||||
**self.genai_config.provider_options,
|
**self.genai_config.provider_options,
|
||||||
}
|
}
|
||||||
return (
|
self._context_size = None
|
||||||
|
self._supports_vision = False
|
||||||
|
self._supports_audio = False
|
||||||
|
self._supports_tools = False
|
||||||
|
|
||||||
|
base_url = (
|
||||||
self.genai_config.base_url.rstrip("/")
|
self.genai_config.base_url.rstrip("/")
|
||||||
if self.genai_config.base_url
|
if self.genai_config.base_url
|
||||||
else None
|
else None
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if base_url is None:
|
||||||
|
return None
|
||||||
|
|
||||||
|
configured_model = self.genai_config.model
|
||||||
|
|
||||||
|
# Query /v1/models to validate the configured model exists
|
||||||
|
try:
|
||||||
|
response = requests.get(
|
||||||
|
f"{base_url}/v1/models",
|
||||||
|
timeout=10,
|
||||||
|
)
|
||||||
|
response.raise_for_status()
|
||||||
|
models_data = response.json()
|
||||||
|
|
||||||
|
model_found = False
|
||||||
|
for model in models_data.get("data", []):
|
||||||
|
model_ids = {model.get("id")}
|
||||||
|
for alias in model.get("aliases", []):
|
||||||
|
model_ids.add(alias)
|
||||||
|
if configured_model in model_ids:
|
||||||
|
model_found = True
|
||||||
|
break
|
||||||
|
|
||||||
|
if not model_found:
|
||||||
|
available = []
|
||||||
|
for m in models_data.get("data", []):
|
||||||
|
available.append(m.get("id", "unknown"))
|
||||||
|
for alias in m.get("aliases", []):
|
||||||
|
available.append(alias)
|
||||||
|
logger.error(
|
||||||
|
"Model '%s' not found on llama.cpp server. Available models: %s",
|
||||||
|
configured_model,
|
||||||
|
available,
|
||||||
|
)
|
||||||
|
return None
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(
|
||||||
|
"Failed to query llama.cpp /v1/models endpoint: %s. "
|
||||||
|
"Model validation skipped.",
|
||||||
|
e,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Query /props for context size, modalities, and tool support
|
||||||
|
try:
|
||||||
|
response = requests.get(
|
||||||
|
f"{base_url}/props",
|
||||||
|
params={"model": configured_model},
|
||||||
|
timeout=10,
|
||||||
|
)
|
||||||
|
response.raise_for_status()
|
||||||
|
props = response.json()
|
||||||
|
|
||||||
|
# Context size from server runtime config
|
||||||
|
default_settings = props.get("default_generation_settings", {})
|
||||||
|
n_ctx = default_settings.get("n_ctx")
|
||||||
|
if n_ctx:
|
||||||
|
self._context_size = int(n_ctx)
|
||||||
|
|
||||||
|
# Modalities (vision, audio)
|
||||||
|
modalities = props.get("modalities", {})
|
||||||
|
self._supports_vision = modalities.get("vision", False)
|
||||||
|
self._supports_audio = modalities.get("audio", False)
|
||||||
|
|
||||||
|
# Tool support from chat template capabilities
|
||||||
|
chat_caps = props.get("chat_template_caps", {})
|
||||||
|
self._supports_tools = chat_caps.get("supports_tools", False)
|
||||||
|
|
||||||
|
logger.debug(
|
||||||
|
"llama.cpp model '%s' initialized — context: %s, vision: %s, audio: %s, tools: %s",
|
||||||
|
configured_model,
|
||||||
|
self._context_size or "unknown",
|
||||||
|
self._supports_vision,
|
||||||
|
self._supports_audio,
|
||||||
|
self._supports_tools,
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(
|
||||||
|
"Failed to query llama.cpp /props endpoint: %s. "
|
||||||
|
"Using defaults for context size and capabilities.",
|
||||||
|
e,
|
||||||
|
)
|
||||||
|
|
||||||
|
return base_url
|
||||||
|
|
||||||
def _send(
|
def _send(
|
||||||
self,
|
self,
|
||||||
prompt: str,
|
prompt: str,
|
||||||
@ -117,9 +210,34 @@ class LlamaCppClient(GenAIClient):
|
|||||||
logger.warning("llama.cpp returned an error: %s", str(e))
|
logger.warning("llama.cpp returned an error: %s", str(e))
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
@property
|
||||||
|
def supports_vision(self) -> bool:
|
||||||
|
"""Whether the loaded model supports vision/image input."""
|
||||||
|
return self._supports_vision
|
||||||
|
|
||||||
|
@property
|
||||||
|
def supports_audio(self) -> bool:
|
||||||
|
"""Whether the loaded model supports audio input."""
|
||||||
|
return self._supports_audio
|
||||||
|
|
||||||
|
@property
|
||||||
|
def supports_tools(self) -> bool:
|
||||||
|
"""Whether the loaded model supports tool/function calling."""
|
||||||
|
return self._supports_tools
|
||||||
|
|
||||||
def get_context_size(self) -> int:
|
def get_context_size(self) -> int:
|
||||||
"""Get the context window size for llama.cpp."""
|
"""Get the context window size for llama.cpp.
|
||||||
return int(self.provider_options.get("context_size", 4096))
|
|
||||||
|
Resolution order:
|
||||||
|
1. provider_options["context_size"] (user override)
|
||||||
|
2. Value queried from llama.cpp server at init
|
||||||
|
3. Default fallback of 4096
|
||||||
|
"""
|
||||||
|
if "context_size" in self.provider_options:
|
||||||
|
return int(self.provider_options["context_size"])
|
||||||
|
if self._context_size is not None:
|
||||||
|
return self._context_size
|
||||||
|
return 4096
|
||||||
|
|
||||||
def _build_payload(
|
def _build_payload(
|
||||||
self,
|
self,
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user