mirror of
https://github.com/blakeblackshear/frigate.git
synced 2026-06-21 03:41:55 +03:00
Fetch embed key whenever an error occurs in case the llama server was restarted
This commit is contained in:
parent
da8e766298
commit
abac6d5253
@ -75,6 +75,29 @@ def _parse_launch_arg(args: list[str], flag: str) -> str | None:
|
||||
return args[idx + 1]
|
||||
|
||||
|
||||
def _fetch_llama_props(base_url: str, model: str) -> dict[str, Any]:
|
||||
"""Fetch /props from a llama.cpp server, with llama-swap fallback.
|
||||
|
||||
Raises the underlying RequestException if both endpoints fail; callers
|
||||
decide how to surface the failure.
|
||||
"""
|
||||
try:
|
||||
response = requests.get(
|
||||
f"{base_url}/props",
|
||||
params={"model": model},
|
||||
timeout=10,
|
||||
)
|
||||
response.raise_for_status()
|
||||
return response.json()
|
||||
except Exception:
|
||||
response = requests.get(
|
||||
f"{base_url}/upstream/{model}/props",
|
||||
timeout=10,
|
||||
)
|
||||
response.raise_for_status()
|
||||
return response.json()
|
||||
|
||||
|
||||
def _to_jpeg(img_bytes: bytes) -> bytes | None:
|
||||
"""Convert image bytes to JPEG. llama.cpp/STB does not support WebP."""
|
||||
try:
|
||||
@ -239,21 +262,7 @@ class LlamaCppClient(GenAIClient):
|
||||
info["supports_tools"] = True
|
||||
|
||||
try:
|
||||
try:
|
||||
response = requests.get(
|
||||
f"{base_url}/props",
|
||||
params={"model": configured_model},
|
||||
timeout=10,
|
||||
)
|
||||
response.raise_for_status()
|
||||
props = response.json()
|
||||
except Exception:
|
||||
response = requests.get(
|
||||
f"{base_url}/upstream/{configured_model}/props",
|
||||
timeout=10,
|
||||
)
|
||||
response.raise_for_status()
|
||||
props = response.json()
|
||||
props = _fetch_llama_props(base_url, configured_model)
|
||||
|
||||
if info["context_size"] is None:
|
||||
default_settings = props.get("default_generation_settings", {})
|
||||
@ -559,6 +568,31 @@ class LlamaCppClient(GenAIClient):
|
||||
)
|
||||
return result if result else None
|
||||
|
||||
def _refresh_media_marker(self) -> bool:
|
||||
"""Re-fetch /props and update the cached media marker if it changed.
|
||||
|
||||
The server randomizes the marker per startup (unless LLAMA_MEDIA_MARKER
|
||||
is set), so a stale marker indicates a restart. Returns True iff the
|
||||
marker was updated to a new value — used to gate a one-shot retry of
|
||||
a failed embeddings request.
|
||||
"""
|
||||
if self.provider is None:
|
||||
return False
|
||||
try:
|
||||
props = _fetch_llama_props(self.provider, self.genai_config.model)
|
||||
except Exception as e:
|
||||
logger.warning("Failed to refresh llama.cpp media marker: %s", e)
|
||||
return False
|
||||
|
||||
marker = props.get("media_marker")
|
||||
|
||||
if not isinstance(marker, str) or not marker or marker == self._media_marker:
|
||||
return False
|
||||
|
||||
logger.info("llama.cpp media marker changed (server restart); refreshed")
|
||||
self._media_marker = marker
|
||||
return True
|
||||
|
||||
def embed(
|
||||
self,
|
||||
texts: list[str] | None = None,
|
||||
@ -583,30 +617,46 @@ class LlamaCppClient(GenAIClient):
|
||||
|
||||
EMBEDDING_DIM = 768
|
||||
|
||||
content = []
|
||||
for text in texts:
|
||||
content.append({"prompt_string": text})
|
||||
encoded_images: list[str] = []
|
||||
for img in images:
|
||||
# llama.cpp uses STB which does not support WebP; convert to JPEG
|
||||
jpeg_bytes = _to_jpeg(img)
|
||||
to_encode = jpeg_bytes if jpeg_bytes is not None else img
|
||||
encoded = base64.b64encode(to_encode).decode("utf-8")
|
||||
# prompt_string must contain the server's media marker placeholder.
|
||||
# The marker is randomized per server startup (read from /props).
|
||||
content.append(
|
||||
{
|
||||
"prompt_string": f"{self._media_marker}\n",
|
||||
"multimodal_data": [encoded], # type: ignore[dict-item]
|
||||
}
|
||||
encoded_images.append(base64.b64encode(to_encode).decode("utf-8"))
|
||||
|
||||
def build_content() -> list[dict[str, Any]]:
|
||||
# prompt_string must contain the server's media marker placeholder
|
||||
# for each image. The marker is randomized per server startup.
|
||||
content: list[dict[str, Any]] = []
|
||||
for text in texts:
|
||||
content.append({"prompt_string": text})
|
||||
for encoded in encoded_images:
|
||||
content.append(
|
||||
{
|
||||
"prompt_string": f"{self._media_marker}\n",
|
||||
"multimodal_data": [encoded],
|
||||
}
|
||||
)
|
||||
return content
|
||||
|
||||
def post_embeddings() -> requests.Response:
|
||||
return requests.post(
|
||||
f"{self.provider}/embeddings",
|
||||
json={"model": self.genai_config.model, "content": build_content()},
|
||||
timeout=self.timeout,
|
||||
)
|
||||
|
||||
try:
|
||||
response = requests.post(
|
||||
f"{self.provider}/embeddings",
|
||||
json={"model": self.genai_config.model, "content": content},
|
||||
timeout=self.timeout,
|
||||
)
|
||||
response.raise_for_status()
|
||||
try:
|
||||
response = post_embeddings()
|
||||
response.raise_for_status()
|
||||
except requests.exceptions.RequestException:
|
||||
# The server may have restarted with a new media marker.
|
||||
# Refresh from /props; only retry if the marker actually changed.
|
||||
if not encoded_images or not self._refresh_media_marker():
|
||||
raise
|
||||
response = post_embeddings()
|
||||
response.raise_for_status()
|
||||
result = response.json()
|
||||
|
||||
items = result.get("data", result) if isinstance(result, dict) else result
|
||||
|
||||
Loading…
Reference in New Issue
Block a user