mirror of
https://github.com/blakeblackshear/frigate.git
synced 2026-03-10 10:33:11 +03:00
Add embed API support
This commit is contained in:
parent
0ecefa3496
commit
1fac971bc0
@ -7,6 +7,7 @@ import os
|
|||||||
import re
|
import re
|
||||||
from typing import Any, Optional
|
from typing import Any, Optional
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
from playhouse.shortcuts import model_to_dict
|
from playhouse.shortcuts import model_to_dict
|
||||||
|
|
||||||
from frigate.config import CameraConfig, GenAIConfig, GenAIProviderEnum
|
from frigate.config import CameraConfig, GenAIConfig, GenAIProviderEnum
|
||||||
@ -304,6 +305,25 @@ Guidelines:
|
|||||||
"""Get the context window size for this provider in tokens."""
|
"""Get the context window size for this provider in tokens."""
|
||||||
return 4096
|
return 4096
|
||||||
|
|
||||||
|
def embed(
    self,
    texts: list[str] | None = None,
    images: list[bytes] | None = None,
) -> list[np.ndarray]:
    """Base-class stub for embedding generation.

    Providers that can produce embeddings override this method; this
    default implementation only logs a warning and yields no results.
    Overrides return one numpy array per input, and Frigate's semantic
    search expects 768-dimensional vectors.
    """
    provider_name = self.__class__.__name__
    logger.warning(
        "%s does not support embeddings. "
        "This method should be overridden by the provider implementation.",
        provider_name,
    )
    return []
|
||||||
|
|
||||||
def chat_with_tools(
|
def chat_with_tools(
|
||||||
self,
|
self,
|
||||||
messages: list[dict[str, Any]],
|
messages: list[dict[str, Any]],
|
||||||
|
|||||||
@ -6,6 +6,7 @@ import logging
|
|||||||
from typing import Any, Optional
|
from typing import Any, Optional
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
|
import numpy as np
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
from frigate.config import GenAIProviderEnum
|
from frigate.config import GenAIProviderEnum
|
||||||
@ -176,6 +177,97 @@ class LlamaCppClient(GenAIClient):
|
|||||||
)
|
)
|
||||||
return result if result else None
|
return result if result else None
|
||||||
|
|
||||||
|
def embed(
    self,
    texts: list[str] | None = None,
    images: list[bytes] | None = None,
) -> list[np.ndarray]:
    """Generate embeddings via llama.cpp /embeddings endpoint.

    Supports batch requests. Uses content format with prompt_string and
    multimodal_data for images (PR #15108). Server must be started with
    --embeddings and --mmproj for multimodal support.

    Returns one float32 numpy array per successfully embedded input,
    each truncated or zero-padded to 768 dimensions; returns an empty
    list on any request failure.
    """
    if self.provider is None:
        logger.warning(
            "llama.cpp provider has not been initialized. Check your llama.cpp configuration."
        )
        return []

    text_inputs = texts or []
    image_inputs = images or []
    if not (text_inputs or image_inputs):
        return []

    # Frigate's semantic search expects 768-dimensional vectors.
    EMBEDDING_DIM = 768

    def _fit_dim(vec: np.ndarray) -> np.ndarray:
        """Truncate or zero-pad *vec* so it is exactly EMBEDDING_DIM long."""
        size = vec.size
        if size == EMBEDDING_DIM:
            return vec
        if size > EMBEDDING_DIM:
            fitted = vec[:EMBEDDING_DIM]
            logger.debug(
                "Truncated llama.cpp embedding from %d to %d dimensions",
                size,
                EMBEDDING_DIM,
            )
        else:
            fitted = np.pad(
                vec,
                (0, EMBEDDING_DIM - size),
                mode="constant",
                constant_values=0,
            )
            logger.debug(
                "Padded llama.cpp embedding from %d to %d dimensions",
                size,
                EMBEDDING_DIM,
            )
        return fitted

    # Texts go in as plain prompt strings; images are base64-encoded and
    # attached via multimodal_data (requires --mmproj on the server).
    content = [{"prompt_string": text} for text in text_inputs]
    content.extend(
        {
            "prompt_string": "",
            "multimodal_data": [base64.b64encode(img).decode("utf-8")],
        }
        for img in image_inputs
    )

    try:
        response = requests.post(
            f"{self.provider}/embeddings",
            json={"content": content},
            timeout=self.timeout,
        )
        response.raise_for_status()
        result = response.json()

        # The server may answer with either a bare list or an
        # OpenAI-style {"data": [...]} wrapper; accept both.
        items = result.get("data", result) if isinstance(result, dict) else result
        if not isinstance(items, list):
            logger.warning("llama.cpp embeddings returned unexpected format")
            return []

        embeddings: list[np.ndarray] = []
        for item in items:
            emb = item.get("embedding") if isinstance(item, dict) else None
            if emb is None:
                # Skip malformed entries rather than failing the whole batch.
                logger.warning("llama.cpp embeddings item missing embedding field")
                continue
            embeddings.append(_fit_dim(np.array(emb, dtype=np.float32)))
        return embeddings
    except requests.exceptions.Timeout:
        logger.warning("llama.cpp embeddings request timed out")
        return []
    except requests.exceptions.RequestException as e:
        # Include the first 500 chars of the server response when available,
        # since llama.cpp puts the useful diagnostics in the body.
        error_detail = str(e)
        if hasattr(e, "response") and e.response is not None:
            try:
                error_detail = f"{str(e)} - Response: {e.response.text[:500]}"
            except Exception:
                pass
        logger.warning("llama.cpp embeddings error: %s", error_detail)
        return []
    except Exception as e:
        logger.warning("Unexpected error in llama.cpp embeddings: %s", str(e))
        return []
|
||||||
|
|
||||||
def chat_with_tools(
|
def chat_with_tools(
|
||||||
self,
|
self,
|
||||||
messages: list[dict[str, Any]],
|
messages: list[dict[str, Any]],
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user