Support using GenAI for embeddings / semantic search (#22323)

* Support GenAI for embeddings

* Add embed API support

* Add support for embedding via genai

* Basic docs

* undo

* Fix sending images

* Don't require download check

* Set model

* Handle emb correctly

* Clarification

* Cleanup

* Cleanup
This commit is contained in:
Nicolas Mowen 2026-03-08 09:55:00 -06:00 committed by GitHub
parent acdfed40a9
commit a705f254e5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 346 additions and 20 deletions

View File

@ -76,6 +76,40 @@ Switching between V1 and V2 requires reindexing your embeddings. The embeddings
:::
### GenAI Provider
Frigate can use a GenAI provider for semantic search embeddings when that provider has the `embeddings` role. Currently, only **llama.cpp** supports multimodal embeddings (both text and images).
To use llama.cpp for semantic search:
1. Configure a GenAI provider in your config with `embeddings` in its `roles`.
2. Set `semantic_search.model` to the GenAI config key (e.g. `default`).
3. Start the llama.cpp server with `--embeddings` and `--mmproj` for image support:
```yaml
genai:
default:
provider: llamacpp
base_url: http://localhost:8080
model: your-model-name
roles:
- embeddings
- vision
- tools
semantic_search:
enabled: True
model: default
```
The llama.cpp server must be started with `--embeddings` to enable the embeddings API, and with `--mmproj` loading a multimodal embeddings model for image support. See the [llama.cpp server documentation](https://github.com/ggml-org/llama.cpp/blob/master/tools/server/README.md) for details.
:::note
Switching between Jina models and a GenAI provider requires reindexing. Embeddings from different backends are incompatible.
:::
### GPU Acceleration
The CLIP models are downloaded in ONNX format, and the `large` model can be accelerated using GPU hardware, when available. This depends on the Docker build that is used. You can also target a specific device in a multi-GPU installation.

View File

@ -1,5 +1,5 @@
from enum import Enum
from typing import Dict, List, Optional
from typing import Dict, List, Optional, Union
from pydantic import ConfigDict, Field
@ -173,10 +173,10 @@ class SemanticSearchConfig(FrigateBaseModel):
title="Reindex on startup",
description="Trigger a full reindex of historical tracked objects into the embeddings database.",
)
model: Optional[SemanticSearchModelEnum] = Field(
model: Optional[Union[SemanticSearchModelEnum, str]] = Field(
default=SemanticSearchModelEnum.jinav1,
title="Semantic search model",
description="The embeddings model to use for semantic search (for example 'jinav1').",
title="Semantic search model or GenAI provider name",
description="The embeddings model to use for semantic search (for example 'jinav1'), or the name of a GenAI provider with the embeddings role.",
)
model_size: str = Field(
default="small",

View File

@ -61,6 +61,7 @@ from .classification import (
FaceRecognitionConfig,
LicensePlateRecognitionConfig,
SemanticSearchConfig,
SemanticSearchModelEnum,
)
from .database import DatabaseConfig
from .env import EnvVars
@ -592,6 +593,24 @@ class FrigateConfig(FrigateBaseModel):
)
role_to_name[role] = name
# validate semantic_search.model when it is a GenAI provider name
if (
self.semantic_search.enabled
and isinstance(self.semantic_search.model, str)
and not isinstance(self.semantic_search.model, SemanticSearchModelEnum)
):
if self.semantic_search.model not in self.genai:
raise ValueError(
f"semantic_search.model '{self.semantic_search.model}' is not a "
"valid GenAI config key. Must match a key in genai config."
)
genai_cfg = self.genai[self.semantic_search.model]
if GenAIRoleEnum.embeddings not in genai_cfg.roles:
raise ValueError(
f"GenAI provider '{self.semantic_search.model}' must have "
"'embeddings' in its roles for semantic search."
)
# set default min_score for object attributes
for attribute in self.model.all_attributes:
if not self.objects.filters.get(attribute):

View File

@ -28,6 +28,7 @@ from frigate.types import ModelStatusTypesEnum
from frigate.util.builtin import EventsPerSecond, InferenceSpeed, serialize
from frigate.util.file import get_event_thumbnail_bytes
from .genai_embedding import GenAIEmbedding
from .onnx.jina_v1_embedding import JinaV1ImageEmbedding, JinaV1TextEmbedding
from .onnx.jina_v2_embedding import JinaV2Embedding
@ -73,6 +74,7 @@ class Embeddings:
config: FrigateConfig,
db: SqliteVecQueueDatabase,
metrics: DataProcessorMetrics,
genai_manager=None,
) -> None:
self.config = config
self.db = db
@ -104,7 +106,27 @@ class Embeddings:
},
)
if self.config.semantic_search.model == SemanticSearchModelEnum.jinav2:
model_cfg = self.config.semantic_search.model
if not isinstance(model_cfg, SemanticSearchModelEnum):
# GenAI provider
embeddings_client = (
genai_manager.embeddings_client if genai_manager else None
)
if not embeddings_client:
raise ValueError(
f"semantic_search.model is '{model_cfg}' (GenAI provider) but "
"no embeddings client is configured. Ensure the GenAI provider "
"has 'embeddings' in its roles."
)
self.embedding = GenAIEmbedding(embeddings_client)
self.text_embedding = lambda input_data: self.embedding(
input_data, embedding_type="text"
)
self.vision_embedding = lambda input_data: self.embedding(
input_data, embedding_type="vision"
)
elif model_cfg == SemanticSearchModelEnum.jinav2:
# Single JinaV2Embedding instance for both text and vision
self.embedding = JinaV2Embedding(
model_size=self.config.semantic_search.model_size,
@ -118,7 +140,8 @@ class Embeddings:
self.vision_embedding = lambda input_data: self.embedding(
input_data, embedding_type="vision"
)
else: # Default to jinav1
else:
# Default to jinav1
self.text_embedding = JinaV1TextEmbedding(
model_size=config.semantic_search.model_size,
requestor=self.requestor,
@ -136,8 +159,11 @@ class Embeddings:
self.metrics.text_embeddings_eps.value = self.text_eps.eps()
def get_model_definitions(self):
# Version-specific models
if self.config.semantic_search.model == SemanticSearchModelEnum.jinav2:
model_cfg = self.config.semantic_search.model
if not isinstance(model_cfg, SemanticSearchModelEnum):
# GenAI provider: no ONNX models to download
models = []
elif model_cfg == SemanticSearchModelEnum.jinav2:
models = [
"jinaai/jina-clip-v2-tokenizer",
"jinaai/jina-clip-v2-model_fp16.onnx"
@ -312,11 +338,12 @@ class Embeddings:
# Get total count of events to process
total_events = Event.select().count()
batch_size = (
4
if self.config.semantic_search.model == SemanticSearchModelEnum.jinav2
else 32
)
if not isinstance(self.config.semantic_search.model, SemanticSearchModelEnum):
batch_size = 1
elif self.config.semantic_search.model == SemanticSearchModelEnum.jinav2:
batch_size = 4
else:
batch_size = 32
current_page = 1
totals = {

View File

@ -0,0 +1,89 @@
"""GenAI-backed embeddings for semantic search."""
import io
import logging
from typing import TYPE_CHECKING
import numpy as np
from PIL import Image
if TYPE_CHECKING:
from frigate.genai import GenAIClient
# Module-level logger for embedding adapter warnings.
logger = logging.getLogger(__name__)

# Frigate's sqlite-vec schema stores 768-dim vectors; provider output is
# truncated or zero-padded to this size (see __call__ below).
EMBEDDING_DIM = 768
class GenAIEmbedding:
    """Embedding adapter backed by a GenAI provider's embed API.

    Mirrors the JinaV2Embedding calling convention used by semantic search:
    calling the instance with a list of inputs and an ``embedding_type``
    yields one numpy vector per embedded input. Vectors are coerced to 768
    dimensions so they fit Frigate's sqlite-vec schema.
    """

    def __init__(self, client: "GenAIClient") -> None:
        # Provider client; must expose embed(texts=...) / embed(images=...).
        self.client = client

    def __call__(
        self,
        inputs: list[str] | list[bytes] | list[Image.Image],
        embedding_type: str = "text",
    ) -> list[np.ndarray]:
        """Generate embeddings for text or images.

        Args:
            inputs: Strings for text, or bytes / PIL images for vision.
            embedding_type: Either "text" or "vision".

        Returns:
            One 768-dim float32 numpy array per embedded input.

        Raises:
            ValueError: If ``embedding_type`` is not "text" or "vision".
        """
        if not inputs:
            return []

        if embedding_type == "text":
            raw_vectors = self.client.embed(texts=[str(item) for item in inputs])
        elif embedding_type == "vision":
            payloads: list[bytes] = []
            for item in inputs:
                if isinstance(item, bytes):
                    payloads.append(item)
                elif isinstance(item, Image.Image):
                    # Re-encode PIL images as JPEG bytes for the provider.
                    buffer = io.BytesIO()
                    item.convert("RGB").save(buffer, format="JPEG")
                    payloads.append(buffer.getvalue())
                else:
                    logger.warning(
                        "GenAIEmbedding: skipping unsupported vision input type %s",
                        type(item).__name__,
                    )
            if not payloads:
                return []
            raw_vectors = self.client.embed(images=payloads)
        else:
            raise ValueError(
                f"Invalid embedding_type '{embedding_type}'. Must be 'text' or 'vision'."
            )

        return [self._coerce(vec) for vec in raw_vectors]

    @staticmethod
    def _coerce(vec) -> np.ndarray:
        """Pool token-level output and pad/truncate to EMBEDDING_DIM floats."""
        arr = np.asarray(vec, dtype=np.float32)
        if arr.ndim > 1:
            # Some providers return token-level embeddings; pool to one vector.
            arr = arr.mean(axis=0)
        arr = arr.flatten()
        if arr.size > EMBEDDING_DIM:
            arr = arr[:EMBEDDING_DIM]
        elif arr.size < EMBEDDING_DIM:
            arr = np.pad(
                arr,
                (0, EMBEDDING_DIM - arr.size),
                mode="constant",
                constant_values=0,
            )
        return arr

View File

@ -123,8 +123,10 @@ class EmbeddingMaintainer(threading.Thread):
models = [Event, Recordings, ReviewSegment, Trigger]
db.bind(models)
self.genai_manager = GenAIClientManager(config)
if config.semantic_search.enabled:
self.embeddings = Embeddings(config, db, metrics)
self.embeddings = Embeddings(config, db, metrics, self.genai_manager)
# Check if we need to re-index events
if config.semantic_search.reindex:
@ -151,7 +153,6 @@ class EmbeddingMaintainer(threading.Thread):
self.frame_manager = SharedMemoryFrameManager()
self.detected_license_plates: dict[str, dict[str, Any]] = {}
self.genai_manager = GenAIClientManager(config)
# model runners to share between realtime and post processors
if self.config.lpr.enabled:

View File

@ -7,6 +7,7 @@ import os
import re
from typing import Any, Optional
import numpy as np
from playhouse.shortcuts import model_to_dict
from frigate.config import CameraConfig, GenAIConfig, GenAIProviderEnum
@ -304,6 +305,25 @@ Guidelines:
"""Get the context window size for this provider in tokens."""
return 4096
def embed(
    self,
    texts: list[str] | None = None,
    images: list[bytes] | None = None,
) -> list[np.ndarray]:
    """Generate embeddings for text and/or images.

    Default base implementation: logs a warning and returns an empty list.
    Providers with embeddings support (e.g. llama.cpp) override this to
    return one numpy array per input; semantic search expects 768 dims.
    """
    logger.warning(
        "%s does not support embeddings. "
        "This method should be overridden by the provider implementation.",
        type(self).__name__,
    )
    return []
def chat_with_tools(
self,
messages: list[dict[str, Any]],

View File

@ -1,12 +1,15 @@
"""llama.cpp Provider for Frigate AI."""
import base64
import io
import json
import logging
from typing import Any, Optional
import httpx
import numpy as np
import requests
from PIL import Image
from frigate.config import GenAIProviderEnum
from frigate.genai import GenAIClient, register_genai_provider
@ -15,6 +18,20 @@ from frigate.genai.utils import parse_tool_calls_from_message
logger = logging.getLogger(__name__)
def _to_jpeg(img_bytes: bytes) -> bytes | None:
    """Re-encode arbitrary image bytes as JPEG.

    llama.cpp decodes images with STB, which cannot read WebP, so payloads
    are normalized to JPEG before upload. Returns None when decoding fails
    (caller falls back to the original bytes).
    """
    try:
        decoded = Image.open(io.BytesIO(img_bytes))
        if decoded.mode != "RGB":
            decoded = decoded.convert("RGB")
        out = io.BytesIO()
        decoded.save(out, format="JPEG", quality=85)
        return out.getvalue()
    except Exception as err:
        logger.warning("Failed to convert image to JPEG: %s", err)
        return None
@register_genai_provider(GenAIProviderEnum.llamacpp)
class LlamaCppClient(GenAIClient):
"""Generative AI client for Frigate using llama.cpp server."""
@ -176,6 +193,110 @@ class LlamaCppClient(GenAIClient):
)
return result if result else None
def embed(
    self,
    texts: list[str] | None = None,
    images: list[bytes] | None = None,
) -> list[np.ndarray]:
    """Generate embeddings via the llama.cpp /embeddings endpoint.

    Texts and images are batched into a single request using the content
    format with prompt_string and multimodal_data (llama.cpp PR #15108).
    The server must be started with --embeddings, plus --mmproj for image
    support. Returns one 768-dim float32 array per embedded input, or an
    empty list on any failure.
    """
    if self.provider is None:
        logger.warning(
            "llama.cpp provider has not been initialized. Check your llama.cpp configuration."
        )
        return []

    text_inputs = texts or []
    image_inputs = images or []
    if not text_inputs and not image_inputs:
        return []

    target_dim = 768

    content: list[dict[str, Any]] = [{"prompt_string": t} for t in text_inputs]
    for raw in image_inputs:
        # llama.cpp uses STB which does not support WebP; convert to JPEG
        converted = _to_jpeg(raw)
        payload = raw if converted is None else converted
        # prompt_string must contain <__media__> placeholder for image tokenization
        content.append(
            {
                "prompt_string": "<__media__>\n",
                "multimodal_data": [base64.b64encode(payload).decode("utf-8")],
            }
        )

    def _normalize(vec: Any) -> np.ndarray:
        """Pool token-level output and force the target sqlite-vec shape."""
        arr = np.array(vec, dtype=np.float32)
        if arr.ndim > 1:
            # llama.cpp can return token-level embeddings; pool per item
            arr = arr.mean(axis=0)
        arr = arr.flatten()
        orig_dim = arr.size
        if orig_dim > target_dim:
            arr = arr[:target_dim]
            logger.debug(
                "Truncated llama.cpp embedding from %d to %d dimensions",
                orig_dim,
                target_dim,
            )
        elif orig_dim < target_dim:
            arr = np.pad(
                arr,
                (0, target_dim - orig_dim),
                mode="constant",
                constant_values=0,
            )
            logger.debug(
                "Padded llama.cpp embedding from %d to %d dimensions",
                orig_dim,
                target_dim,
            )
        return arr

    try:
        response = requests.post(
            f"{self.provider}/embeddings",
            json={"model": self.genai_config.model, "content": content},
            timeout=self.timeout,
        )
        response.raise_for_status()
        body = response.json()
        # Accept both OpenAI-style {"data": [...]} and the bare list form.
        items = body.get("data", body) if isinstance(body, dict) else body
        if not isinstance(items, list):
            logger.warning("llama.cpp embeddings returned unexpected format")
            return []
        results: list[np.ndarray] = []
        for item in items:
            raw_emb = item.get("embedding") if isinstance(item, dict) else None
            if raw_emb is None:
                logger.warning("llama.cpp embeddings item missing embedding field")
                continue
            results.append(_normalize(raw_emb))
        return results
    except requests.exceptions.Timeout:
        logger.warning("llama.cpp embeddings request timed out")
        return []
    except requests.exceptions.RequestException as e:
        error_detail = str(e)
        if hasattr(e, "response") and e.response is not None:
            try:
                error_detail = f"{str(e)} - Response: {e.response.text[:500]}"
            except Exception:
                pass
        logger.warning("llama.cpp embeddings error: %s", error_detail)
        return []
    except Exception as e:
        logger.warning("Unexpected error in llama.cpp embeddings: %s", str(e))
        return []
def chat_with_tools(
self,
messages: list[dict[str, Any]],

View File

@ -1,3 +1,6 @@
/**
 * ONNX embedding models that require local model downloads. Semantic search
 * model values outside this list are treated as GenAI provider names, which
 * have no local models to download.
 */
export const JINA_EMBEDDING_MODELS = ["jinav1", "jinav2"] as const;
export const supportedLanguageKeys = [
"en",
"es",

View File

@ -23,6 +23,7 @@ import { toast } from "sonner";
import useSWR from "swr";
import useSWRInfinite from "swr/infinite";
import { useDocDomain } from "@/hooks/use-doc-domain";
import { JINA_EMBEDDING_MODELS } from "@/lib/const";
const API_LIMIT = 25;
@ -293,7 +294,12 @@ export default function Explore() {
const modelVersion = config?.semantic_search.model || "jinav1";
const modelSize = config?.semantic_search.model_size || "small";
// Text model state
// GenAI providers have no local models to download
const isGenaiEmbeddings =
typeof modelVersion === "string" &&
!(JINA_EMBEDDING_MODELS as readonly string[]).includes(modelVersion);
// Text model state (skipped for GenAI - no local models)
const { payload: textModelState } = useModelState(
modelVersion === "jinav1"
? "jinaai/jina-clip-v1-text_model_fp16.onnx"
@ -328,6 +334,10 @@ export default function Explore() {
);
const allModelsLoaded = useMemo(() => {
if (isGenaiEmbeddings) {
return true;
}
return (
textModelState === "downloaded" &&
textTokenizerState === "downloaded" &&
@ -335,6 +345,7 @@ export default function Explore() {
visionFeatureExtractorState === "downloaded"
);
}, [
isGenaiEmbeddings,
textModelState,
textTokenizerState,
visionModelState,
@ -358,10 +369,11 @@ export default function Explore() {
!defaultViewLoaded ||
(config?.semantic_search.enabled &&
(!reindexState ||
!textModelState ||
(!isGenaiEmbeddings &&
(!textModelState ||
!textTokenizerState ||
!visionModelState ||
!visionFeatureExtractorState))
!visionFeatureExtractorState))))
) {
return (
<ActivityIndicator className="absolute left-1/2 top-1/2 -translate-x-1/2 -translate-y-1/2" />