Support using GenAI for embeddings / semantic search (#22323)

* Support GenAI for embeddings

* Add embed API support

* Add support for embedding via genai

* Basic docs

* undo

* Fix sending images

* Don't require download check

* Set model

* Handle emb correctly

* Clarification

* Cleanup

* Cleanup
This commit is contained in:
Nicolas Mowen 2026-03-08 09:55:00 -06:00 committed by GitHub
parent acdfed40a9
commit a705f254e5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 346 additions and 20 deletions

View File

@ -76,6 +76,40 @@ Switching between V1 and V2 requires reindexing your embeddings. The embeddings
:::
### GenAI Provider
Frigate can use a GenAI provider for semantic search embeddings when that provider has the `embeddings` role. Currently, only **llama.cpp** supports multimodal embeddings (both text and images).
To use llama.cpp for semantic search:
1. Configure a GenAI provider in your config with `embeddings` in its `roles`.
2. Set `semantic_search.model` to the GenAI config key (e.g. `default`).
3. Start the llama.cpp server with `--embeddings` and `--mmproj` for image support:
```yaml
genai:
default:
provider: llamacpp
base_url: http://localhost:8080
model: your-model-name
roles:
- embeddings
- vision
- tools
semantic_search:
enabled: True
model: default
```
The llama.cpp server must be started with `--embeddings` to enable the embeddings API, and with `--mmproj` loading a multimodal embeddings model for image support. See the [llama.cpp server documentation](https://github.com/ggml-org/llama.cpp/blob/master/tools/server/README.md) for details.
:::note
Switching between Jina models and a GenAI provider requires reindexing. Embeddings from different backends are incompatible.
:::
### GPU Acceleration
The CLIP models are downloaded in ONNX format, and the `large` model can be accelerated using GPU hardware, when available. This depends on the Docker build that is used. You can also target a specific device in a multi-GPU installation.

View File

@ -1,5 +1,5 @@
from enum import Enum
from typing import Dict, List, Optional
from typing import Dict, List, Optional, Union
from pydantic import ConfigDict, Field
@ -173,10 +173,10 @@ class SemanticSearchConfig(FrigateBaseModel):
title="Reindex on startup",
description="Trigger a full reindex of historical tracked objects into the embeddings database.",
)
model: Optional[SemanticSearchModelEnum] = Field(
model: Optional[Union[SemanticSearchModelEnum, str]] = Field(
default=SemanticSearchModelEnum.jinav1,
title="Semantic search model",
description="The embeddings model to use for semantic search (for example 'jinav1').",
title="Semantic search model or GenAI provider name",
description="The embeddings model to use for semantic search (for example 'jinav1'), or the name of a GenAI provider with the embeddings role.",
)
model_size: str = Field(
default="small",

View File

@ -61,6 +61,7 @@ from .classification import (
FaceRecognitionConfig,
LicensePlateRecognitionConfig,
SemanticSearchConfig,
SemanticSearchModelEnum,
)
from .database import DatabaseConfig
from .env import EnvVars
@ -592,6 +593,24 @@ class FrigateConfig(FrigateBaseModel):
)
role_to_name[role] = name
# validate semantic_search.model when it is a GenAI provider name
if (
self.semantic_search.enabled
and isinstance(self.semantic_search.model, str)
and not isinstance(self.semantic_search.model, SemanticSearchModelEnum)
):
if self.semantic_search.model not in self.genai:
raise ValueError(
f"semantic_search.model '{self.semantic_search.model}' is not a "
"valid GenAI config key. Must match a key in genai config."
)
genai_cfg = self.genai[self.semantic_search.model]
if GenAIRoleEnum.embeddings not in genai_cfg.roles:
raise ValueError(
f"GenAI provider '{self.semantic_search.model}' must have "
"'embeddings' in its roles for semantic search."
)
# set default min_score for object attributes
for attribute in self.model.all_attributes:
if not self.objects.filters.get(attribute):

View File

@ -28,6 +28,7 @@ from frigate.types import ModelStatusTypesEnum
from frigate.util.builtin import EventsPerSecond, InferenceSpeed, serialize
from frigate.util.file import get_event_thumbnail_bytes
from .genai_embedding import GenAIEmbedding
from .onnx.jina_v1_embedding import JinaV1ImageEmbedding, JinaV1TextEmbedding
from .onnx.jina_v2_embedding import JinaV2Embedding
@ -73,6 +74,7 @@ class Embeddings:
config: FrigateConfig,
db: SqliteVecQueueDatabase,
metrics: DataProcessorMetrics,
genai_manager=None,
) -> None:
self.config = config
self.db = db
@ -104,7 +106,27 @@ class Embeddings:
},
)
if self.config.semantic_search.model == SemanticSearchModelEnum.jinav2:
model_cfg = self.config.semantic_search.model
if not isinstance(model_cfg, SemanticSearchModelEnum):
# GenAI provider
embeddings_client = (
genai_manager.embeddings_client if genai_manager else None
)
if not embeddings_client:
raise ValueError(
f"semantic_search.model is '{model_cfg}' (GenAI provider) but "
"no embeddings client is configured. Ensure the GenAI provider "
"has 'embeddings' in its roles."
)
self.embedding = GenAIEmbedding(embeddings_client)
self.text_embedding = lambda input_data: self.embedding(
input_data, embedding_type="text"
)
self.vision_embedding = lambda input_data: self.embedding(
input_data, embedding_type="vision"
)
elif model_cfg == SemanticSearchModelEnum.jinav2:
# Single JinaV2Embedding instance for both text and vision
self.embedding = JinaV2Embedding(
model_size=self.config.semantic_search.model_size,
@ -118,7 +140,8 @@ class Embeddings:
self.vision_embedding = lambda input_data: self.embedding(
input_data, embedding_type="vision"
)
else: # Default to jinav1
else:
# Default to jinav1
self.text_embedding = JinaV1TextEmbedding(
model_size=config.semantic_search.model_size,
requestor=self.requestor,
@ -136,8 +159,11 @@ class Embeddings:
self.metrics.text_embeddings_eps.value = self.text_eps.eps()
def get_model_definitions(self):
# Version-specific models
if self.config.semantic_search.model == SemanticSearchModelEnum.jinav2:
model_cfg = self.config.semantic_search.model
if not isinstance(model_cfg, SemanticSearchModelEnum):
# GenAI provider: no ONNX models to download
models = []
elif model_cfg == SemanticSearchModelEnum.jinav2:
models = [
"jinaai/jina-clip-v2-tokenizer",
"jinaai/jina-clip-v2-model_fp16.onnx"
@ -312,11 +338,12 @@ class Embeddings:
# Get total count of events to process
total_events = Event.select().count()
batch_size = (
4
if self.config.semantic_search.model == SemanticSearchModelEnum.jinav2
else 32
)
if not isinstance(self.config.semantic_search.model, SemanticSearchModelEnum):
batch_size = 1
elif self.config.semantic_search.model == SemanticSearchModelEnum.jinav2:
batch_size = 4
else:
batch_size = 32
current_page = 1
totals = {

View File

@ -0,0 +1,89 @@
"""GenAI-backed embeddings for semantic search."""
import io
import logging
from typing import TYPE_CHECKING
import numpy as np
from PIL import Image
if TYPE_CHECKING:
from frigate.genai import GenAIClient
# Module-level logger for embedding adapter warnings.
logger = logging.getLogger(__name__)

# Frigate's sqlite-vec schema stores 768-dim vectors; provider output is
# truncated or zero-padded to this size (see __call__ below).
EMBEDDING_DIM = 768
class GenAIEmbedding:
    """Embedding adapter backed by a GenAI provider's embed API.

    Mirrors the JinaV2Embedding calling convention used by semantic search:
    calling the instance with a list of inputs and an ``embedding_type``
    yields one numpy vector per embedded input. Vectors are coerced to 768
    dimensions so they fit Frigate's sqlite-vec schema.
    """

    def __init__(self, client: "GenAIClient") -> None:
        # Provider client; must expose embed(texts=...) / embed(images=...).
        self.client = client

    def __call__(
        self,
        inputs: list[str] | list[bytes] | list[Image.Image],
        embedding_type: str = "text",
    ) -> list[np.ndarray]:
        """Generate embeddings for text or images.

        Args:
            inputs: Strings for text, or bytes / PIL images for vision.
            embedding_type: Either "text" or "vision".

        Returns:
            One 768-dim float32 numpy array per embedded input.

        Raises:
            ValueError: If ``embedding_type`` is not "text" or "vision".
        """
        if not inputs:
            return []

        if embedding_type == "text":
            raw_vectors = self.client.embed(texts=[str(item) for item in inputs])
        elif embedding_type == "vision":
            payloads: list[bytes] = []
            for item in inputs:
                if isinstance(item, bytes):
                    payloads.append(item)
                elif isinstance(item, Image.Image):
                    # Re-encode PIL images as JPEG bytes for the provider.
                    buffer = io.BytesIO()
                    item.convert("RGB").save(buffer, format="JPEG")
                    payloads.append(buffer.getvalue())
                else:
                    logger.warning(
                        "GenAIEmbedding: skipping unsupported vision input type %s",
                        type(item).__name__,
                    )
            if not payloads:
                return []
            raw_vectors = self.client.embed(images=payloads)
        else:
            raise ValueError(
                f"Invalid embedding_type '{embedding_type}'. Must be 'text' or 'vision'."
            )

        return [self._coerce(vec) for vec in raw_vectors]

    @staticmethod
    def _coerce(vec) -> np.ndarray:
        """Pool token-level output and pad/truncate to EMBEDDING_DIM floats."""
        arr = np.asarray(vec, dtype=np.float32)
        if arr.ndim > 1:
            # Some providers return token-level embeddings; pool to one vector.
            arr = arr.mean(axis=0)
        arr = arr.flatten()
        if arr.size > EMBEDDING_DIM:
            arr = arr[:EMBEDDING_DIM]
        elif arr.size < EMBEDDING_DIM:
            arr = np.pad(
                arr,
                (0, EMBEDDING_DIM - arr.size),
                mode="constant",
                constant_values=0,
            )
        return arr

View File

@ -123,8 +123,10 @@ class EmbeddingMaintainer(threading.Thread):
models = [Event, Recordings, ReviewSegment, Trigger]
db.bind(models)
self.genai_manager = GenAIClientManager(config)
if config.semantic_search.enabled:
self.embeddings = Embeddings(config, db, metrics)
self.embeddings = Embeddings(config, db, metrics, self.genai_manager)
# Check if we need to re-index events
if config.semantic_search.reindex:
@ -151,7 +153,6 @@ class EmbeddingMaintainer(threading.Thread):
self.frame_manager = SharedMemoryFrameManager()
self.detected_license_plates: dict[str, dict[str, Any]] = {}
self.genai_manager = GenAIClientManager(config)
# model runners to share between realtime and post processors
if self.config.lpr.enabled:

View File

@ -7,6 +7,7 @@ import os
import re
from typing import Any, Optional
import numpy as np
from playhouse.shortcuts import model_to_dict
from frigate.config import CameraConfig, GenAIConfig, GenAIProviderEnum
@ -304,6 +305,25 @@ Guidelines:
"""Get the context window size for this provider in tokens."""
return 4096
def embed(
    self,
    texts: list[str] | None = None,
    images: list[bytes] | None = None,
) -> list[np.ndarray]:
    """Generate embeddings for text and/or images.

    Default base implementation: logs a warning and returns an empty list.
    Providers with embeddings support (e.g. llama.cpp) override this to
    return one numpy array per input; semantic search expects 768 dims.
    """
    logger.warning(
        "%s does not support embeddings. "
        "This method should be overridden by the provider implementation.",
        type(self).__name__,
    )
    return []
def chat_with_tools(
self,
messages: list[dict[str, Any]],

View File

@ -1,12 +1,15 @@
"""llama.cpp Provider for Frigate AI."""
import base64
import io
import json
import logging
from typing import Any, Optional
import httpx
import numpy as np
import requests
from PIL import Image
from frigate.config import GenAIProviderEnum
from frigate.genai import GenAIClient, register_genai_provider
@ -15,6 +18,20 @@ from frigate.genai.utils import parse_tool_calls_from_message
logger = logging.getLogger(__name__)
def _to_jpeg(img_bytes: bytes) -> bytes | None:
    """Re-encode arbitrary image bytes as JPEG.

    llama.cpp decodes images with STB, which cannot read WebP, so payloads
    are normalized to JPEG before upload. Returns None when decoding fails
    (caller falls back to the original bytes).
    """
    try:
        decoded = Image.open(io.BytesIO(img_bytes))
        if decoded.mode != "RGB":
            decoded = decoded.convert("RGB")
        out = io.BytesIO()
        decoded.save(out, format="JPEG", quality=85)
        return out.getvalue()
    except Exception as err:
        logger.warning("Failed to convert image to JPEG: %s", err)
        return None
@register_genai_provider(GenAIProviderEnum.llamacpp)
class LlamaCppClient(GenAIClient):
"""Generative AI client for Frigate using llama.cpp server."""
@ -176,6 +193,110 @@ class LlamaCppClient(GenAIClient):
)
return result if result else None
def embed(
    self,
    texts: list[str] | None = None,
    images: list[bytes] | None = None,
) -> list[np.ndarray]:
    """Generate embeddings via the llama.cpp /embeddings endpoint.

    Texts and images are batched into a single request using the content
    format with prompt_string and multimodal_data (llama.cpp PR #15108).
    The server must be started with --embeddings, plus --mmproj for image
    support. Returns one 768-dim float32 array per embedded input, or an
    empty list on any failure.
    """
    if self.provider is None:
        logger.warning(
            "llama.cpp provider has not been initialized. Check your llama.cpp configuration."
        )
        return []

    text_inputs = texts or []
    image_inputs = images or []
    if not text_inputs and not image_inputs:
        return []

    target_dim = 768

    content: list[dict[str, Any]] = [{"prompt_string": t} for t in text_inputs]
    for raw in image_inputs:
        # llama.cpp uses STB which does not support WebP; convert to JPEG
        converted = _to_jpeg(raw)
        payload = raw if converted is None else converted
        # prompt_string must contain <__media__> placeholder for image tokenization
        content.append(
            {
                "prompt_string": "<__media__>\n",
                "multimodal_data": [base64.b64encode(payload).decode("utf-8")],
            }
        )

    def _normalize(vec: Any) -> np.ndarray:
        """Pool token-level output and force the target sqlite-vec shape."""
        arr = np.array(vec, dtype=np.float32)
        if arr.ndim > 1:
            # llama.cpp can return token-level embeddings; pool per item
            arr = arr.mean(axis=0)
        arr = arr.flatten()
        orig_dim = arr.size
        if orig_dim > target_dim:
            arr = arr[:target_dim]
            logger.debug(
                "Truncated llama.cpp embedding from %d to %d dimensions",
                orig_dim,
                target_dim,
            )
        elif orig_dim < target_dim:
            arr = np.pad(
                arr,
                (0, target_dim - orig_dim),
                mode="constant",
                constant_values=0,
            )
            logger.debug(
                "Padded llama.cpp embedding from %d to %d dimensions",
                orig_dim,
                target_dim,
            )
        return arr

    try:
        response = requests.post(
            f"{self.provider}/embeddings",
            json={"model": self.genai_config.model, "content": content},
            timeout=self.timeout,
        )
        response.raise_for_status()
        body = response.json()
        # Accept both OpenAI-style {"data": [...]} and the bare list form.
        items = body.get("data", body) if isinstance(body, dict) else body
        if not isinstance(items, list):
            logger.warning("llama.cpp embeddings returned unexpected format")
            return []
        results: list[np.ndarray] = []
        for item in items:
            raw_emb = item.get("embedding") if isinstance(item, dict) else None
            if raw_emb is None:
                logger.warning("llama.cpp embeddings item missing embedding field")
                continue
            results.append(_normalize(raw_emb))
        return results
    except requests.exceptions.Timeout:
        logger.warning("llama.cpp embeddings request timed out")
        return []
    except requests.exceptions.RequestException as e:
        error_detail = str(e)
        if hasattr(e, "response") and e.response is not None:
            try:
                error_detail = f"{str(e)} - Response: {e.response.text[:500]}"
            except Exception:
                pass
        logger.warning("llama.cpp embeddings error: %s", error_detail)
        return []
    except Exception as e:
        logger.warning("Unexpected error in llama.cpp embeddings: %s", str(e))
        return []
def chat_with_tools(
self,
messages: list[dict[str, Any]],

View File

@ -1,3 +1,6 @@
/**
 * ONNX embedding models that require local model downloads. Semantic search
 * model values outside this list are treated as GenAI provider names, which
 * have no local models to download.
 */
export const JINA_EMBEDDING_MODELS = ["jinav1", "jinav2"] as const;
export const supportedLanguageKeys = [
"en",
"es",

View File

@ -23,6 +23,7 @@ import { toast } from "sonner";
import useSWR from "swr";
import useSWRInfinite from "swr/infinite";
import { useDocDomain } from "@/hooks/use-doc-domain";
import { JINA_EMBEDDING_MODELS } from "@/lib/const";
const API_LIMIT = 25;
@ -293,7 +294,12 @@ export default function Explore() {
const modelVersion = config?.semantic_search.model || "jinav1";
const modelSize = config?.semantic_search.model_size || "small";
// Text model state
// GenAI providers have no local models to download
const isGenaiEmbeddings =
typeof modelVersion === "string" &&
!(JINA_EMBEDDING_MODELS as readonly string[]).includes(modelVersion);
// Text model state (skipped for GenAI - no local models)
const { payload: textModelState } = useModelState(
modelVersion === "jinav1"
? "jinaai/jina-clip-v1-text_model_fp16.onnx"
@ -328,6 +334,10 @@ export default function Explore() {
);
const allModelsLoaded = useMemo(() => {
if (isGenaiEmbeddings) {
return true;
}
return (
textModelState === "downloaded" &&
textTokenizerState === "downloaded" &&
@ -335,6 +345,7 @@ export default function Explore() {
visionFeatureExtractorState === "downloaded"
);
}, [
isGenaiEmbeddings,
textModelState,
textTokenizerState,
visionModelState,
@ -358,10 +369,11 @@ export default function Explore() {
!defaultViewLoaded ||
(config?.semantic_search.enabled &&
(!reindexState ||
!textModelState ||
(!isGenaiEmbeddings &&
(!textModelState ||
!textTokenizerState ||
!visionModelState ||
!visionFeatureExtractorState))
!visionFeatureExtractorState))))
) {
return (
<ActivityIndicator className="absolute left-1/2 top-1/2 -translate-x-1/2 -translate-y-1/2" />