Handle emb correctly

Set model
Don't require download check
2026-05-01 19:17:41 +03:00 · 2026-03-02 14:31:04 -07:00 · 2026-03-02 14:15:04 -07:00 · 2026-03-02 14:15:04 -07:00 · 2026-03-02 14:15:04 -07:00 · 2026-03-02 14:15:04 -07:00
10 changed files with 388 additions and 20 deletions
--- a/docs/docs/configuration/semantic_search.md
+++ b/docs/docs/configuration/semantic_search.md
@ -76,6 +76,40 @@ Switching between V1 and V2 requires reindexing your embeddings. The embeddings
 :::
 ### GenAI Provider (llama.cpp)
 Frigate can use a GenAI provider for semantic search embeddings when that provider has the `embeddings` role. Currently, only **llama.cpp** supports multimodal embeddings (both text and images).
 To use llama.cpp for semantic search:
 1. Configure a GenAI provider in your config with `embeddings` in its `roles`.
 2. Set `semantic_search.model` to the GenAI config key (e.g. `default`).
 3. Start the llama.cpp server with `--embeddings`, and add `--mmproj` for image support. An example Frigate configuration:
 ```yaml
 genai:
  default:
    provider: llamacpp
    base_url: http://localhost:8080
    model: your-model-name
    roles:
      - embeddings
      - vision
      - tools
 semantic_search:
  enabled: True
  model: default
 ```
 The llama.cpp server must be started with `--embeddings` for the embeddings API, and `--mmproj <mmproj.gguf>` when using image embeddings. See the [llama.cpp server documentation](https://github.com/ggml-org/llama.cpp/blob/master/tools/server/README.md) for details.
 :::note
 Switching between Jina models and a GenAI provider requires reindexing. Embeddings from different backends are incompatible.
 :::
 ### GPU Acceleration
 The CLIP models are downloaded in ONNX format, and the `large` model can be accelerated using GPU hardware, when available. This depends on the Docker build that is used. You can also target a specific device in a multi-GPU installation.
--- a/frigate/config/classification.py
+++ b/frigate/config/classification.py
@ -1,5 +1,5 @@
 from enum import Enum
-from typing import Dict, List, Optional
+from typing import Dict, List, Optional, Union
 from pydantic import ConfigDict, Field
@ -173,10 +173,10 @@ class SemanticSearchConfig(FrigateBaseModel):
        title="Reindex on startup",
        description="Trigger a full reindex of historical tracked objects into the embeddings database.",
    )
-    model: Optional[SemanticSearchModelEnum] = Field(
+    model: Optional[Union[SemanticSearchModelEnum, str]] = Field(
        default=SemanticSearchModelEnum.jinav1,
-        title="Semantic search model",
+        title="Semantic search model or GenAI provider name",
-        description="The embeddings model to use for semantic search (for example 'jinav1').",
+        description="The embeddings model to use for semantic search (for example 'jinav1'), or the name of a GenAI provider with the embeddings role.",
    )
    model_size: str = Field(
        default="small",
--- a/frigate/config/config.py
+++ b/frigate/config/config.py
@ -592,6 +592,22 @@ class FrigateConfig(FrigateBaseModel):
                    )
                role_to_name[role] = name
        # validate semantic_search.model when it is a GenAI provider name
        if self.semantic_search.enabled and isinstance(
            self.semantic_search.model, str
        ):
            if self.semantic_search.model not in self.genai:
                raise ValueError(
                    f"semantic_search.model '{self.semantic_search.model}' is not a "
                    "valid GenAI config key. Must match a key in genai config."
                )
            genai_cfg = self.genai[self.semantic_search.model]
            if GenAIRoleEnum.embeddings not in genai_cfg.roles:
                raise ValueError(
                    f"GenAI provider '{self.semantic_search.model}' must have "
                    "'embeddings' in its roles for semantic search."
                )
        # set default min_score for object attributes
        for attribute in self.model.all_attributes:
            if not self.objects.filters.get(attribute):
--- a/frigate/embeddings/embeddings.py
+++ b/frigate/embeddings/embeddings.py
@ -28,6 +28,7 @@ from frigate.types import ModelStatusTypesEnum
 from frigate.util.builtin import EventsPerSecond, InferenceSpeed, serialize
 from frigate.util.file import get_event_thumbnail_bytes
 from .genai_embedding import GenAIEmbedding
 from .onnx.jina_v1_embedding import JinaV1ImageEmbedding, JinaV1TextEmbedding
 from .onnx.jina_v2_embedding import JinaV2Embedding
@ -73,11 +74,13 @@ class Embeddings:
        config: FrigateConfig,
        db: SqliteVecQueueDatabase,
        metrics: DataProcessorMetrics,
        genai_manager=None,
    ) -> None:
        self.config = config
        self.db = db
        self.metrics = metrics
        self.requestor = InterProcessRequestor()
        self.genai_manager = genai_manager
        self.image_inference_speed = InferenceSpeed(self.metrics.image_embeddings_speed)
        self.image_eps = EventsPerSecond()
@ -104,7 +107,27 @@ class Embeddings:
                },
            )
-        if self.config.semantic_search.model == SemanticSearchModelEnum.jinav2:
+        model_cfg = self.config.semantic_search.model
        is_genai_model = isinstance(model_cfg, str)
        if is_genai_model:
            embeddings_client = (
                genai_manager.embeddings_client if genai_manager else None
            )
            if not embeddings_client:
                raise ValueError(
                    f"semantic_search.model is '{model_cfg}' (GenAI provider) but "
                    "no embeddings client is configured. Ensure the GenAI provider "
                    "has 'embeddings' in its roles."
                )
            self.embedding = GenAIEmbedding(embeddings_client)
            self.text_embedding = lambda input_data: self.embedding(
                input_data, embedding_type="text"
            )
            self.vision_embedding = lambda input_data: self.embedding(
                input_data, embedding_type="vision"
            )
        elif model_cfg == SemanticSearchModelEnum.jinav2:
            # Single JinaV2Embedding instance for both text and vision
            self.embedding = JinaV2Embedding(
                model_size=self.config.semantic_search.model_size,
@ -118,7 +141,8 @@ class Embeddings:
            self.vision_embedding = lambda input_data: self.embedding(
                input_data, embedding_type="vision"
            )
-        else:  # Default to jinav1
+        else:
            # Default to jinav1
            self.text_embedding = JinaV1TextEmbedding(
                model_size=config.semantic_search.model_size,
                requestor=self.requestor,
@ -136,8 +160,11 @@ class Embeddings:
        self.metrics.text_embeddings_eps.value = self.text_eps.eps()
    def get_model_definitions(self):
-        # Version-specific models
+        model_cfg = self.config.semantic_search.model
-        if self.config.semantic_search.model == SemanticSearchModelEnum.jinav2:
+        if isinstance(model_cfg, str):
            # GenAI provider: no ONNX models to download
            models = []
        elif model_cfg == SemanticSearchModelEnum.jinav2:
            models = [
                "jinaai/jina-clip-v2-tokenizer",
                "jinaai/jina-clip-v2-model_fp16.onnx"
@ -224,6 +251,14 @@ class Embeddings:
        embeddings = self.vision_embedding(valid_thumbs)
        if len(embeddings) != len(valid_ids):
            logger.warning(
                "Batch embed returned %d embeddings for %d thumbnails; skipping batch",
                len(embeddings),
                len(valid_ids),
            )
            return []
        if upsert:
            items = []
            for i in range(len(valid_ids)):
@ -246,9 +281,15 @@ class Embeddings:
    def embed_description(
        self, event_id: str, description: str, upsert: bool = True
-    ) -> np.ndarray:
+    ) -> np.ndarray | None:
        start = datetime.datetime.now().timestamp()
-        embedding = self.text_embedding([description])[0]
+        embeddings = self.text_embedding([description])
        if not embeddings:
            logger.warning(
                "Failed to generate description embedding for event %s", event_id
            )
            return None
        embedding = embeddings[0]
        if upsert:
            self.db.execute_sql(
@ -271,8 +312,32 @@ class Embeddings:
        # upsert embeddings one by one to avoid token limit
        embeddings = []
-        for desc in event_descriptions.values():
+        for eid, desc in event_descriptions.items():
-            embeddings.append(self.text_embedding([desc])[0])
+            result = self.text_embedding([desc])
            if not result:
                logger.warning(
                    "Failed to generate description embedding for event %s", eid
                )
                continue
            embeddings.append(result[0])
        if not embeddings:
            logger.warning("No description embeddings generated in batch")
            return np.array([])
        # Build ids list for only successful embeddings - we need to track which succeeded
        ids = list(event_descriptions.keys())
        if len(embeddings) != len(ids):
            # Rebuild ids/embeddings for only successful ones (match by order)
            ids = []
            embeddings_filtered = []
            for eid, desc in event_descriptions.items():
                result = self.text_embedding([desc])
                if result:
                    ids.append(eid)
                    embeddings_filtered.append(result[0])
            ids = ids
            embeddings = embeddings_filtered
        if upsert:
            ids = list(event_descriptions.keys())
@ -314,7 +379,10 @@ class Embeddings:
        batch_size = (
            4
-            if self.config.semantic_search.model == SemanticSearchModelEnum.jinav2
+            if (
                isinstance(self.config.semantic_search.model, str)
                or self.config.semantic_search.model == SemanticSearchModelEnum.jinav2
            )
            else 32
        )
        current_page = 1
@ -601,6 +669,8 @@ class Embeddings:
        if trigger.type == "description":
            logger.debug(f"Generating embedding for trigger description {trigger_name}")
            embedding = self.embed_description(None, trigger.data, upsert=False)
            if embedding is None:
                return b""
            return embedding.astype(np.float32).tobytes()
        elif trigger.type == "thumbnail":
@ -636,6 +706,8 @@ class Embeddings:
                embedding = self.embed_thumbnail(
                    str(trigger.data), thumbnail, upsert=False
                )
                if embedding is None:
                    return b""
                return embedding.astype(np.float32).tobytes()
        else:
--- a/frigate/embeddings/genai_embedding.py
+++ b/frigate/embeddings/genai_embedding.py
@ -0,0 +1,89 @@
 """GenAI-backed embeddings for semantic search."""
 import io
 import logging
 from typing import TYPE_CHECKING
 import numpy as np
 from PIL import Image
 if TYPE_CHECKING:
    from frigate.genai import GenAIClient
 logger = logging.getLogger(__name__)
 EMBEDDING_DIM = 768
 class GenAIEmbedding:
    """Adapter that serves semantic-search embeddings from a GenAI client.
    Instances are called as ``adapter(inputs, embedding_type)``, the same call
    shape used for the Jina V2 embedding, and hand the work to the client's
    ``embed`` method. Every returned vector is flattened to 1-D float32 and
    truncated or zero-padded to EMBEDDING_DIM entries.
    """
    def __init__(self, client: "GenAIClient") -> None:
        self.client = client
    @staticmethod
    def _to_image_bytes(inputs) -> list[bytes]:
        """Collect encoded image bytes for each supported vision input.
        Raw ``bytes`` pass through untouched, PIL images are re-encoded as RGB
        JPEG, and any other type is logged and skipped.
        """
        payloads: list[bytes] = []
        for item in inputs:
            if isinstance(item, bytes):
                payloads.append(item)
                continue
            if isinstance(item, Image.Image):
                stream = io.BytesIO()
                item.convert("RGB").save(stream, format="JPEG")
                payloads.append(stream.getvalue())
                continue
            logger.warning(
                "GenAIEmbedding: skipping unsupported vision input type %s",
                type(item).__name__,
            )
        return payloads
    @staticmethod
    def _fit_to_dim(raw) -> np.ndarray:
        """Coerce one provider embedding to a 1-D float32 vector of EMBEDDING_DIM."""
        vec = np.asarray(raw, dtype=np.float32)
        if vec.ndim > 1:
            # Some providers return token-level embeddings; pool to one vector.
            vec = vec.mean(axis=0)
        vec = vec.flatten()
        missing = EMBEDDING_DIM - vec.size
        if missing < 0:
            # Too long: keep only the leading EMBEDDING_DIM values.
            return vec[:EMBEDDING_DIM]
        if missing > 0:
            # Too short: append zeros up to EMBEDDING_DIM.
            return np.pad(vec, (0, missing), mode="constant", constant_values=0)
        return vec
    def __call__(
        self,
        inputs: list[str] | list[bytes] | list[Image.Image],
        embedding_type: str = "text",
    ) -> list[np.ndarray]:
        """Embed a batch of text or image inputs.
        Args:
            inputs: Strings for "text"; bytes or PIL images for "vision".
            embedding_type: Either "text" or "vision".
        Returns:
            One float32 array of EMBEDDING_DIM values per embedding returned by
            the client. Empty when ``inputs`` is empty or when no vision input
            has a supported type.
        Raises:
            ValueError: If ``embedding_type`` is neither "text" nor "vision"
                (only checked when ``inputs`` is non-empty).
        """
        if not inputs:
            return []
        if embedding_type == "text":
            raw_vectors = self.client.embed(texts=[str(item) for item in inputs])
        elif embedding_type == "vision":
            payloads = self._to_image_bytes(inputs)
            if not payloads:
                return []
            raw_vectors = self.client.embed(images=payloads)
        else:
            raise ValueError(
                f"Invalid embedding_type '{embedding_type}'. Must be 'text' or 'vision'."
            )
        return [self._fit_to_dim(vec) for vec in raw_vectors]
--- a/frigate/embeddings/maintainer.py
+++ b/frigate/embeddings/maintainer.py
@ -116,8 +116,10 @@ class EmbeddingMaintainer(threading.Thread):
        models = [Event, Recordings, ReviewSegment, Trigger]
        db.bind(models)
        self.genai_manager = GenAIClientManager(config)
        if config.semantic_search.enabled:
-            self.embeddings = Embeddings(config, db, metrics)
+            self.embeddings = Embeddings(config, db, metrics, self.genai_manager)
            # Check if we need to re-index events
            if config.semantic_search.reindex:
@ -144,7 +146,6 @@ class EmbeddingMaintainer(threading.Thread):
        self.frame_manager = SharedMemoryFrameManager()
        self.detected_license_plates: dict[str, dict[str, Any]] = {}
        self.genai_manager = GenAIClientManager(config)
        # model runners to share between realtime and post processors
        if self.config.lpr.enabled:
--- a/frigate/genai/init.py
+++ b/frigate/genai/init.py
@ -7,6 +7,7 @@ import os
 import re
 from typing import Any, Optional
 import numpy as np
 from playhouse.shortcuts import model_to_dict
 from frigate.config import CameraConfig, GenAIConfig, GenAIProviderEnum
@ -304,6 +305,25 @@ Guidelines:
        """Get the context window size for this provider in tokens."""
        return 4096
    def embed(
        self,
        texts: list[str] | None = None,
        images: list[bytes] | None = None,
    ) -> list[np.ndarray]:
        """Fallback embedding hook for providers without embedding support.
        This default ignores ``texts`` and ``images``, logs a warning naming
        the concrete client class, and returns an empty list. Providers that
        support embeddings should override it and return one numpy array per
        input; 768 dimensions are expected for Frigate semantic search
        compatibility.
        """
        provider_name = self.__class__.__name__
        logger.warning(
            "%s does not support embeddings. "
            "This method should be overridden by the provider implementation.",
            provider_name,
        )
        return []
    def chat_with_tools(
        self,
        messages: list[dict[str, Any]],
--- a/frigate/genai/llama_cpp.py
+++ b/frigate/genai/llama_cpp.py
@ -1,12 +1,15 @@
 """llama.cpp Provider for Frigate AI."""
 import base64
 import io
 import json
 import logging
 from typing import Any, Optional
 import httpx
 import numpy as np
 import requests
 from PIL import Image
 from frigate.config import GenAIProviderEnum
 from frigate.genai import GenAIClient, register_genai_provider
@ -15,6 +18,20 @@ from frigate.genai.utils import parse_tool_calls_from_message
 logger = logging.getLogger(__name__)
 def _to_jpeg(img_bytes: bytes) -> bytes | None:
    """Re-encode image bytes as an RGB JPEG (quality 85).
    llama.cpp/STB does not support WebP, so images are normalised to JPEG
    before being sent. Returns None, after logging a warning, when the bytes
    cannot be decoded or re-encoded.
    """
    try:
        decoded = Image.open(io.BytesIO(img_bytes))
        rgb = decoded if decoded.mode == "RGB" else decoded.convert("RGB")
        out = io.BytesIO()
        rgb.save(out, format="JPEG", quality=85)
        return out.getvalue()
    except Exception as e:
        # Best effort: the caller decides what to do when conversion fails.
        logger.warning("Failed to convert image to JPEG: %s", e)
        return None
@register_genai_provider(GenAIProviderEnum.llamacpp)
 class LlamaCppClient(GenAIClient):
    """Generative AI client for Frigate using llama.cpp server."""
@ -176,6 +193,110 @@ class LlamaCppClient(GenAIClient):
            )
        return result if result else None
    def embed(
        self,
        texts: list[str] | None = None,
        images: list[bytes] | None = None,
    ) -> list[np.ndarray]:
        """Request embeddings from the llama.cpp /embeddings endpoint.
        All texts and images go out in a single batch request, texts first.
        Each entry uses the content format with prompt_string, plus
        multimodal_data for images (PR #15108). The server must be started with
        --embeddings, and with --mmproj for multimodal support.
        Returns:
            One 1-D float32 array of 768 values per response item that carries
            an "embedding" field; items without one are logged and skipped.
            An empty list is returned when the provider is not initialized,
            when there is nothing to embed, or when the request or parsing
            fails.
        """
        if self.provider is None:
            logger.warning(
                "llama.cpp provider has not been initialized. Check your llama.cpp configuration."
            )
            return []
        text_inputs = texts or []
        image_inputs = images or []
        if not (text_inputs or image_inputs):
            return []
        target_dim = 768
        payload = [{"prompt_string": entry} for entry in text_inputs]
        for raw_image in image_inputs:
            # llama.cpp uses STB which does not support WebP; convert to JPEG,
            # falling back to the original bytes when conversion fails.
            converted = _to_jpeg(raw_image)
            source = raw_image if converted is None else converted
            # prompt_string must contain <__media__> placeholder for image tokenization
            payload.append(
                {
                    "prompt_string": "<__media__>\n",
                    "multimodal_data": [base64.b64encode(source).decode("utf-8")],
                }
            )
        try:
            response = requests.post(
                f"{self.provider}/embeddings",
                json={"model": self.genai_config.model, "content": payload},
                timeout=self.timeout,
            )
            response.raise_for_status()
            body = response.json()
            # Accept either {"data": [...]} or a bare list of items.
            items = body.get("data", body) if isinstance(body, dict) else body
            if not isinstance(items, list):
                logger.warning("llama.cpp embeddings returned unexpected format")
                return []
            vectors = []
            for item in items:
                raw = item.get("embedding") if isinstance(item, dict) else None
                if raw is None:
                    logger.warning("llama.cpp embeddings item missing embedding field")
                    continue
                vec = np.array(raw, dtype=np.float32)
                if vec.ndim > 1:
                    # llama.cpp can return token-level embeddings; pool per item
                    vec = vec.mean(axis=0)
                vec = vec.flatten()
                source_dim = vec.size
                if source_dim > target_dim:
                    vec = vec[:target_dim]
                    logger.debug(
                        "Truncated llama.cpp embedding from %d to %d dimensions",
                        source_dim,
                        target_dim,
                    )
                elif source_dim < target_dim:
                    vec = np.pad(
                        vec,
                        (0, target_dim - source_dim),
                        mode="constant",
                        constant_values=0,
                    )
                    logger.debug(
                        "Padded llama.cpp embedding from %d to %d dimensions",
                        source_dim,
                        target_dim,
                    )
                vectors.append(vec)
            return vectors
        except requests.exceptions.Timeout:
            logger.warning("llama.cpp embeddings request timed out")
            return []
        except requests.exceptions.RequestException as e:
            error_detail = str(e)
            failed_response = getattr(e, "response", None)
            if failed_response is not None:
                # Append a bounded slice of the server's reply when readable.
                try:
                    error_detail = (
                        f"{error_detail} - Response: {failed_response.text[:500]}"
                    )
                except Exception:
                    pass
            logger.warning("llama.cpp embeddings error: %s", error_detail)
            return []
        except Exception as e:
            logger.warning("Unexpected error in llama.cpp embeddings: %s", str(e))
            return []
    def chat_with_tools(
        self,
        messages: list[dict[str, Any]],
--- a/web/src/lib/const.ts
+++ b/web/src/lib/const.ts
@ -1,3 +1,6 @@
 /** ONNX embedding models that require local model downloads. GenAI providers are not in this list. */
 export const JINA_EMBEDDING_MODELS = ["jinav1", "jinav2"] as const;
 export const supportedLanguageKeys = [
  "en",
  "es",
--- a/web/src/pages/Explore.tsx
+++ b/web/src/pages/Explore.tsx
@ -23,6 +23,7 @@ import { toast } from "sonner";
 import useSWR from "swr";
 import useSWRInfinite from "swr/infinite";
 import { useDocDomain } from "@/hooks/use-doc-domain";
 import { JINA_EMBEDDING_MODELS } from "@/lib/const";
 const API_LIMIT = 25;
@ -293,7 +294,12 @@ export default function Explore() {
  const modelVersion = config?.semantic_search.model || "jinav1";
  const modelSize = config?.semantic_search.model_size || "small";
-  // Text model state
+  // GenAI providers have no local models to download
  const isGenaiEmbeddings =
    typeof modelVersion === "string" &&
    !(JINA_EMBEDDING_MODELS as readonly string[]).includes(modelVersion);
  // Text model state (skipped for GenAI - no local models)
  const { payload: textModelState } = useModelState(
    modelVersion === "jinav1"
      ? "jinaai/jina-clip-v1-text_model_fp16.onnx"
@ -328,6 +334,10 @@ export default function Explore() {
  );
  const allModelsLoaded = useMemo(() => {
    if (isGenaiEmbeddings) {
      return true;
    }
    return (
      textModelState === "downloaded" &&
      textTokenizerState === "downloaded" &&
@ -335,6 +345,7 @@ export default function Explore() {
      visionFeatureExtractorState === "downloaded"
    );
  }, [
    isGenaiEmbeddings,
    textModelState,
    textTokenizerState,
    visionModelState,
@ -358,10 +369,11 @@ export default function Explore() {
    !defaultViewLoaded ||
    (config?.semantic_search.enabled &&
      (!reindexState ||
-        !textModelState ||
+        (!isGenaiEmbeddings &&
-        !textTokenizerState ||
+          (!textModelState ||
-        !visionModelState ||
+            !textTokenizerState ||
-        !visionFeatureExtractorState))
+            !visionModelState ||
            !visionFeatureExtractorState))))
  ) {
    return (
      <ActivityIndicator className="absolute left-1/2 top-1/2 -translate-x-1/2 -translate-y-1/2" />
Author	SHA1	Message	Date
Nicolas Mowen	334399a260	Handle emb correctly	2026-03-02 14:31:04 -07:00
Nicolas Mowen	ac63be9ea7	Set model	2026-03-02 14:15:04 -07:00
Nicolas Mowen	a8c741a8ce	Don't require download check	2026-03-02 14:15:04 -07:00
Nicolas Mowen	55e4d210cf	Fix sending images	2026-03-02 14:15:04 -07:00
Nicolas Mowen	28cb974e94	undo	2026-03-02 14:15:04 -07:00
Nicolas Mowen	d16bacf96b	Basic docs	2026-03-02 14:15:04 -07:00
Nicolas Mowen	ebd7e8010d	Add support for embedding via genai	2026-03-02 14:15:04 -07:00
Nicolas Mowen	e79a624a15	Add embed API support	2026-03-02 14:15:04 -07:00
Nicolas Mowen	29e2c322e7	Support GenAI for embeddings	2026-03-02 14:15:03 -07:00