diff --git a/docs/docs/configuration/semantic_search.md b/docs/docs/configuration/semantic_search.md index 91f435ff0..4a880fd5c 100644 --- a/docs/docs/configuration/semantic_search.md +++ b/docs/docs/configuration/semantic_search.md @@ -76,6 +76,40 @@ Switching between V1 and V2 requires reindexing your embeddings. The embeddings ::: +### GenAI Provider + +Frigate can use a GenAI provider for semantic search embeddings when that provider has the `embeddings` role. Currently, only **llama.cpp** supports multimodal embeddings (both text and images). + +To use llama.cpp for semantic search: + +1. Configure a GenAI provider in your config with `embeddings` in its `roles`. +2. Set `semantic_search.model` to the GenAI config key (e.g. `default`). +3. Start the llama.cpp server with `--embeddings`, and with `--mmproj` for image support. An example Frigate configuration: + +```yaml +genai: + default: + provider: llamacpp + base_url: http://localhost:8080 + model: your-model-name + roles: + - embeddings + - vision + - tools + +semantic_search: + enabled: True + model: default +``` + +The llama.cpp server must be started with `--embeddings` to enable the embeddings API, and it must load a multimodal embeddings model (with `--mmproj` for image input). See the [llama.cpp server documentation](https://github.com/ggml-org/llama.cpp/blob/master/tools/server/README.md) for details. + +:::note + +Switching between Jina models and a GenAI provider requires reindexing. Embeddings from different backends are incompatible. + +::: + ### GPU Acceleration The CLIP models are downloaded in ONNX format, and the `large` model can be accelerated using GPU hardware, when available. This depends on the Docker build that is used. You can also target a specific device in a multi-GPU installation. 
diff --git a/frigate/config/classification.py b/frigate/config/classification.py index a1e7b89a5..e507a7817 100644 --- a/frigate/config/classification.py +++ b/frigate/config/classification.py @@ -1,5 +1,5 @@ from enum import Enum -from typing import Dict, List, Optional +from typing import Dict, List, Optional, Union from pydantic import ConfigDict, Field @@ -173,10 +173,10 @@ class SemanticSearchConfig(FrigateBaseModel): title="Reindex on startup", description="Trigger a full reindex of historical tracked objects into the embeddings database.", ) - model: Optional[SemanticSearchModelEnum] = Field( + model: Optional[Union[SemanticSearchModelEnum, str]] = Field( default=SemanticSearchModelEnum.jinav1, - title="Semantic search model", - description="The embeddings model to use for semantic search (for example 'jinav1').", + title="Semantic search model or GenAI provider name", + description="The embeddings model to use for semantic search (for example 'jinav1'), or the name of a GenAI provider with the embeddings role.", ) model_size: str = Field( default="small", diff --git a/frigate/config/config.py b/frigate/config/config.py index 7e2d0eddc..339d675dc 100644 --- a/frigate/config/config.py +++ b/frigate/config/config.py @@ -61,6 +61,7 @@ from .classification import ( FaceRecognitionConfig, LicensePlateRecognitionConfig, SemanticSearchConfig, + SemanticSearchModelEnum, ) from .database import DatabaseConfig from .env import EnvVars @@ -592,6 +593,24 @@ class FrigateConfig(FrigateBaseModel): ) role_to_name[role] = name + # validate semantic_search.model when it is a GenAI provider name + if ( + self.semantic_search.enabled + and isinstance(self.semantic_search.model, str) + and not isinstance(self.semantic_search.model, SemanticSearchModelEnum) + ): + if self.semantic_search.model not in self.genai: + raise ValueError( + f"semantic_search.model '{self.semantic_search.model}' is not a " + "valid GenAI config key. Must match a key in genai config." 
+ ) + genai_cfg = self.genai[self.semantic_search.model] + if GenAIRoleEnum.embeddings not in genai_cfg.roles: + raise ValueError( + f"GenAI provider '{self.semantic_search.model}' must have " + "'embeddings' in its roles for semantic search." + ) + # set default min_score for object attributes for attribute in self.model.all_attributes: if not self.objects.filters.get(attribute): diff --git a/frigate/embeddings/embeddings.py b/frigate/embeddings/embeddings.py index 8d7bcd235..d31d1b058 100644 --- a/frigate/embeddings/embeddings.py +++ b/frigate/embeddings/embeddings.py @@ -28,6 +28,7 @@ from frigate.types import ModelStatusTypesEnum from frigate.util.builtin import EventsPerSecond, InferenceSpeed, serialize from frigate.util.file import get_event_thumbnail_bytes +from .genai_embedding import GenAIEmbedding from .onnx.jina_v1_embedding import JinaV1ImageEmbedding, JinaV1TextEmbedding from .onnx.jina_v2_embedding import JinaV2Embedding @@ -73,6 +74,7 @@ class Embeddings: config: FrigateConfig, db: SqliteVecQueueDatabase, metrics: DataProcessorMetrics, + genai_manager=None, ) -> None: self.config = config self.db = db @@ -104,7 +106,27 @@ class Embeddings: }, ) - if self.config.semantic_search.model == SemanticSearchModelEnum.jinav2: + model_cfg = self.config.semantic_search.model + + if not isinstance(model_cfg, SemanticSearchModelEnum): + # GenAI provider + embeddings_client = ( + genai_manager.embeddings_client if genai_manager else None + ) + if not embeddings_client: + raise ValueError( + f"semantic_search.model is '{model_cfg}' (GenAI provider) but " + "no embeddings client is configured. Ensure the GenAI provider " + "has 'embeddings' in its roles." 
+ ) + self.embedding = GenAIEmbedding(embeddings_client) + self.text_embedding = lambda input_data: self.embedding( + input_data, embedding_type="text" + ) + self.vision_embedding = lambda input_data: self.embedding( + input_data, embedding_type="vision" + ) + elif model_cfg == SemanticSearchModelEnum.jinav2: # Single JinaV2Embedding instance for both text and vision self.embedding = JinaV2Embedding( model_size=self.config.semantic_search.model_size, @@ -118,7 +140,8 @@ class Embeddings: self.vision_embedding = lambda input_data: self.embedding( input_data, embedding_type="vision" ) - else: # Default to jinav1 + else: + # Default to jinav1 self.text_embedding = JinaV1TextEmbedding( model_size=config.semantic_search.model_size, requestor=self.requestor, @@ -136,8 +159,11 @@ class Embeddings: self.metrics.text_embeddings_eps.value = self.text_eps.eps() def get_model_definitions(self): - # Version-specific models - if self.config.semantic_search.model == SemanticSearchModelEnum.jinav2: + model_cfg = self.config.semantic_search.model + if not isinstance(model_cfg, SemanticSearchModelEnum): + # GenAI provider: no ONNX models to download + models = [] + elif model_cfg == SemanticSearchModelEnum.jinav2: models = [ "jinaai/jina-clip-v2-tokenizer", "jinaai/jina-clip-v2-model_fp16.onnx" @@ -312,11 +338,12 @@ class Embeddings: # Get total count of events to process total_events = Event.select().count() - batch_size = ( - 4 - if self.config.semantic_search.model == SemanticSearchModelEnum.jinav2 - else 32 - ) + if not isinstance(self.config.semantic_search.model, SemanticSearchModelEnum): + batch_size = 1 + elif self.config.semantic_search.model == SemanticSearchModelEnum.jinav2: + batch_size = 4 + else: + batch_size = 32 current_page = 1 totals = { diff --git a/frigate/embeddings/genai_embedding.py b/frigate/embeddings/genai_embedding.py new file mode 100644 index 000000000..d3637bb73 --- /dev/null +++ b/frigate/embeddings/genai_embedding.py @@ -0,0 +1,89 @@ +"""GenAI-backed 
embeddings for semantic search.""" + +import io +import logging +from typing import TYPE_CHECKING + +import numpy as np +from PIL import Image + +if TYPE_CHECKING: + from frigate.genai import GenAIClient + +logger = logging.getLogger(__name__) + +EMBEDDING_DIM = 768 + + +class GenAIEmbedding: + """Embedding adapter that delegates to a GenAI provider's embed API. + + Provides the same interface as JinaV2Embedding for semantic search: + __call__(inputs, embedding_type) -> list[np.ndarray]. Output embeddings are + normalized to 768 dimensions for Frigate's sqlite-vec schema. + """ + + def __init__(self, client: "GenAIClient") -> None: + self.client = client + + def __call__( + self, + inputs: list[str] | list[bytes] | list[Image.Image], + embedding_type: str = "text", + ) -> list[np.ndarray]: + """Generate embeddings for text or images. + + Args: + inputs: List of strings (text) or bytes/PIL images (vision). + embedding_type: "text" or "vision". + + Returns: + List of 768-dim numpy float32 arrays. + """ + if not inputs: + return [] + + if embedding_type == "text": + texts = [str(x) for x in inputs] + embeddings = self.client.embed(texts=texts) + elif embedding_type == "vision": + images: list[bytes] = [] + for inp in inputs: + if isinstance(inp, bytes): + images.append(inp) + elif isinstance(inp, Image.Image): + buf = io.BytesIO() + inp.convert("RGB").save(buf, format="JPEG") + images.append(buf.getvalue()) + else: + logger.warning( + "GenAIEmbedding: skipping unsupported vision input type %s", + type(inp).__name__, + ) + if not images: + return [] + embeddings = self.client.embed(images=images) + else: + raise ValueError( + f"Invalid embedding_type '{embedding_type}'. Must be 'text' or 'vision'." + ) + + result = [] + for emb in embeddings: + arr = np.asarray(emb, dtype=np.float32) + if arr.ndim > 1: + # Some providers return token-level embeddings; pool to one vector. 
+ arr = arr.mean(axis=0) + arr = arr.flatten() + if arr.size != EMBEDDING_DIM: + if arr.size > EMBEDDING_DIM: + arr = arr[:EMBEDDING_DIM] + else: + arr = np.pad( + arr, + (0, EMBEDDING_DIM - arr.size), + mode="constant", + constant_values=0, + ) + result.append(arr) + return result diff --git a/frigate/embeddings/maintainer.py b/frigate/embeddings/maintainer.py index b85f231c0..533f79e50 100644 --- a/frigate/embeddings/maintainer.py +++ b/frigate/embeddings/maintainer.py @@ -123,8 +123,10 @@ class EmbeddingMaintainer(threading.Thread): models = [Event, Recordings, ReviewSegment, Trigger] db.bind(models) + self.genai_manager = GenAIClientManager(config) + if config.semantic_search.enabled: - self.embeddings = Embeddings(config, db, metrics) + self.embeddings = Embeddings(config, db, metrics, self.genai_manager) # Check if we need to re-index events if config.semantic_search.reindex: @@ -151,7 +153,6 @@ class EmbeddingMaintainer(threading.Thread): self.frame_manager = SharedMemoryFrameManager() self.detected_license_plates: dict[str, dict[str, Any]] = {} - self.genai_manager = GenAIClientManager(config) # model runners to share between realtime and post processors if self.config.lpr.enabled: diff --git a/frigate/genai/__init__.py b/frigate/genai/__init__.py index f52a19e45..fa90f1463 100644 --- a/frigate/genai/__init__.py +++ b/frigate/genai/__init__.py @@ -7,6 +7,7 @@ import os import re from typing import Any, Optional +import numpy as np from playhouse.shortcuts import model_to_dict from frigate.config import CameraConfig, GenAIConfig, GenAIProviderEnum @@ -304,6 +305,25 @@ Guidelines: """Get the context window size for this provider in tokens.""" return 4096 + def embed( + self, + texts: list[str] | None = None, + images: list[bytes] | None = None, + ) -> list[np.ndarray]: + """Generate embeddings for text and/or images. + + Returns list of numpy arrays (one per input). Expected dimension is 768 + for Frigate semantic search compatibility. 
+ + Providers that support embeddings should override this method. + """ + logger.warning( + "%s does not support embeddings. " + "This method should be overridden by the provider implementation.", + self.__class__.__name__, + ) + return [] + def chat_with_tools( self, messages: list[dict[str, Any]], diff --git a/frigate/genai/llama_cpp.py b/frigate/genai/llama_cpp.py index 24dcea2fb..f9c251790 100644 --- a/frigate/genai/llama_cpp.py +++ b/frigate/genai/llama_cpp.py @@ -1,12 +1,15 @@ """llama.cpp Provider for Frigate AI.""" import base64 +import io import json import logging from typing import Any, Optional import httpx +import numpy as np import requests +from PIL import Image from frigate.config import GenAIProviderEnum from frigate.genai import GenAIClient, register_genai_provider @@ -15,6 +18,20 @@ from frigate.genai.utils import parse_tool_calls_from_message logger = logging.getLogger(__name__) +def _to_jpeg(img_bytes: bytes) -> bytes | None: + """Convert image bytes to JPEG. llama.cpp/STB does not support WebP.""" + try: + img = Image.open(io.BytesIO(img_bytes)) + if img.mode != "RGB": + img = img.convert("RGB") + buf = io.BytesIO() + img.save(buf, format="JPEG", quality=85) + return buf.getvalue() + except Exception as e: + logger.warning("Failed to convert image to JPEG: %s", e) + return None + + @register_genai_provider(GenAIProviderEnum.llamacpp) class LlamaCppClient(GenAIClient): """Generative AI client for Frigate using llama.cpp server.""" @@ -176,6 +193,110 @@ class LlamaCppClient(GenAIClient): ) return result if result else None + def embed( + self, + texts: list[str] | None = None, + images: list[bytes] | None = None, + ) -> list[np.ndarray]: + """Generate embeddings via llama.cpp /embeddings endpoint. + + Supports batch requests. Uses content format with prompt_string and + multimodal_data for images (PR #15108). Server must be started with + --embeddings and --mmproj for multimodal support. 
+ """ + if self.provider is None: + logger.warning( + "llama.cpp provider has not been initialized. Check your llama.cpp configuration." + ) + return [] + + texts = texts or [] + images = images or [] + if not texts and not images: + return [] + + EMBEDDING_DIM = 768 + + content = [] + for text in texts: + content.append({"prompt_string": text}) + for img in images: + # llama.cpp uses STB which does not support WebP; convert to JPEG + jpeg_bytes = _to_jpeg(img) + to_encode = jpeg_bytes if jpeg_bytes is not None else img + encoded = base64.b64encode(to_encode).decode("utf-8") + # prompt_string must contain <__media__> placeholder for image tokenization + content.append( + { + "prompt_string": "<__media__>\n", + "multimodal_data": [encoded], + } + ) + + try: + response = requests.post( + f"{self.provider}/embeddings", + json={"model": self.genai_config.model, "content": content}, + timeout=self.timeout, + ) + response.raise_for_status() + result = response.json() + + items = result.get("data", result) if isinstance(result, dict) else result + if not isinstance(items, list): + logger.warning("llama.cpp embeddings returned unexpected format") + return [] + + embeddings = [] + for item in items: + emb = item.get("embedding") if isinstance(item, dict) else None + if emb is None: + logger.warning("llama.cpp embeddings item missing embedding field") + continue + arr = np.array(emb, dtype=np.float32) + if arr.ndim > 1: + # llama.cpp can return token-level embeddings; pool per item + arr = arr.mean(axis=0) + arr = arr.flatten() + orig_dim = arr.size + if orig_dim != EMBEDDING_DIM: + if orig_dim > EMBEDDING_DIM: + arr = arr[:EMBEDDING_DIM] + logger.debug( + "Truncated llama.cpp embedding from %d to %d dimensions", + orig_dim, + EMBEDDING_DIM, + ) + else: + arr = np.pad( + arr, + (0, EMBEDDING_DIM - orig_dim), + mode="constant", + constant_values=0, + ) + logger.debug( + "Padded llama.cpp embedding from %d to %d dimensions", + orig_dim, + EMBEDDING_DIM, + ) + 
embeddings.append(arr) + return embeddings + except requests.exceptions.Timeout: + logger.warning("llama.cpp embeddings request timed out") + return [] + except requests.exceptions.RequestException as e: + error_detail = str(e) + if hasattr(e, "response") and e.response is not None: + try: + error_detail = f"{str(e)} - Response: {e.response.text[:500]}" + except Exception: + pass + logger.warning("llama.cpp embeddings error: %s", error_detail) + return [] + except Exception as e: + logger.warning("Unexpected error in llama.cpp embeddings: %s", str(e)) + return [] + def chat_with_tools( self, messages: list[dict[str, Any]], diff --git a/web/src/lib/const.ts b/web/src/lib/const.ts index 55515f2ae..5000d7a0b 100644 --- a/web/src/lib/const.ts +++ b/web/src/lib/const.ts @@ -1,3 +1,6 @@ +/** ONNX embedding models that require local model downloads. GenAI providers are not in this list. */ +export const JINA_EMBEDDING_MODELS = ["jinav1", "jinav2"] as const; + export const supportedLanguageKeys = [ "en", "es", diff --git a/web/src/pages/Explore.tsx b/web/src/pages/Explore.tsx index 8f50e982e..35860ed35 100644 --- a/web/src/pages/Explore.tsx +++ b/web/src/pages/Explore.tsx @@ -23,6 +23,7 @@ import { toast } from "sonner"; import useSWR from "swr"; import useSWRInfinite from "swr/infinite"; import { useDocDomain } from "@/hooks/use-doc-domain"; +import { JINA_EMBEDDING_MODELS } from "@/lib/const"; const API_LIMIT = 25; @@ -293,7 +294,12 @@ export default function Explore() { const modelVersion = config?.semantic_search.model || "jinav1"; const modelSize = config?.semantic_search.model_size || "small"; - // Text model state + // GenAI providers have no local models to download + const isGenaiEmbeddings = + typeof modelVersion === "string" && + !(JINA_EMBEDDING_MODELS as readonly string[]).includes(modelVersion); + + // Text model state (skipped for GenAI - no local models) const { payload: textModelState } = useModelState( modelVersion === "jinav1" ? 
"jinaai/jina-clip-v1-text_model_fp16.onnx" @@ -328,6 +334,10 @@ export default function Explore() { ); const allModelsLoaded = useMemo(() => { + if (isGenaiEmbeddings) { + return true; + } + return ( textModelState === "downloaded" && textTokenizerState === "downloaded" && @@ -335,6 +345,7 @@ export default function Explore() { visionFeatureExtractorState === "downloaded" ); }, [ + isGenaiEmbeddings, textModelState, textTokenizerState, visionModelState, @@ -358,10 +369,11 @@ export default function Explore() { !defaultViewLoaded || (config?.semantic_search.enabled && (!reindexState || - !textModelState || - !textTokenizerState || - !visionModelState || - !visionFeatureExtractorState)) + (!isGenaiEmbeddings && + (!textModelState || + !textTokenizerState || + !visionModelState || + !visionFeatureExtractorState)))) ) { return (