diff --git a/docs/docs/configuration/semantic_search.md b/docs/docs/configuration/semantic_search.md
index 91f435ff0..4a880fd5c 100644
--- a/docs/docs/configuration/semantic_search.md
+++ b/docs/docs/configuration/semantic_search.md
@@ -76,6 +76,40 @@ Switching between V1 and V2 requires reindexing your embeddings. The embeddings
 
 :::
 
+### GenAI Provider
+
+Frigate can use a GenAI provider for semantic search embeddings when that provider has the `embeddings` role. Currently, only **llama.cpp** supports multimodal embeddings (both text and images).
+
+To use llama.cpp for semantic search:
+
+1. Configure a GenAI provider in your config with `embeddings` in its `roles`.
+2. Set `semantic_search.model` to the GenAI config key (e.g. `default`).
+3. Start the llama.cpp server with `--embeddings`, and with `--mmproj` for image support. An example Frigate configuration for steps 1 and 2:
+
+```yaml
+genai:
+  default:
+    provider: llamacpp
+    base_url: http://localhost:8080
+    model: your-model-name
+    roles:
+      - embeddings
+      - vision
+      - tools
+
+semantic_search:
+  enabled: True
+  model: default
+```
+
+The llama.cpp server must be started with `--embeddings` to enable the embeddings API, and it must be serving a multimodal embeddings model. See the [llama.cpp server documentation](https://github.com/ggml-org/llama.cpp/blob/master/tools/server/README.md) for details.
+
+:::note
+
+Switching between Jina models and a GenAI provider requires reindexing. Embeddings from different backends are incompatible.
+
+:::
+
 ### GPU Acceleration
 
 The CLIP models are downloaded in ONNX format, and the `large` model can be accelerated using GPU hardware, when available. This depends on the Docker build that is used. You can also target a specific device in a multi-GPU installation.
diff --git a/frigate/config/classification.py b/frigate/config/classification.py
index a1e7b89a5..e507a7817 100644
--- a/frigate/config/classification.py
+++ b/frigate/config/classification.py
@@ -1,5 +1,5 @@
 from enum import Enum
-from typing import Dict, List, Optional
+from typing import Dict, List, Optional, Union
 
 from pydantic import ConfigDict, Field
 
@@ -173,10 +173,15 @@ class SemanticSearchConfig(FrigateBaseModel):
         title="Reindex on startup",
         description="Trigger a full reindex of historical tracked objects into the embeddings database.",
     )
-    model: Optional[SemanticSearchModelEnum] = Field(
+    # Validate left-to-right so "jinav1"/"jinav2" are coerced to
+    # SemanticSearchModelEnum before falling back to a free-form string.
+    # With pydantic's default "smart" union a plain str input matches `str`
+    # exactly and stays a str, which isinstance(..., SemanticSearchModelEnum)
+    # checks would then treat as a GenAI provider name.
+    model: Optional[Union[SemanticSearchModelEnum, str]] = Field(
         default=SemanticSearchModelEnum.jinav1,
-        title="Semantic search model",
-        description="The embeddings model to use for semantic search (for example 'jinav1').",
+        title="Semantic search model or GenAI provider name",
+        description="The embeddings model to use for semantic search (for example 'jinav1'), or the name of a GenAI provider with the embeddings role.",
+        union_mode="left_to_right",
     )
     model_size: str = Field(
         default="small",
diff --git a/frigate/config/config.py b/frigate/config/config.py
index 7e2d0eddc..339d675dc 100644
--- a/frigate/config/config.py
+++ b/frigate/config/config.py
@@ -61,6 +61,7 @@ from .classification import (
     FaceRecognitionConfig,
     LicensePlateRecognitionConfig,
     SemanticSearchConfig,
+    SemanticSearchModelEnum,
 )
 from .database import DatabaseConfig
 from .env import EnvVars
@@ -592,6 +593,24 @@ class FrigateConfig(FrigateBaseModel):
                     )
                 role_to_name[role] = name
 
+        # validate semantic_search.model when it is a GenAI provider name
+        if (
+            self.semantic_search.enabled
+            and isinstance(self.semantic_search.model, str)
+            and not isinstance(self.semantic_search.model, SemanticSearchModelEnum)
+        ):
+            if self.semantic_search.model not in self.genai:
+                raise ValueError(
+                    f"semantic_search.model '{self.semantic_search.model}' is not a "
+                    "valid GenAI config key. Must match a key in genai config."
+                )
+            genai_cfg = self.genai[self.semantic_search.model]
+            if GenAIRoleEnum.embeddings not in genai_cfg.roles:
+                raise ValueError(
+                    f"GenAI provider '{self.semantic_search.model}' must have "
+                    "'embeddings' in its roles for semantic search."
+                )
+
         # set default min_score for object attributes
         for attribute in self.model.all_attributes:
             if not self.objects.filters.get(attribute):
diff --git a/frigate/embeddings/embeddings.py b/frigate/embeddings/embeddings.py
index 8d7bcd235..d31d1b058 100644
--- a/frigate/embeddings/embeddings.py
+++ b/frigate/embeddings/embeddings.py
@@ -28,6 +28,7 @@ from frigate.types import ModelStatusTypesEnum
 from frigate.util.builtin import EventsPerSecond, InferenceSpeed, serialize
 from frigate.util.file import get_event_thumbnail_bytes
 
+from .genai_embedding import GenAIEmbedding
 from .onnx.jina_v1_embedding import JinaV1ImageEmbedding, JinaV1TextEmbedding
 from .onnx.jina_v2_embedding import JinaV2Embedding
 
@@ -73,6 +74,7 @@ class Embeddings:
         config: FrigateConfig,
         db: SqliteVecQueueDatabase,
         metrics: DataProcessorMetrics,
+        genai_manager=None,
     ) -> None:
         self.config = config
         self.db = db
@@ -104,7 +106,27 @@ class Embeddings:
                 },
             )
 
-        if self.config.semantic_search.model == SemanticSearchModelEnum.jinav2:
+        model_cfg = self.config.semantic_search.model
+
+        if not isinstance(model_cfg, SemanticSearchModelEnum):
+            # GenAI provider
+            embeddings_client = (
+                genai_manager.embeddings_client if genai_manager else None
+            )
+            if not embeddings_client:
+                raise ValueError(
+                    f"semantic_search.model is '{model_cfg}' (GenAI provider) but "
+                    "no embeddings client is configured. Ensure the GenAI provider "
+                    "has 'embeddings' in its roles."
+                )
+            self.embedding = GenAIEmbedding(embeddings_client)
+            self.text_embedding = lambda input_data: self.embedding(
+                input_data, embedding_type="text"
+            )
+            self.vision_embedding = lambda input_data: self.embedding(
+                input_data, embedding_type="vision"
+            )
+        elif model_cfg == SemanticSearchModelEnum.jinav2:
             # Single JinaV2Embedding instance for both text and vision
             self.embedding = JinaV2Embedding(
                 model_size=self.config.semantic_search.model_size,
@@ -118,7 +140,8 @@ class Embeddings:
             self.vision_embedding = lambda input_data: self.embedding(
                 input_data, embedding_type="vision"
             )
-        else:  # Default to jinav1
+        else:
+            # Default to jinav1
             self.text_embedding = JinaV1TextEmbedding(
                 model_size=config.semantic_search.model_size,
                 requestor=self.requestor,
@@ -136,8 +159,11 @@ class Embeddings:
         self.metrics.text_embeddings_eps.value = self.text_eps.eps()
 
     def get_model_definitions(self):
-        # Version-specific models
-        if self.config.semantic_search.model == SemanticSearchModelEnum.jinav2:
+        model_cfg = self.config.semantic_search.model
+        if not isinstance(model_cfg, SemanticSearchModelEnum):
+            # GenAI provider: no ONNX models to download
+            models = []
+        elif model_cfg == SemanticSearchModelEnum.jinav2:
             models = [
                 "jinaai/jina-clip-v2-tokenizer",
                 "jinaai/jina-clip-v2-model_fp16.onnx"
@@ -312,11 +338,12 @@ class Embeddings:
         # Get total count of events to process
         total_events = Event.select().count()
 
-        batch_size = (
-            4
-            if self.config.semantic_search.model == SemanticSearchModelEnum.jinav2
-            else 32
-        )
+        if not isinstance(self.config.semantic_search.model, SemanticSearchModelEnum):
+            batch_size = 1
+        elif self.config.semantic_search.model == SemanticSearchModelEnum.jinav2:
+            batch_size = 4
+        else:
+            batch_size = 32
         current_page = 1
 
         totals = {
diff --git a/frigate/embeddings/genai_embedding.py b/frigate/embeddings/genai_embedding.py
new file mode 100644
index 000000000..d3637bb73
--- /dev/null
+++ b/frigate/embeddings/genai_embedding.py
@@ -0,0 +1,89 @@
+"""GenAI-backed embeddings for semantic search."""
+
+import io
+import logging
+from typing import TYPE_CHECKING
+
+import numpy as np
+from PIL import Image
+
+if TYPE_CHECKING:
+    from frigate.genai import GenAIClient
+
+logger = logging.getLogger(__name__)
+
+EMBEDDING_DIM = 768
+
+
+class GenAIEmbedding:
+    """Embedding adapter that delegates to a GenAI provider's embed API.
+
+    Provides the same interface as JinaV2Embedding for semantic search:
+    __call__(inputs, embedding_type) -> list[np.ndarray]. Output embeddings are
+    truncated or zero-padded to 768 dimensions for Frigate's sqlite-vec schema.
+    """
+
+    def __init__(self, client: "GenAIClient") -> None:
+        self.client = client
+
+    def __call__(
+        self,
+        inputs: list[str] | list[bytes] | list[Image.Image],
+        embedding_type: str = "text",
+    ) -> list[np.ndarray]:
+        """Generate embeddings for text or images.
+
+        Args:
+            inputs: List of strings (text) or bytes/PIL images (vision); other vision input types are skipped.
+            embedding_type: "text" or "vision".
+
+        Returns:
+            List of 768-dim float32 arrays, one per embedding the client returned (may be fewer than inputs).
+        """
+        if not inputs:
+            return []
+
+        if embedding_type == "text":
+            texts = [str(x) for x in inputs]
+            embeddings = self.client.embed(texts=texts)
+        elif embedding_type == "vision":
+            images: list[bytes] = []
+            for inp in inputs:
+                if isinstance(inp, bytes):
+                    images.append(inp)
+                elif isinstance(inp, Image.Image):
+                    buf = io.BytesIO()
+                    inp.convert("RGB").save(buf, format="JPEG")
+                    images.append(buf.getvalue())
+                else:
+                    logger.warning(
+                        "GenAIEmbedding: skipping unsupported vision input type %s",
+                        type(inp).__name__,
+                    )
+            if not images:
+                return []
+            embeddings = self.client.embed(images=images)
+        else:
+            raise ValueError(
+                f"Invalid embedding_type '{embedding_type}'. Must be 'text' or 'vision'."
+            )
+
+        result = []
+        for emb in embeddings:
+            arr = np.asarray(emb, dtype=np.float32)
+            if arr.ndim > 1:
+                # Multi-dimensional result (e.g. token-level embeddings): mean-pool over axis 0.
+                arr = arr.mean(axis=0)
+            arr = arr.flatten()
+            if arr.size != EMBEDDING_DIM:
+                if arr.size > EMBEDDING_DIM:
+                    arr = arr[:EMBEDDING_DIM]
+                else:
+                    arr = np.pad(
+                        arr,
+                        (0, EMBEDDING_DIM - arr.size),
+                        mode="constant",
+                        constant_values=0,
+                    )
+            result.append(arr)
+        return result
diff --git a/frigate/embeddings/maintainer.py b/frigate/embeddings/maintainer.py
index b85f231c0..533f79e50 100644
--- a/frigate/embeddings/maintainer.py
+++ b/frigate/embeddings/maintainer.py
@@ -123,8 +123,10 @@ class EmbeddingMaintainer(threading.Thread):
         models = [Event, Recordings, ReviewSegment, Trigger]
         db.bind(models)
 
+        self.genai_manager = GenAIClientManager(config)
+
         if config.semantic_search.enabled:
-            self.embeddings = Embeddings(config, db, metrics)
+            self.embeddings = Embeddings(config, db, metrics, self.genai_manager)
 
             # Check if we need to re-index events
             if config.semantic_search.reindex:
@@ -151,7 +153,6 @@ class EmbeddingMaintainer(threading.Thread):
         self.frame_manager = SharedMemoryFrameManager()
 
         self.detected_license_plates: dict[str, dict[str, Any]] = {}
-        self.genai_manager = GenAIClientManager(config)
 
         # model runners to share between realtime and post processors
         if self.config.lpr.enabled:
diff --git a/frigate/genai/__init__.py b/frigate/genai/__init__.py
index f52a19e45..fa90f1463 100644
--- a/frigate/genai/__init__.py
+++ b/frigate/genai/__init__.py
@@ -7,6 +7,7 @@ import os
 import re
 from typing import Any, Optional
 
+import numpy as np
 from playhouse.shortcuts import model_to_dict
 
 from frigate.config import CameraConfig, GenAIConfig, GenAIProviderEnum
@@ -304,6 +305,25 @@ Guidelines:
         """Get the context window size for this provider in tokens."""
         return 4096
 
+    def embed(
+        self,
+        texts: list[str] | None = None,
+        images: list[bytes] | None = None,
+    ) -> list[np.ndarray]:
+        """Generate embeddings for text and/or images.
+
+        Overrides should return a list of numpy arrays (one per input), expected
+        to be 768-dimensional for Frigate semantic search compatibility.
+
+        Providers that support embeddings should override this; the base logs a warning and returns [].
+        """
+        logger.warning(
+            "%s does not support embeddings. "
+            "This method should be overridden by the provider implementation.",
+            self.__class__.__name__,
+        )
+        return []
+
     def chat_with_tools(
         self,
         messages: list[dict[str, Any]],
diff --git a/frigate/genai/llama_cpp.py b/frigate/genai/llama_cpp.py
index 24dcea2fb..f9c251790 100644
--- a/frigate/genai/llama_cpp.py
+++ b/frigate/genai/llama_cpp.py
@@ -1,12 +1,15 @@
 """llama.cpp Provider for Frigate AI."""
 
 import base64
+import io
 import json
 import logging
 from typing import Any, Optional
 
 import httpx
+import numpy as np
 import requests
+from PIL import Image
 
 from frigate.config import GenAIProviderEnum
 from frigate.genai import GenAIClient, register_genai_provider
@@ -15,6 +18,20 @@ from frigate.genai.utils import parse_tool_calls_from_message
 logger = logging.getLogger(__name__)
 
 
+def _to_jpeg(img_bytes: bytes) -> bytes | None:
+    """Re-encode image bytes as RGB JPEG, or return None on failure. llama.cpp/STB does not support WebP."""
+    try:
+        img = Image.open(io.BytesIO(img_bytes))
+        if img.mode != "RGB":
+            img = img.convert("RGB")
+        buf = io.BytesIO()
+        img.save(buf, format="JPEG", quality=85)
+        return buf.getvalue()
+    except Exception as e:
+        logger.warning("Failed to convert image to JPEG: %s", e)
+        return None
+
+
 @register_genai_provider(GenAIProviderEnum.llamacpp)
 class LlamaCppClient(GenAIClient):
     """Generative AI client for Frigate using llama.cpp server."""
@@ -176,6 +193,110 @@ class LlamaCppClient(GenAIClient):
             )
         return result if result else None
 
+    def embed(
+        self,
+        texts: list[str] | None = None,
+        images: list[bytes] | None = None,
+    ) -> list[np.ndarray]:
+        """Generate embeddings via the llama.cpp /embeddings endpoint.
+
+        Sends all texts, then all images (re-encoded to JPEG when possible), in one
+        request as prompt_string / multimodal_data content items (PR #15108). The
+        server must be started with --embeddings and --mmproj for multimodal support.
+        Returns 768-dim float32 arrays (truncated/zero-padded); [] if the request fails."""
+        if self.provider is None:
+            logger.warning(
+                "llama.cpp provider has not been initialized. Check your llama.cpp configuration."
+            )
+            return []
+
+        texts = texts or []
+        images = images or []
+        if not texts and not images:
+            return []
+
+        EMBEDDING_DIM = 768
+
+        content = []
+        for text in texts:
+            content.append({"prompt_string": text})
+        for img in images:
+            # llama.cpp uses STB which does not support WebP; convert to JPEG
+            jpeg_bytes = _to_jpeg(img)
+            to_encode = jpeg_bytes if jpeg_bytes is not None else img
+            encoded = base64.b64encode(to_encode).decode("utf-8")
+            # prompt_string must contain <__media__> placeholder for image tokenization
+            content.append(
+                {
+                    "prompt_string": "<__media__>\n",
+                    "multimodal_data": [encoded],
+                }
+            )
+
+        try:
+            response = requests.post(
+                f"{self.provider}/embeddings",
+                json={"model": self.genai_config.model, "content": content},
+                timeout=self.timeout,
+            )
+            response.raise_for_status()
+            result = response.json()
+
+            items = result.get("data", result) if isinstance(result, dict) else result
+            if not isinstance(items, list):
+                logger.warning("llama.cpp embeddings returned unexpected format")
+                return []
+
+            embeddings = []
+            for item in items:
+                emb = item.get("embedding") if isinstance(item, dict) else None
+                if emb is None:
+                    logger.warning("llama.cpp embeddings item missing embedding field")
+                    continue
+                arr = np.array(emb, dtype=np.float32)
+                if arr.ndim > 1:
+                    # llama.cpp can return token-level embeddings; pool per item
+                    arr = arr.mean(axis=0)
+                arr = arr.flatten()
+                orig_dim = arr.size
+                if orig_dim != EMBEDDING_DIM:
+                    if orig_dim > EMBEDDING_DIM:
+                        arr = arr[:EMBEDDING_DIM]
+                        logger.debug(
+                            "Truncated llama.cpp embedding from %d to %d dimensions",
+                            orig_dim,
+                            EMBEDDING_DIM,
+                        )
+                    else:
+                        arr = np.pad(
+                            arr,
+                            (0, EMBEDDING_DIM - orig_dim),
+                            mode="constant",
+                            constant_values=0,
+                        )
+                        logger.debug(
+                            "Padded llama.cpp embedding from %d to %d dimensions",
+                            orig_dim,
+                            EMBEDDING_DIM,
+                        )
+                embeddings.append(arr)
+            return embeddings
+        except requests.exceptions.Timeout:
+            logger.warning("llama.cpp embeddings request timed out")
+            return []
+        except requests.exceptions.RequestException as e:
+            error_detail = str(e)
+            if hasattr(e, "response") and e.response is not None:
+                try:
+                    error_detail = f"{str(e)} - Response: {e.response.text[:500]}"
+                except Exception:
+                    pass
+            logger.warning("llama.cpp embeddings error: %s", error_detail)
+            return []
+        except Exception as e:
+            logger.warning("Unexpected error in llama.cpp embeddings: %s", str(e))
+            return []
+
     def chat_with_tools(
         self,
         messages: list[dict[str, Any]],
diff --git a/web/src/lib/const.ts b/web/src/lib/const.ts
index 55515f2ae..5000d7a0b 100644
--- a/web/src/lib/const.ts
+++ b/web/src/lib/const.ts
@@ -1,3 +1,6 @@
+/** ONNX embedding models that require local model downloads. GenAI providers are not in this list. */
+export const JINA_EMBEDDING_MODELS = ["jinav1", "jinav2"] as const;
+
 export const supportedLanguageKeys = [
   "en",
   "es",
diff --git a/web/src/pages/Explore.tsx b/web/src/pages/Explore.tsx
index 8f50e982e..35860ed35 100644
--- a/web/src/pages/Explore.tsx
+++ b/web/src/pages/Explore.tsx
@@ -23,6 +23,7 @@ import { toast } from "sonner";
 import useSWR from "swr";
 import useSWRInfinite from "swr/infinite";
 import { useDocDomain } from "@/hooks/use-doc-domain";
+import { JINA_EMBEDDING_MODELS } from "@/lib/const";
 
 const API_LIMIT = 25;
 
@@ -293,7 +294,12 @@ export default function Explore() {
   const modelVersion = config?.semantic_search.model || "jinav1";
   const modelSize = config?.semantic_search.model_size || "small";
 
-  // Text model state
+  // GenAI providers have no local models to download
+  const isGenaiEmbeddings =
+    typeof modelVersion === "string" &&
+    !(JINA_EMBEDDING_MODELS as readonly string[]).includes(modelVersion);
+
+  // Text model state (skipped for GenAI - no local models)
   const { payload: textModelState } = useModelState(
     modelVersion === "jinav1"
       ? "jinaai/jina-clip-v1-text_model_fp16.onnx"
@@ -328,6 +334,10 @@ export default function Explore() {
   );
 
   const allModelsLoaded = useMemo(() => {
+    if (isGenaiEmbeddings) {
+      return true;
+    }
+
     return (
       textModelState === "downloaded" &&
       textTokenizerState === "downloaded" &&
@@ -335,6 +345,7 @@ export default function Explore() {
       visionFeatureExtractorState === "downloaded"
     );
   }, [
+    isGenaiEmbeddings,
     textModelState,
     textTokenizerState,
     visionModelState,
@@ -358,10 +369,11 @@ export default function Explore() {
     !defaultViewLoaded ||
     (config?.semantic_search.enabled &&
       (!reindexState ||
-        !textModelState ||
-        !textTokenizerState ||
-        !visionModelState ||
-        !visionFeatureExtractorState))
+        (!isGenaiEmbeddings &&
+          (!textModelState ||
+            !textTokenizerState ||
+            !visionModelState ||
+            !visionFeatureExtractorState))))
   ) {
     return (
       <ActivityIndicator className="absolute left-1/2 top-1/2 -translate-x-1/2 -translate-y-1/2" />