From 37dc0fd7cb6eecb6ac6b4c8e23c41f917a8b8646 Mon Sep 17 00:00:00 2001
From: user <wozz@koh.ms>
Date: Wed, 10 Dec 2025 14:06:08 -0500
Subject: [PATCH] add jina clip as service provider

---
 docs/docs/configuration/semantic_search.md   | 23 +++++-
 frigate/config/classification.py             |  1 +
 frigate/embeddings/remote/clip_as_service.py | 81 ++++++++++++++++++++
 3 files changed, 102 insertions(+), 3 deletions(-)
 create mode 100644 frigate/embeddings/remote/clip_as_service.py

diff --git a/docs/docs/configuration/semantic_search.md b/docs/docs/configuration/semantic_search.md
index 3801ff0b1..498d1ab67 100644
--- a/docs/docs/configuration/semantic_search.md
+++ b/docs/docs/configuration/semantic_search.md
@@ -82,13 +82,15 @@ Switching between V1 and V2 requires reindexing your embeddings. The embeddings
 
 ### Remote Providers
 
-Frigate can be configured to use remote services for generating embeddings. This is done by setting the `provider` field to `openai` or `ollama`.
+Frigate can be configured to use remote services for generating embeddings. This is done by setting the `provider` field to `openai`, `ollama`, or `clip_as_service`.
 
-For vision embeddings, remote providers use a two-step process:
+#### OpenAI and Ollama
+
+For OpenAI and Ollama, vision embeddings use a two-step process:
 1. A text description of the image is generated using the configured GenAI provider.
 2. An embedding is created from that description using the configured remote embedding provider.
 
-This means that you must have a GenAI provider configured to use vision embeddings with a remote provider.
+This means that you must have a GenAI provider configured to use vision embeddings with these providers.
 
 ```yaml
 semantic_search:
@@ -99,6 +101,21 @@ semantic_search:
     vision_model_prompt: "A detailed description of the image for semantic search."
 ```
 
+#### Jina CLIP-as-Service
+
+Frigate supports [Jina CLIP-as-Service](https://clip-as-service.jina.ai/), a multi-modal embedding service that can be hosted locally or remotely. This provider supports both text and image embeddings directly, without requiring a separate GenAI provider for image descriptions.
+
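+You can run CLIP-as-Service by following its [getting started guide](https://clip-as-service.jina.ai/user-guides/server/). Frigate sends plain HTTP requests to `<url>/post`, so the server must be started with its HTTP protocol enabled.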
+
+```yaml
+semantic_search:
+  enabled: True
+  provider: clip_as_service
+  remote:
+    url: "http://localhost:51000"
+    # model is typically handled by the service configuration
+```
+
 ### GPU Acceleration
 
 The CLIP models are downloaded in ONNX format, and the `large` model can be accelerated using GPU hardware, when available. This depends on the Docker build that is used. You can also target a specific device in a multi-GPU installation.
diff --git a/frigate/config/classification.py b/frigate/config/classification.py
index 474a43fe1..e77eb7a3f 100644
--- a/frigate/config/classification.py
+++ b/frigate/config/classification.py
@@ -118,6 +118,7 @@ class SemanticSearchProviderEnum(str, Enum):
     local = "local"
     openai = "openai"
     ollama = "ollama"
+    clip_as_service = "clip_as_service"
 
 
 class RemoteSemanticSearchConfig(FrigateBaseModel):
diff --git a/frigate/embeddings/remote/clip_as_service.py b/frigate/embeddings/remote/clip_as_service.py
new file mode 100644
index 000000000..26946ec78
--- /dev/null
+++ b/frigate/embeddings/remote/clip_as_service.py
@@ -0,0 +1,81 @@
+"""Clip-as-service embedding client for Frigate."""
+
+import base64
+import logging
+from typing import Optional
+
+import requests
+
+from frigate.config import SemanticSearchProviderEnum
+from frigate.embeddings.remote import (
+    RemoteEmbeddingClient,
+    register_embedding_provider,
+)
+
+logger = logging.getLogger(__name__)
+
+
+@register_embedding_provider(SemanticSearchProviderEnum.clip_as_service)
+class ClipAsServiceEmbeddingClient(RemoteEmbeddingClient):
+    """Remote embedding client for Frigate using clip-as-service.
+
+    Texts and images are both sent as documents to ``<remote.url>/post`` and
+    the ``embedding`` field of every returned document is handed back.
+    """
+
+    def _init_provider(self):
+        """Report readiness; this client performs no setup and returns True."""
+        return True
+
+    def _post_documents(self, docs: list[dict]) -> Optional[list[list[float]]]:
+        """POST ``docs`` to the service and return one embedding per document.
+
+        An empty ``docs`` yields an empty list without contacting the service.
+        Returns None when the URL is not configured, the request or response
+        parsing fails, the response has no ``data`` field, or the number of
+        embeddings differs from the number of documents sent.
+        """
+        url = self.config.semantic_search.remote.url
+        if not url:
+            logger.error("Clip-as-service URL is not configured.")
+            return None
+        if not docs:
+            return []
+
+        # Strip a trailing slash so the path never becomes "//post".
+        endpoint = f"{str(url).rstrip('/')}/post"
+        try:
+            response = requests.post(
+                endpoint,
+                json={"data": docs, "exec_endpoint": "/"},
+                timeout=self.timeout,
+            )
+            response.raise_for_status()
+            body = response.json()
+            if "data" not in body:
+                return None
+            embeddings = [item["embedding"] for item in body["data"]]
+        except Exception as e:
+            # Best effort: any transport or parsing failure means "no result".
+            logger.warning("Clip-as-service error: %s", e)
+            return None
+
+        # A short or long answer cannot be paired with the inputs reliably.
+        if len(embeddings) != len(docs):
+            logger.warning(
+                "Clip-as-service returned %d embeddings for %d inputs.",
+                len(embeddings),
+                len(docs),
+            )
+            return None
+        return embeddings
+
+    def embed_texts(self, texts: list[str]) -> Optional[list[list[float]]]:
+        """Return one embedding per text in ``texts``, or None on failure."""
+        return self._post_documents([{"text": text} for text in texts])
+
+    def embed_images(self, images: list[bytes]) -> Optional[list[list[float]]]:
+        """Return one embedding per image in ``images``, or None on failure."""
+        # Raw bytes are not JSON-serializable, so each image travels as base64.
+        blobs = [base64.b64encode(image).decode("utf-8") for image in images]
+        return self._post_documents([{"blob": blob} for blob in blobs])