From 37dc0fd7cb6eecb6ac6b4c8e23c41f917a8b8646 Mon Sep 17 00:00:00 2001 From: user Date: Wed, 10 Dec 2025 14:06:08 -0500 Subject: [PATCH] add jina clip as service provider --- docs/docs/configuration/semantic_search.md | 23 +++++- frigate/config/classification.py | 1 + frigate/embeddings/remote/clip_as_service.py | 81 ++++++++++++++++++++ 3 files changed, 102 insertions(+), 3 deletions(-) create mode 100644 frigate/embeddings/remote/clip_as_service.py diff --git a/docs/docs/configuration/semantic_search.md b/docs/docs/configuration/semantic_search.md index 3801ff0b1..498d1ab67 100644 --- a/docs/docs/configuration/semantic_search.md +++ b/docs/docs/configuration/semantic_search.md @@ -82,13 +82,15 @@ Switching between V1 and V2 requires reindexing your embeddings. The embeddings ### Remote Providers -Frigate can be configured to use remote services for generating embeddings. This is done by setting the `provider` field to `openai` or `ollama`. +Frigate can be configured to use remote services for generating embeddings. This is done by setting the `provider` field to `openai`, `ollama`, or `clip_as_service`. -For vision embeddings, remote providers use a two-step process: +#### OpenAI and Ollama + +For OpenAI and Ollama, vision embeddings use a two-step process: 1. A text description of the image is generated using the configured GenAI provider. 2. An embedding is created from that description using the configured remote embedding provider. -This means that you must have a GenAI provider configured to use vision embeddings with a remote provider. +This means that you must have a GenAI provider configured to use vision embeddings with these providers. ```yaml semantic_search: @@ -99,6 +101,21 @@ semantic_search: vision_model_prompt: "A detailed description of the image for semantic search." ``` +#### Jina CLIP-as-Service + +Frigate supports [Jina CLIP-as-Service](https://clip-as-service.jina.ai/) which provides a multi-modal embedding service that can be hosted locally or remotely. This provider supports both text and image embeddings directly, without requiring a separate GenAI provider for image descriptions. + +You can run CLIP-as-Service using their [getting started guide](https://clip-as-service.jina.ai/user-guides/server/). + +```yaml +semantic_search: + enabled: True + provider: clip_as_service + remote: + url: "http://localhost:51000" + # model is typically handled by the service configuration +``` + ### GPU Acceleration The CLIP models are downloaded in ONNX format, and the `large` model can be accelerated using GPU hardware, when available. This depends on the Docker build that is used. You can also target a specific device in a multi-GPU installation. diff --git a/frigate/config/classification.py b/frigate/config/classification.py index 474a43fe1..e77eb7a3f 100644 --- a/frigate/config/classification.py +++ b/frigate/config/classification.py @@ -118,6 +118,7 @@ class SemanticSearchProviderEnum(str, Enum): local = "local" openai = "openai" ollama = "ollama" + clip_as_service = "clip_as_service" class RemoteSemanticSearchConfig(FrigateBaseModel): diff --git a/frigate/embeddings/remote/clip_as_service.py b/frigate/embeddings/remote/clip_as_service.py new file mode 100644 index 000000000..26946ec78 --- /dev/null +++ b/frigate/embeddings/remote/clip_as_service.py @@ -0,0 +1,81 @@ +"""Clip-as-service embedding client for Frigate.""" + +import base64 +import logging +from typing import Optional + +import requests + +from frigate.config import SemanticSearchProviderEnum +from frigate.embeddings.remote import ( + RemoteEmbeddingClient, + register_embedding_provider, +) + +logger = logging.getLogger(__name__) + + +@register_embedding_provider(SemanticSearchProviderEnum.clip_as_service) +class ClipAsServiceEmbeddingClient(RemoteEmbeddingClient): + """Remote embedding client for Frigate using clip-as-service.""" + + def _init_provider(self): + """Initialize the client.""" + return True + + def embed_texts(self, texts: list[str]) -> Optional[list[list[float]]]: + """Get embeddings for a list of texts.""" + if not self.config.semantic_search.remote.url: + logger.error("Clip-as-service URL is not configured.") + return None + + payload = { + "data": [{"text": t} for t in texts], + "exec_endpoint": "/" + } + + try: + response = requests.post( + f"{self.config.semantic_search.remote.url}/post", + json=payload, + timeout=self.timeout, + ) + response.raise_for_status() + data = response.json() + if "data" in data: + return [item["embedding"] for item in data["data"]] + return None + except Exception as e: + logger.warning("Clip-as-service error: %s", str(e)) + return None + + def embed_images(self, images: list[bytes]) -> Optional[list[list[float]]]: + """Get embeddings for a list of images.""" + if not self.config.semantic_search.remote.url: + logger.error("Clip-as-service URL is not configured.") + return None + + payload_data = [] + for img_bytes in images: + b64_str = base64.b64encode(img_bytes).decode("utf-8") + payload_data.append({"blob": b64_str}) + + payload = { + "data": payload_data, + "exec_endpoint": "/" + } + + try: + response = requests.post( + f"{self.config.semantic_search.remote.url}/post", + json=payload, + timeout=self.timeout, + ) + response.raise_for_status() + data = response.json() + if "data" in data: + return [item["embedding"] for item in data["data"]] + return None + except Exception as e: + logger.warning("Clip-as-service error: %s", str(e)) + return None