mirror of
https://github.com/blakeblackshear/frigate.git
synced 2026-03-10 10:33:11 +03:00
Compare commits
9 Commits
334c8efce0
...
334399a260
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
334399a260 | ||
|
|
ac63be9ea7 | ||
|
|
a8c741a8ce | ||
|
|
55e4d210cf | ||
|
|
28cb974e94 | ||
|
|
d16bacf96b | ||
|
|
ebd7e8010d | ||
|
|
e79a624a15 | ||
|
|
29e2c322e7 |
@ -76,6 +76,40 @@ Switching between V1 and V2 requires reindexing your embeddings. The embeddings
|
|||||||
|
|
||||||
:::
|
:::
|
||||||
|
|
||||||
|
### GenAI Provider (llama.cpp)
|
||||||
|
|
||||||
|
Frigate can use a GenAI provider for semantic search embeddings when that provider has the `embeddings` role. Currently, only **llama.cpp** supports multimodal embeddings (both text and images).
|
||||||
|
|
||||||
|
To use llama.cpp for semantic search:
|
||||||
|
|
||||||
|
1. Configure a GenAI provider in your config with `embeddings` in its `roles`.
|
||||||
|
2. Set `semantic_search.model` to the GenAI config key (e.g. `default`).
|
||||||
|
3. Start the llama.cpp server with `--embeddings`, and with `--mmproj` for image support. The example below shows the Frigate configuration for steps 1 and 2:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
genai:
|
||||||
|
default:
|
||||||
|
provider: llamacpp
|
||||||
|
base_url: http://localhost:8080
|
||||||
|
model: your-model-name
|
||||||
|
roles:
|
||||||
|
- embeddings
|
||||||
|
- vision
|
||||||
|
- tools
|
||||||
|
|
||||||
|
semantic_search:
|
||||||
|
enabled: True
|
||||||
|
model: default
|
||||||
|
```
|
||||||
|
|
||||||
|
The llama.cpp server must be started with `--embeddings` for the embeddings API, and `--mmproj <mmproj.gguf>` when using image embeddings. See the [llama.cpp server documentation](https://github.com/ggml-org/llama.cpp/blob/master/tools/server/README.md) for details.
|
||||||
|
|
||||||
|
:::note
|
||||||
|
|
||||||
|
Switching between Jina models and a GenAI provider requires reindexing. Embeddings from different backends are incompatible.
|
||||||
|
|
||||||
|
:::
|
||||||
|
|
||||||
### GPU Acceleration
|
### GPU Acceleration
|
||||||
|
|
||||||
The CLIP models are downloaded in ONNX format, and the `large` model can be accelerated using GPU hardware, when available. This depends on the Docker build that is used. You can also target a specific device in a multi-GPU installation.
|
The CLIP models are downloaded in ONNX format, and the `large` model can be accelerated using GPU hardware, when available. This depends on the Docker build that is used. You can also target a specific device in a multi-GPU installation.
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
from enum import Enum
|
from enum import Enum
|
||||||
from typing import Dict, List, Optional
|
from typing import Dict, List, Optional, Union
|
||||||
|
|
||||||
from pydantic import ConfigDict, Field
|
from pydantic import ConfigDict, Field
|
||||||
|
|
||||||
@ -173,10 +173,10 @@ class SemanticSearchConfig(FrigateBaseModel):
|
|||||||
title="Reindex on startup",
|
title="Reindex on startup",
|
||||||
description="Trigger a full reindex of historical tracked objects into the embeddings database.",
|
description="Trigger a full reindex of historical tracked objects into the embeddings database.",
|
||||||
)
|
)
|
||||||
model: Optional[SemanticSearchModelEnum] = Field(
|
model: Optional[Union[SemanticSearchModelEnum, str]] = Field(
|
||||||
default=SemanticSearchModelEnum.jinav1,
|
default=SemanticSearchModelEnum.jinav1,
|
||||||
title="Semantic search model",
|
title="Semantic search model or GenAI provider name",
|
||||||
description="The embeddings model to use for semantic search (for example 'jinav1').",
|
description="The embeddings model to use for semantic search (for example 'jinav1'), or the name of a GenAI provider with the embeddings role.",
|
||||||
)
|
)
|
||||||
model_size: str = Field(
|
model_size: str = Field(
|
||||||
default="small",
|
default="small",
|
||||||
|
|||||||
@ -592,6 +592,22 @@ class FrigateConfig(FrigateBaseModel):
|
|||||||
)
|
)
|
||||||
role_to_name[role] = name
|
role_to_name[role] = name
|
||||||
|
|
||||||
|
# validate semantic_search.model when it is a GenAI provider name
|
||||||
|
if self.semantic_search.enabled and isinstance(
|
||||||
|
self.semantic_search.model, str
|
||||||
|
):
|
||||||
|
if self.semantic_search.model not in self.genai:
|
||||||
|
raise ValueError(
|
||||||
|
f"semantic_search.model '{self.semantic_search.model}' is not a "
|
||||||
|
"valid GenAI config key. Must match a key in genai config."
|
||||||
|
)
|
||||||
|
genai_cfg = self.genai[self.semantic_search.model]
|
||||||
|
if GenAIRoleEnum.embeddings not in genai_cfg.roles:
|
||||||
|
raise ValueError(
|
||||||
|
f"GenAI provider '{self.semantic_search.model}' must have "
|
||||||
|
"'embeddings' in its roles for semantic search."
|
||||||
|
)
|
||||||
|
|
||||||
# set default min_score for object attributes
|
# set default min_score for object attributes
|
||||||
for attribute in self.model.all_attributes:
|
for attribute in self.model.all_attributes:
|
||||||
if not self.objects.filters.get(attribute):
|
if not self.objects.filters.get(attribute):
|
||||||
|
|||||||
@ -28,6 +28,7 @@ from frigate.types import ModelStatusTypesEnum
|
|||||||
from frigate.util.builtin import EventsPerSecond, InferenceSpeed, serialize
|
from frigate.util.builtin import EventsPerSecond, InferenceSpeed, serialize
|
||||||
from frigate.util.file import get_event_thumbnail_bytes
|
from frigate.util.file import get_event_thumbnail_bytes
|
||||||
|
|
||||||
|
from .genai_embedding import GenAIEmbedding
|
||||||
from .onnx.jina_v1_embedding import JinaV1ImageEmbedding, JinaV1TextEmbedding
|
from .onnx.jina_v1_embedding import JinaV1ImageEmbedding, JinaV1TextEmbedding
|
||||||
from .onnx.jina_v2_embedding import JinaV2Embedding
|
from .onnx.jina_v2_embedding import JinaV2Embedding
|
||||||
|
|
||||||
@ -73,11 +74,13 @@ class Embeddings:
|
|||||||
config: FrigateConfig,
|
config: FrigateConfig,
|
||||||
db: SqliteVecQueueDatabase,
|
db: SqliteVecQueueDatabase,
|
||||||
metrics: DataProcessorMetrics,
|
metrics: DataProcessorMetrics,
|
||||||
|
genai_manager=None,
|
||||||
) -> None:
|
) -> None:
|
||||||
self.config = config
|
self.config = config
|
||||||
self.db = db
|
self.db = db
|
||||||
self.metrics = metrics
|
self.metrics = metrics
|
||||||
self.requestor = InterProcessRequestor()
|
self.requestor = InterProcessRequestor()
|
||||||
|
self.genai_manager = genai_manager
|
||||||
|
|
||||||
self.image_inference_speed = InferenceSpeed(self.metrics.image_embeddings_speed)
|
self.image_inference_speed = InferenceSpeed(self.metrics.image_embeddings_speed)
|
||||||
self.image_eps = EventsPerSecond()
|
self.image_eps = EventsPerSecond()
|
||||||
@ -104,7 +107,27 @@ class Embeddings:
|
|||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
if self.config.semantic_search.model == SemanticSearchModelEnum.jinav2:
|
model_cfg = self.config.semantic_search.model
|
||||||
|
is_genai_model = isinstance(model_cfg, str)
|
||||||
|
|
||||||
|
if is_genai_model:
|
||||||
|
embeddings_client = (
|
||||||
|
genai_manager.embeddings_client if genai_manager else None
|
||||||
|
)
|
||||||
|
if not embeddings_client:
|
||||||
|
raise ValueError(
|
||||||
|
f"semantic_search.model is '{model_cfg}' (GenAI provider) but "
|
||||||
|
"no embeddings client is configured. Ensure the GenAI provider "
|
||||||
|
"has 'embeddings' in its roles."
|
||||||
|
)
|
||||||
|
self.embedding = GenAIEmbedding(embeddings_client)
|
||||||
|
self.text_embedding = lambda input_data: self.embedding(
|
||||||
|
input_data, embedding_type="text"
|
||||||
|
)
|
||||||
|
self.vision_embedding = lambda input_data: self.embedding(
|
||||||
|
input_data, embedding_type="vision"
|
||||||
|
)
|
||||||
|
elif model_cfg == SemanticSearchModelEnum.jinav2:
|
||||||
# Single JinaV2Embedding instance for both text and vision
|
# Single JinaV2Embedding instance for both text and vision
|
||||||
self.embedding = JinaV2Embedding(
|
self.embedding = JinaV2Embedding(
|
||||||
model_size=self.config.semantic_search.model_size,
|
model_size=self.config.semantic_search.model_size,
|
||||||
@ -118,7 +141,8 @@ class Embeddings:
|
|||||||
self.vision_embedding = lambda input_data: self.embedding(
|
self.vision_embedding = lambda input_data: self.embedding(
|
||||||
input_data, embedding_type="vision"
|
input_data, embedding_type="vision"
|
||||||
)
|
)
|
||||||
else: # Default to jinav1
|
else:
|
||||||
|
# Default to jinav1
|
||||||
self.text_embedding = JinaV1TextEmbedding(
|
self.text_embedding = JinaV1TextEmbedding(
|
||||||
model_size=config.semantic_search.model_size,
|
model_size=config.semantic_search.model_size,
|
||||||
requestor=self.requestor,
|
requestor=self.requestor,
|
||||||
@ -136,8 +160,11 @@ class Embeddings:
|
|||||||
self.metrics.text_embeddings_eps.value = self.text_eps.eps()
|
self.metrics.text_embeddings_eps.value = self.text_eps.eps()
|
||||||
|
|
||||||
def get_model_definitions(self):
|
def get_model_definitions(self):
|
||||||
# Version-specific models
|
model_cfg = self.config.semantic_search.model
|
||||||
if self.config.semantic_search.model == SemanticSearchModelEnum.jinav2:
|
if isinstance(model_cfg, str):
|
||||||
|
# GenAI provider: no ONNX models to download
|
||||||
|
models = []
|
||||||
|
elif model_cfg == SemanticSearchModelEnum.jinav2:
|
||||||
models = [
|
models = [
|
||||||
"jinaai/jina-clip-v2-tokenizer",
|
"jinaai/jina-clip-v2-tokenizer",
|
||||||
"jinaai/jina-clip-v2-model_fp16.onnx"
|
"jinaai/jina-clip-v2-model_fp16.onnx"
|
||||||
@ -224,6 +251,14 @@ class Embeddings:
|
|||||||
|
|
||||||
embeddings = self.vision_embedding(valid_thumbs)
|
embeddings = self.vision_embedding(valid_thumbs)
|
||||||
|
|
||||||
|
if len(embeddings) != len(valid_ids):
|
||||||
|
logger.warning(
|
||||||
|
"Batch embed returned %d embeddings for %d thumbnails; skipping batch",
|
||||||
|
len(embeddings),
|
||||||
|
len(valid_ids),
|
||||||
|
)
|
||||||
|
return []
|
||||||
|
|
||||||
if upsert:
|
if upsert:
|
||||||
items = []
|
items = []
|
||||||
for i in range(len(valid_ids)):
|
for i in range(len(valid_ids)):
|
||||||
@ -246,9 +281,15 @@ class Embeddings:
|
|||||||
|
|
||||||
def embed_description(
|
def embed_description(
|
||||||
self, event_id: str, description: str, upsert: bool = True
|
self, event_id: str, description: str, upsert: bool = True
|
||||||
) -> np.ndarray:
|
) -> np.ndarray | None:
|
||||||
start = datetime.datetime.now().timestamp()
|
start = datetime.datetime.now().timestamp()
|
||||||
embedding = self.text_embedding([description])[0]
|
embeddings = self.text_embedding([description])
|
||||||
|
if not embeddings:
|
||||||
|
logger.warning(
|
||||||
|
"Failed to generate description embedding for event %s", event_id
|
||||||
|
)
|
||||||
|
return None
|
||||||
|
embedding = embeddings[0]
|
||||||
|
|
||||||
if upsert:
|
if upsert:
|
||||||
self.db.execute_sql(
|
self.db.execute_sql(
|
||||||
@ -271,8 +312,32 @@ class Embeddings:
|
|||||||
# upsert embeddings one by one to avoid token limit
|
# upsert embeddings one by one to avoid token limit
|
||||||
embeddings = []
|
embeddings = []
|
||||||
|
|
||||||
for desc in event_descriptions.values():
|
for eid, desc in event_descriptions.items():
|
||||||
embeddings.append(self.text_embedding([desc])[0])
|
result = self.text_embedding([desc])
|
||||||
|
if not result:
|
||||||
|
logger.warning(
|
||||||
|
"Failed to generate description embedding for event %s", eid
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
embeddings.append(result[0])
|
||||||
|
|
||||||
|
if not embeddings:
|
||||||
|
logger.warning("No description embeddings generated in batch")
|
||||||
|
return np.array([])
|
||||||
|
|
||||||
|
# Build ids list for only successful embeddings - we need to track which succeeded
|
||||||
|
ids = list(event_descriptions.keys())
|
||||||
|
if len(embeddings) != len(ids):
|
||||||
|
# Rebuild ids/embeddings for only successful ones (match by order)
|
||||||
|
ids = []
|
||||||
|
embeddings_filtered = []
|
||||||
|
for eid, desc in event_descriptions.items():
|
||||||
|
result = self.text_embedding([desc])
|
||||||
|
if result:
|
||||||
|
ids.append(eid)
|
||||||
|
embeddings_filtered.append(result[0])
|
||||||
|
ids = ids
|
||||||
|
embeddings = embeddings_filtered
|
||||||
|
|
||||||
if upsert:
|
if upsert:
|
||||||
ids = list(event_descriptions.keys())
|
ids = list(event_descriptions.keys())
|
||||||
@ -314,7 +379,10 @@ class Embeddings:
|
|||||||
|
|
||||||
batch_size = (
|
batch_size = (
|
||||||
4
|
4
|
||||||
if self.config.semantic_search.model == SemanticSearchModelEnum.jinav2
|
if (
|
||||||
|
isinstance(self.config.semantic_search.model, str)
|
||||||
|
or self.config.semantic_search.model == SemanticSearchModelEnum.jinav2
|
||||||
|
)
|
||||||
else 32
|
else 32
|
||||||
)
|
)
|
||||||
current_page = 1
|
current_page = 1
|
||||||
@ -601,6 +669,8 @@ class Embeddings:
|
|||||||
if trigger.type == "description":
|
if trigger.type == "description":
|
||||||
logger.debug(f"Generating embedding for trigger description {trigger_name}")
|
logger.debug(f"Generating embedding for trigger description {trigger_name}")
|
||||||
embedding = self.embed_description(None, trigger.data, upsert=False)
|
embedding = self.embed_description(None, trigger.data, upsert=False)
|
||||||
|
if embedding is None:
|
||||||
|
return b""
|
||||||
return embedding.astype(np.float32).tobytes()
|
return embedding.astype(np.float32).tobytes()
|
||||||
|
|
||||||
elif trigger.type == "thumbnail":
|
elif trigger.type == "thumbnail":
|
||||||
@ -636,6 +706,8 @@ class Embeddings:
|
|||||||
embedding = self.embed_thumbnail(
|
embedding = self.embed_thumbnail(
|
||||||
str(trigger.data), thumbnail, upsert=False
|
str(trigger.data), thumbnail, upsert=False
|
||||||
)
|
)
|
||||||
|
if embedding is None:
|
||||||
|
return b""
|
||||||
return embedding.astype(np.float32).tobytes()
|
return embedding.astype(np.float32).tobytes()
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
|||||||
89
frigate/embeddings/genai_embedding.py
Normal file
89
frigate/embeddings/genai_embedding.py
Normal file
@ -0,0 +1,89 @@
|
|||||||
|
"""GenAI-backed embeddings for semantic search."""
|
||||||
|
|
||||||
|
import io
|
||||||
|
import logging
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from PIL import Image
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from frigate.genai import GenAIClient
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
EMBEDDING_DIM = 768
|
||||||
|
|
||||||
|
|
||||||
|
class GenAIEmbedding:
    """Callable adapter that routes embedding requests to a GenAI client.

    Exposes ``__call__(inputs, embedding_type) -> list[np.ndarray]`` so it can
    be used where a combined text/vision embedder is expected. Every vector
    returned by the client is reduced to one dimension and then truncated or
    zero-padded to ``EMBEDDING_DIM`` entries.
    """

    def __init__(self, client: "GenAIClient") -> None:
        # The client only needs to provide ``embed(texts=...)`` and
        # ``embed(images=...)``; nothing else is called on it here.
        self.client = client

    @staticmethod
    def _collect_image_bytes(inputs) -> list[bytes]:
        """Return encoded bytes for each supported vision input, in order.

        ``bytes`` are passed through untouched, PIL images are saved as RGB
        JPEG, and any other type is logged and left out of the result.
        """
        payloads: list[bytes] = []
        for candidate in inputs:
            if isinstance(candidate, bytes):
                payloads.append(candidate)
                continue
            if isinstance(candidate, Image.Image):
                stream = io.BytesIO()
                candidate.convert("RGB").save(stream, format="JPEG")
                payloads.append(stream.getvalue())
                continue
            logger.warning(
                "GenAIEmbedding: skipping unsupported vision input type %s",
                type(candidate).__name__,
            )
        return payloads

    def __call__(
        self,
        inputs: list[str] | list[bytes] | list[Image.Image],
        embedding_type: str = "text",
    ) -> list[np.ndarray]:
        """Embed a batch of texts or images through the GenAI client.

        Args:
            inputs: Strings for ``"text"``; bytes or PIL images for
                ``"vision"``.
            embedding_type: Either ``"text"`` or ``"vision"``.

        Returns:
            A list of float32 arrays with ``EMBEDDING_DIM`` entries each. The
            list is empty when ``inputs`` is empty, when no vision input is
            usable, or when the client returns nothing. Unsupported vision
            inputs are dropped, so the result may be shorter than ``inputs``.

        Raises:
            ValueError: If ``embedding_type`` is neither ``"text"`` nor
                ``"vision"`` (only checked for non-empty ``inputs``).
        """
        if not inputs:
            return []

        if embedding_type == "text":
            raw_vectors = self.client.embed(texts=[str(item) for item in inputs])
        elif embedding_type == "vision":
            payloads = self._collect_image_bytes(inputs)
            if not payloads:
                return []
            raw_vectors = self.client.embed(images=payloads)
        else:
            raise ValueError(
                f"Invalid embedding_type '{embedding_type}'. Must be 'text' or 'vision'."
            )

        vectors = []
        for raw in raw_vectors:
            vec = np.asarray(raw, dtype=np.float32)
            if vec.ndim > 1:
                # Some providers return token-level embeddings; pool to one vector.
                vec = vec.mean(axis=0)
            vec = vec.flatten()

            shortfall = EMBEDDING_DIM - vec.size
            if shortfall < 0:
                vec = vec[:EMBEDDING_DIM]
            elif shortfall > 0:
                vec = np.pad(
                    vec,
                    (0, shortfall),
                    mode="constant",
                    constant_values=0,
                )
            vectors.append(vec)
        return vectors
|
||||||
@ -116,8 +116,10 @@ class EmbeddingMaintainer(threading.Thread):
|
|||||||
models = [Event, Recordings, ReviewSegment, Trigger]
|
models = [Event, Recordings, ReviewSegment, Trigger]
|
||||||
db.bind(models)
|
db.bind(models)
|
||||||
|
|
||||||
|
self.genai_manager = GenAIClientManager(config)
|
||||||
|
|
||||||
if config.semantic_search.enabled:
|
if config.semantic_search.enabled:
|
||||||
self.embeddings = Embeddings(config, db, metrics)
|
self.embeddings = Embeddings(config, db, metrics, self.genai_manager)
|
||||||
|
|
||||||
# Check if we need to re-index events
|
# Check if we need to re-index events
|
||||||
if config.semantic_search.reindex:
|
if config.semantic_search.reindex:
|
||||||
@ -144,7 +146,6 @@ class EmbeddingMaintainer(threading.Thread):
|
|||||||
self.frame_manager = SharedMemoryFrameManager()
|
self.frame_manager = SharedMemoryFrameManager()
|
||||||
|
|
||||||
self.detected_license_plates: dict[str, dict[str, Any]] = {}
|
self.detected_license_plates: dict[str, dict[str, Any]] = {}
|
||||||
self.genai_manager = GenAIClientManager(config)
|
|
||||||
|
|
||||||
# model runners to share between realtime and post processors
|
# model runners to share between realtime and post processors
|
||||||
if self.config.lpr.enabled:
|
if self.config.lpr.enabled:
|
||||||
|
|||||||
@ -7,6 +7,7 @@ import os
|
|||||||
import re
|
import re
|
||||||
from typing import Any, Optional
|
from typing import Any, Optional
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
from playhouse.shortcuts import model_to_dict
|
from playhouse.shortcuts import model_to_dict
|
||||||
|
|
||||||
from frigate.config import CameraConfig, GenAIConfig, GenAIProviderEnum
|
from frigate.config import CameraConfig, GenAIConfig, GenAIProviderEnum
|
||||||
@ -304,6 +305,25 @@ Guidelines:
|
|||||||
"""Get the context window size for this provider in tokens."""
|
"""Get the context window size for this provider in tokens."""
|
||||||
return 4096
|
return 4096
|
||||||
|
|
||||||
|
def embed(
    self,
    texts: list[str] | None = None,
    images: list[bytes] | None = None,
) -> list[np.ndarray]:
    """Fallback embedding hook for providers without embeddings support.

    Providers that can produce embeddings should override this method and
    return one numpy array per input; 768 dimensions are expected for
    Frigate semantic search compatibility.

    Args:
        texts: Optional strings to embed (ignored by this fallback).
        images: Optional encoded images to embed (ignored by this fallback).

    Returns:
        Always an empty list, after logging a warning that names the class.
    """
    provider_class = self.__class__.__name__
    logger.warning(
        "%s does not support embeddings. "
        "This method should be overridden by the provider implementation.",
        provider_class,
    )
    no_embeddings: list[np.ndarray] = []
    return no_embeddings
|
||||||
|
|
||||||
def chat_with_tools(
|
def chat_with_tools(
|
||||||
self,
|
self,
|
||||||
messages: list[dict[str, Any]],
|
messages: list[dict[str, Any]],
|
||||||
|
|||||||
@ -1,12 +1,15 @@
|
|||||||
"""llama.cpp Provider for Frigate AI."""
|
"""llama.cpp Provider for Frigate AI."""
|
||||||
|
|
||||||
import base64
|
import base64
|
||||||
|
import io
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
from typing import Any, Optional
|
from typing import Any, Optional
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
|
import numpy as np
|
||||||
import requests
|
import requests
|
||||||
|
from PIL import Image
|
||||||
|
|
||||||
from frigate.config import GenAIProviderEnum
|
from frigate.config import GenAIProviderEnum
|
||||||
from frigate.genai import GenAIClient, register_genai_provider
|
from frigate.genai import GenAIClient, register_genai_provider
|
||||||
@ -15,6 +18,20 @@ from frigate.genai.utils import parse_tool_calls_from_message
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def _to_jpeg(img_bytes: bytes) -> bytes | None:
    """Re-encode image bytes as an RGB JPEG (quality 85).

    Used because llama.cpp/STB does not support WebP.

    Args:
        img_bytes: Encoded image data in any format PIL can open.

    Returns:
        The JPEG-encoded bytes, or ``None`` if decoding or encoding failed
        (the failure is logged as a warning).
    """
    try:
        source = Image.open(io.BytesIO(img_bytes))
        # JPEG output here is always RGB; convert other modes first.
        rgb = source if source.mode == "RGB" else source.convert("RGB")
        encoded = io.BytesIO()
        rgb.save(encoded, format="JPEG", quality=85)
        return encoded.getvalue()
    except Exception as err:
        # Best effort: the caller decides what to do when conversion fails.
        logger.warning("Failed to convert image to JPEG: %s", err)
        return None
|
||||||
|
|
||||||
|
|
||||||
@register_genai_provider(GenAIProviderEnum.llamacpp)
|
@register_genai_provider(GenAIProviderEnum.llamacpp)
|
||||||
class LlamaCppClient(GenAIClient):
|
class LlamaCppClient(GenAIClient):
|
||||||
"""Generative AI client for Frigate using llama.cpp server."""
|
"""Generative AI client for Frigate using llama.cpp server."""
|
||||||
@ -176,6 +193,110 @@ class LlamaCppClient(GenAIClient):
|
|||||||
)
|
)
|
||||||
return result if result else None
|
return result if result else None
|
||||||
|
|
||||||
|
def embed(
|
||||||
|
self,
|
||||||
|
texts: list[str] | None = None,
|
||||||
|
images: list[bytes] | None = None,
|
||||||
|
) -> list[np.ndarray]:
|
||||||
|
"""Generate embeddings via llama.cpp /embeddings endpoint.
|
||||||
|
|
||||||
|
Supports batch requests. Uses content format with prompt_string and
|
||||||
|
multimodal_data for images (PR #15108). Server must be started with
|
||||||
|
--embeddings and --mmproj for multimodal support.
|
||||||
|
"""
|
||||||
|
if self.provider is None:
|
||||||
|
logger.warning(
|
||||||
|
"llama.cpp provider has not been initialized. Check your llama.cpp configuration."
|
||||||
|
)
|
||||||
|
return []
|
||||||
|
|
||||||
|
texts = texts or []
|
||||||
|
images = images or []
|
||||||
|
if not texts and not images:
|
||||||
|
return []
|
||||||
|
|
||||||
|
EMBEDDING_DIM = 768
|
||||||
|
|
||||||
|
content = []
|
||||||
|
for text in texts:
|
||||||
|
content.append({"prompt_string": text})
|
||||||
|
for img in images:
|
||||||
|
# llama.cpp uses STB which does not support WebP; convert to JPEG
|
||||||
|
jpeg_bytes = _to_jpeg(img)
|
||||||
|
to_encode = jpeg_bytes if jpeg_bytes is not None else img
|
||||||
|
encoded = base64.b64encode(to_encode).decode("utf-8")
|
||||||
|
# prompt_string must contain <__media__> placeholder for image tokenization
|
||||||
|
content.append(
|
||||||
|
{
|
||||||
|
"prompt_string": "<__media__>\n",
|
||||||
|
"multimodal_data": [encoded],
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
response = requests.post(
|
||||||
|
f"{self.provider}/embeddings",
|
||||||
|
json={"model": self.genai_config.model, "content": content},
|
||||||
|
timeout=self.timeout,
|
||||||
|
)
|
||||||
|
response.raise_for_status()
|
||||||
|
result = response.json()
|
||||||
|
|
||||||
|
items = result.get("data", result) if isinstance(result, dict) else result
|
||||||
|
if not isinstance(items, list):
|
||||||
|
logger.warning("llama.cpp embeddings returned unexpected format")
|
||||||
|
return []
|
||||||
|
|
||||||
|
embeddings = []
|
||||||
|
for item in items:
|
||||||
|
emb = item.get("embedding") if isinstance(item, dict) else None
|
||||||
|
if emb is None:
|
||||||
|
logger.warning("llama.cpp embeddings item missing embedding field")
|
||||||
|
continue
|
||||||
|
arr = np.array(emb, dtype=np.float32)
|
||||||
|
if arr.ndim > 1:
|
||||||
|
# llama.cpp can return token-level embeddings; pool per item
|
||||||
|
arr = arr.mean(axis=0)
|
||||||
|
arr = arr.flatten()
|
||||||
|
orig_dim = arr.size
|
||||||
|
if orig_dim != EMBEDDING_DIM:
|
||||||
|
if orig_dim > EMBEDDING_DIM:
|
||||||
|
arr = arr[:EMBEDDING_DIM]
|
||||||
|
logger.debug(
|
||||||
|
"Truncated llama.cpp embedding from %d to %d dimensions",
|
||||||
|
orig_dim,
|
||||||
|
EMBEDDING_DIM,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
arr = np.pad(
|
||||||
|
arr,
|
||||||
|
(0, EMBEDDING_DIM - orig_dim),
|
||||||
|
mode="constant",
|
||||||
|
constant_values=0,
|
||||||
|
)
|
||||||
|
logger.debug(
|
||||||
|
"Padded llama.cpp embedding from %d to %d dimensions",
|
||||||
|
orig_dim,
|
||||||
|
EMBEDDING_DIM,
|
||||||
|
)
|
||||||
|
embeddings.append(arr)
|
||||||
|
return embeddings
|
||||||
|
except requests.exceptions.Timeout:
|
||||||
|
logger.warning("llama.cpp embeddings request timed out")
|
||||||
|
return []
|
||||||
|
except requests.exceptions.RequestException as e:
|
||||||
|
error_detail = str(e)
|
||||||
|
if hasattr(e, "response") and e.response is not None:
|
||||||
|
try:
|
||||||
|
error_detail = f"{str(e)} - Response: {e.response.text[:500]}"
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
logger.warning("llama.cpp embeddings error: %s", error_detail)
|
||||||
|
return []
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning("Unexpected error in llama.cpp embeddings: %s", str(e))
|
||||||
|
return []
|
||||||
|
|
||||||
def chat_with_tools(
|
def chat_with_tools(
|
||||||
self,
|
self,
|
||||||
messages: list[dict[str, Any]],
|
messages: list[dict[str, Any]],
|
||||||
|
|||||||
@ -1,3 +1,6 @@
|
|||||||
|
/** ONNX embedding models that require local model downloads. GenAI providers are not in this list. */
|
||||||
|
export const JINA_EMBEDDING_MODELS = ["jinav1", "jinav2"] as const;
|
||||||
|
|
||||||
export const supportedLanguageKeys = [
|
export const supportedLanguageKeys = [
|
||||||
"en",
|
"en",
|
||||||
"es",
|
"es",
|
||||||
|
|||||||
@ -23,6 +23,7 @@ import { toast } from "sonner";
|
|||||||
import useSWR from "swr";
|
import useSWR from "swr";
|
||||||
import useSWRInfinite from "swr/infinite";
|
import useSWRInfinite from "swr/infinite";
|
||||||
import { useDocDomain } from "@/hooks/use-doc-domain";
|
import { useDocDomain } from "@/hooks/use-doc-domain";
|
||||||
|
import { JINA_EMBEDDING_MODELS } from "@/lib/const";
|
||||||
|
|
||||||
const API_LIMIT = 25;
|
const API_LIMIT = 25;
|
||||||
|
|
||||||
@ -293,7 +294,12 @@ export default function Explore() {
|
|||||||
const modelVersion = config?.semantic_search.model || "jinav1";
|
const modelVersion = config?.semantic_search.model || "jinav1";
|
||||||
const modelSize = config?.semantic_search.model_size || "small";
|
const modelSize = config?.semantic_search.model_size || "small";
|
||||||
|
|
||||||
// Text model state
|
// GenAI providers have no local models to download
|
||||||
|
const isGenaiEmbeddings =
|
||||||
|
typeof modelVersion === "string" &&
|
||||||
|
!(JINA_EMBEDDING_MODELS as readonly string[]).includes(modelVersion);
|
||||||
|
|
||||||
|
// Text model state (skipped for GenAI - no local models)
|
||||||
const { payload: textModelState } = useModelState(
|
const { payload: textModelState } = useModelState(
|
||||||
modelVersion === "jinav1"
|
modelVersion === "jinav1"
|
||||||
? "jinaai/jina-clip-v1-text_model_fp16.onnx"
|
? "jinaai/jina-clip-v1-text_model_fp16.onnx"
|
||||||
@ -328,6 +334,10 @@ export default function Explore() {
|
|||||||
);
|
);
|
||||||
|
|
||||||
const allModelsLoaded = useMemo(() => {
|
const allModelsLoaded = useMemo(() => {
|
||||||
|
if (isGenaiEmbeddings) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
return (
|
return (
|
||||||
textModelState === "downloaded" &&
|
textModelState === "downloaded" &&
|
||||||
textTokenizerState === "downloaded" &&
|
textTokenizerState === "downloaded" &&
|
||||||
@ -335,6 +345,7 @@ export default function Explore() {
|
|||||||
visionFeatureExtractorState === "downloaded"
|
visionFeatureExtractorState === "downloaded"
|
||||||
);
|
);
|
||||||
}, [
|
}, [
|
||||||
|
isGenaiEmbeddings,
|
||||||
textModelState,
|
textModelState,
|
||||||
textTokenizerState,
|
textTokenizerState,
|
||||||
visionModelState,
|
visionModelState,
|
||||||
@ -358,10 +369,11 @@ export default function Explore() {
|
|||||||
!defaultViewLoaded ||
|
!defaultViewLoaded ||
|
||||||
(config?.semantic_search.enabled &&
|
(config?.semantic_search.enabled &&
|
||||||
(!reindexState ||
|
(!reindexState ||
|
||||||
!textModelState ||
|
(!isGenaiEmbeddings &&
|
||||||
!textTokenizerState ||
|
(!textModelState ||
|
||||||
!visionModelState ||
|
!textTokenizerState ||
|
||||||
!visionFeatureExtractorState))
|
!visionModelState ||
|
||||||
|
!visionFeatureExtractorState))))
|
||||||
) {
|
) {
|
||||||
return (
|
return (
|
||||||
<ActivityIndicator className="absolute left-1/2 top-1/2 -translate-x-1/2 -translate-y-1/2" />
|
<ActivityIndicator className="absolute left-1/2 top-1/2 -translate-x-1/2 -translate-y-1/2" />
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user