From ced2e318572b02d56d6811ace61c9485c2827390 Mon Sep 17 00:00:00 2001 From: Nicolas Mowen Date: Mon, 2 Mar 2026 14:31:04 -0700 Subject: [PATCH] Handle emb correctly --- frigate/embeddings/genai_embedding.py | 6 +++++- frigate/genai/llama_cpp.py | 4 ++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/frigate/embeddings/genai_embedding.py b/frigate/embeddings/genai_embedding.py index 0059693f8..d3637bb73 100644 --- a/frigate/embeddings/genai_embedding.py +++ b/frigate/embeddings/genai_embedding.py @@ -70,7 +70,11 @@ class GenAIEmbedding: result = [] for emb in embeddings: - arr = np.asarray(emb, dtype=np.float32).flatten() + arr = np.asarray(emb, dtype=np.float32) + if arr.ndim > 1: + # Some providers return token-level embeddings; pool to one vector. + arr = arr.mean(axis=0) + arr = arr.flatten() if arr.size != EMBEDDING_DIM: if arr.size > EMBEDDING_DIM: arr = arr[:EMBEDDING_DIM] diff --git a/frigate/genai/llama_cpp.py b/frigate/genai/llama_cpp.py index dd6dc6f71..f9c251790 100644 --- a/frigate/genai/llama_cpp.py +++ b/frigate/genai/llama_cpp.py @@ -254,6 +254,10 @@ class LlamaCppClient(GenAIClient): logger.warning("llama.cpp embeddings item missing embedding field") continue arr = np.array(emb, dtype=np.float32) + if arr.ndim > 1: + # llama.cpp can return token-level embeddings; pool per item + arr = arr.mean(axis=0) + arr = arr.flatten() orig_dim = arr.size if orig_dim != EMBEDDING_DIM: if orig_dim > EMBEDDING_DIM: