diff --git a/frigate/embeddings/embeddings.py b/frigate/embeddings/embeddings.py index 8d12feb32..4de0fd7de 100644 --- a/frigate/embeddings/embeddings.py +++ b/frigate/embeddings/embeddings.py @@ -146,13 +146,14 @@ class Embeddings: ids = list(event_thumbs.keys()) embeddings = self.vision_embedding(images) items = [(ids[i], serialize(embeddings[i])) for i in range(len(ids))] + flat_items = [item for sublist in items for item in sublist] self.db.execute_sql( """ INSERT OR REPLACE INTO vec_thumbnails(id, thumbnail_embedding) VALUES {} """.format(", ".join(["(?, ?)"] * len(items))), - items, + flat_items, ) return embeddings @@ -172,13 +173,14 @@ class Embeddings: embeddings = self.text_embedding(list(event_descriptions.values())) ids = list(event_descriptions.keys()) items = [(ids[i], serialize(embeddings[i])) for i in range(len(ids))] + flat_items = [item for sublist in items for item in sublist] self.db.execute_sql( """ INSERT OR REPLACE INTO vec_descriptions(id, description_embedding) VALUES {} """.format(", ".join(["(?, ?)"] * len(items))), - items, + flat_items, ) return embeddings @@ -196,16 +198,6 @@ class Embeddings: os.remove(os.path.join(CONFIG_DIR, ".search_stats.json")) st = time.time() - totals = { - "thumbnails": 0, - "descriptions": 0, - "processed_objects": 0, - "total_objects": 0, - "time_remaining": 0, - "status": "indexing", - } - - self.requestor.send_data(UPDATE_EMBEDDINGS_REINDEX_PROGRESS, totals) # Get total count of events to process total_events = ( @@ -216,11 +208,21 @@ class Embeddings: ) .count() ) - totals["total_objects"] = total_events batch_size = 32 current_page = 1 + totals = { + "thumbnails": 0, + "descriptions": 0, + "processed_objects": total_events - 1 if total_events < batch_size else 0, + "total_objects": total_events, + "time_remaining": 0 if total_events < batch_size else -1, + "status": "indexing", + } + + self.requestor.send_data(UPDATE_EMBEDDINGS_REINDEX_PROGRESS, totals) + events = ( Event.select() .where( diff --git a/frigate/embeddings/functions/onnx.py b/frigate/embeddings/functions/onnx.py index 765a7e88c..6744fae5c 100644 --- a/frigate/embeddings/functions/onnx.py +++ b/frigate/embeddings/functions/onnx.py @@ -164,8 +164,15 @@ class GenericONNXEmbedding: return [] if self.model_type == "text": + max_length = max(len(self.tokenizer.encode(text)) for text in inputs) processed_inputs = [ - self.tokenizer(text, padding=True, truncation=True, return_tensors="np") + self.tokenizer( + text, + padding="max_length", + truncation=True, + max_length=max_length, + return_tensors="np", + ) for text in inputs ] else: @@ -184,7 +191,8 @@ class GenericONNXEmbedding: onnx_inputs[key].append(value[0]) for key in onnx_inputs.keys(): - onnx_inputs[key] = np.array(onnx_inputs[key]) + if onnx_inputs[key]: + onnx_inputs[key] = np.stack(onnx_inputs[key]) embeddings = self.runner.run(onnx_inputs)[0] return [embedding for embedding in embeddings]