diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index deb90c3d7..0c460cfad 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -10,7 +10,7 @@ "features": { "ghcr.io/devcontainers/features/common-utils:1": {} }, - "forwardPorts": [5000, 5001, 5173, 1935, 8000, 8554, 8555], + "forwardPorts": [5000, 5001, 5173, 1935, 8554, 8555], "portsAttributes": { "5000": { "label": "NGINX", @@ -28,10 +28,6 @@ "label": "RTMP", "onAutoForward": "silent" }, - "8000": { - "label": "Chroma", - "onAutoForward": "silent" - }, "8554": { "label": "gortc RTSP", "onAutoForward": "silent" diff --git a/docker/main/Dockerfile b/docker/main/Dockerfile index 0d0607c95..f3e62eee8 100644 --- a/docker/main/Dockerfile +++ b/docker/main/Dockerfile @@ -178,6 +178,10 @@ ENV NVIDIA_DRIVER_CAPABILITIES="compute,video,utility" # Turn off Chroma Telemetry: https://docs.trychroma.com/telemetry#opting-out ENV ANONYMIZED_TELEMETRY=False +# Allow resetting the chroma database +ENV ALLOW_RESET=True +# Disable tokenizer parallelism warning +ENV TOKENIZERS_PARALLELISM=true ENV PATH="/usr/lib/btbn-ffmpeg/bin:/usr/local/go2rtc/bin:/usr/local/nginx/sbin:${PATH}" @@ -243,10 +247,10 @@ CMD ["sleep", "infinity"] FROM --platform=$BUILDPLATFORM node:20 AS web-build WORKDIR /work -COPY web/package.json web/package-lock.json ./ +COPY web-old/package.json web-old/package-lock.json ./ RUN npm install -COPY web/ ./ +COPY web-old/ ./ RUN npm run build \ && mv dist/BASE_PATH/monacoeditorwork/* dist/assets/ \ && rm -rf dist/BASE_PATH diff --git a/frigate/app.py b/frigate/app.py index cad2a551d..315465312 100644 --- a/frigate/app.py +++ b/frigate/app.py @@ -31,6 +31,7 @@ from frigate.const import ( MODEL_CACHE_DIR, RECORD_DIR, ) +from frigate.embeddings import Embeddings from frigate.embeddings.processor import EmbeddingProcessor from frigate.events.audio import listen_to_audio from frigate.events.cleanup import EventCleanup @@ -341,6 +342,13 @@ class FrigateApp: migrate_db.close() + def init_embeddings(self) -> None: + self.embeddings: Embeddings = None + try: + self.embeddings = Embeddings(self.config) + except ValueError: + pass + def init_go2rtc(self) -> None: for proc in psutil.process_iter(["pid", "name"]): if proc.info["name"] == "go2rtc": @@ -407,6 +415,7 @@ class FrigateApp: self.onvif_controller, self.external_event_processor, self.plus_api, + self.embeddings, ) def init_onvif(self) -> None: @@ -590,7 +599,7 @@ class FrigateApp: def start_embeddings_processor(self) -> None: self.embeddings_processor = EmbeddingProcessor( - self.config, self.embeddings_queue, self.stop_event + self.config, self.embeddings, self.embeddings_queue, self.stop_event ) self.embeddings_processor.start() @@ -607,7 +616,7 @@ class FrigateApp: self.event_processor.start() def start_event_cleanup(self) -> None: - self.event_cleanup = EventCleanup(self.config, self.stop_event) + self.event_cleanup = EventCleanup(self.config, self.embeddings, self.stop_event) self.event_cleanup.start() def start_record_cleanup(self) -> None: @@ -688,10 +697,11 @@ class FrigateApp: self.set_log_levels() self.init_queues() self.init_database() + self.bind_database() + self.init_embeddings() self.init_onvif() self.init_recording_manager() self.init_go2rtc() - self.bind_database() self.init_inter_process_communicator() self.init_dispatcher() except Exception as e: diff --git a/frigate/embeddings/__init__.py b/frigate/embeddings/__init__.py index e69de29bb..8a3d3d84b 100644 --- a/frigate/embeddings/__init__.py +++ b/frigate/embeddings/__init__.py @@ -0,0 +1,103 @@ +"""ChromaDB embeddings database.""" +import base64 +import io +import logging +import os +import time + +import numpy as np +from chromadb import Collection +from chromadb import HttpClient as ChromaClient +from chromadb.config import Settings +from PIL import Image +from playhouse.shortcuts import model_to_dict + +from frigate.config import FrigateConfig +from frigate.const import CONFIG_DIR +from frigate.models import Event + +from .functions.clip import ClipEmbedding +from .functions.minilm_l6_v2 import MiniLMEmbedding + +logger = logging.getLogger(__name__) + + +def get_metadata(event: Event) -> dict: + """Extract valid event metadata.""" + return { + k: v + for k, v in model_to_dict(event).items() + if k not in ["id", "thumbnail"] + and v is not None + and isinstance(v, (str, int, float, bool)) + } + + +class Embeddings: + """ChromaDB embeddings database.""" + + def __init__(self, config: FrigateConfig) -> None: + if not config.semantic_search.enabled: + raise ValueError("Semantic search is not enabled.") + self.config: FrigateConfig = config + self.client: ChromaClient = ChromaClient( + host="127.0.0.1", + settings=Settings(anonymized_telemetry=False), + ) + + reindex = os.path.exists(f"{CONFIG_DIR}/.reindex_events") + if reindex: + logger.info("Indexing event embeddings...") + self.client.reset() + + self.thumbnail: Collection = self.client.get_or_create_collection( + name="event_thumbnail", embedding_function=ClipEmbedding() + ) + self.description: Collection = self.client.get_or_create_collection( + name="event_description", embedding_function=MiniLMEmbedding() + ) + + if reindex: + # On startup, embed all existing events + st = time.time() + + thumbnails = {"ids": [], "images": [], "metadatas": []} + descriptions = {"ids": [], "documents": [], "metadatas": []} + + events = Event.select().where( + (Event.has_clip == True | Event.has_snapshot == True) + & Event.thumbnail.is_null(False) + ) + + event: Event + for event in events.iterator(): + metadata = get_metadata(event) + thumbnail = base64.b64decode(event.thumbnail) + img = np.array(Image.open(io.BytesIO(thumbnail)).convert("RGB")) + thumbnails["ids"].append(event.id) + thumbnails["images"].append(img) + thumbnails["metadatas"].append(metadata) + if event.data.get("description") is not None: + descriptions["ids"].append(event.id) + descriptions["documents"].append(event.data["description"]) + descriptions["metadatas"].append(metadata) + + self.thumbnail.upsert( + images=thumbnails["images"], + metadatas=thumbnails["metadatas"], + ids=thumbnails["ids"], + ) + self.description.upsert( + documents=descriptions["documents"], + metadatas=descriptions["metadatas"], + ids=descriptions["ids"], + ) + + logger.info( + "Embedded %d thumbnails and %d descriptions in %s seconds", + len(thumbnails["ids"]), + len(descriptions["ids"]), + time.time() - st, + ) + + os.remove(f"{CONFIG_DIR}/.reindex_events") diff --git a/frigate/embeddings/processor.py b/frigate/embeddings/processor.py index 15908c9f3..62bb1e1ac 100644 --- a/frigate/embeddings/processor.py +++ b/frigate/embeddings/processor.py @@ -10,18 +10,13 @@ from multiprocessing.synchronize import Event as MpEvent import google.generativeai as genai import numpy as np -from chromadb import Collection -from chromadb import HttpClient as ChromaClient -from chromadb.config import Settings from peewee import DoesNotExist from PIL import Image -from playhouse.shortcuts import model_to_dict from frigate.config import FrigateConfig from frigate.models import Event -from .functions.clip import ClipEmbedding -from .functions.minilm_l6_v2 import MiniLMEmbedding +from . import Embeddings, get_metadata logger = logging.getLogger(__name__) @@ -32,39 +27,25 @@ class EmbeddingProcessor(threading.Thread): def __init__( self, config: FrigateConfig, + embeddings: Embeddings, queue: Queue, stop_event: MpEvent, ) -> None: threading.Thread.__init__(self) self.name = "chroma" self.config = config + self.embeddings = embeddings self.queue = queue self.stop_event = stop_event - self.chroma: ChromaClient = None - self.thumbnail: Collection = None - self.description: Collection = None self.gemini: genai.GenerativeModel = None def run(self) -> None: """Maintain a Chroma vector database for semantic search.""" # Exit if disabled - if not self.config.semantic_search.enabled: + if self.embeddings is None: return - # Create the database - self.chroma = ChromaClient( - host="127.0.0.1", settings=Settings(anonymized_telemetry=False) - ) - - # Create/Load the collection(s) - self.thumbnail = self.chroma.get_or_create_collection( - name="event_thumbnail", embedding_function=ClipEmbedding() - ) - self.description = self.chroma.get_or_create_collection( - name="event_description", embedding_function=MiniLMEmbedding() - ) - ## Initialize Gemini if self.config.gemini.enabled: genai.configure(api_key=self.config.gemini.api_key) @@ -88,13 +69,7 @@ class EmbeddingProcessor(threading.Thread): continue # Extract valid event metadata - metadata = { - k: v - for k, v in model_to_dict(event).items() - if k not in ["id", "thumbnail"] - and v is not None - and isinstance(v, (str, int, float, bool)) - } + metadata = get_metadata(event) thumbnail = base64.b64decode(event.thumbnail) # Encode the thumbnail @@ -124,7 +99,7 @@ class EmbeddingProcessor(threading.Thread): # Encode the thumbnail img = np.array(Image.open(io.BytesIO(thumbnail)).convert("RGB")) - self.thumbnail.add( + self.embeddings.thumbnail.upsert( images=[img], metadatas=[metadata], ids=[event_id], @@ -163,7 +138,7 @@ class EmbeddingProcessor(threading.Thread): event.save() # Encode the description - self.description.add( + self.embeddings.description.upsert( documents=[description], metadatas=[metadata], ids=[event.id], diff --git a/frigate/events/cleanup.py b/frigate/events/cleanup.py index 98da72f6c..25b30690a 100644 --- a/frigate/events/cleanup.py +++ b/frigate/events/cleanup.py @@ -10,6 +10,7 @@ from pathlib import Path from frigate.config import FrigateConfig from frigate.const import CLIPS_DIR +from frigate.embeddings import Embeddings from frigate.models import Event, Timeline logger = logging.getLogger(__name__) @@ -21,10 +22,13 @@ class EventCleanupType(str, Enum): class EventCleanup(threading.Thread): - def __init__(self, config: FrigateConfig, stop_event: MpEvent): + def __init__( + self, config: FrigateConfig, embeddings: Embeddings, stop_event: MpEvent + ): threading.Thread.__init__(self) self.name = "event_cleanup" self.config = config + self.embeddings = embeddings self.stop_event = stop_event self.camera_keys = list(self.config.cameras.keys()) self.removed_camera_labels: list[str] = None @@ -204,11 +208,12 @@ class EventCleanup(threading.Thread): media_path = Path(f"{os.path.join(CLIPS_DIR, media_name)}-clean.png") media_path.unlink(missing_ok=True) - ( - Event.delete() - .where(Event.id << [event.id for event in duplicate_events]) - .execute() - ) + duplicate_ids = [event.id for event in duplicate_events] + Event.delete().where(Event.id << duplicate_ids).execute() + # Also remove from embeddings database + if self.embeddings is not None and len(duplicate_ids) > 0: + self.embeddings.thumbnail.delete(duplicate_ids) + self.embeddings.description.delete(duplicate_ids) def run(self) -> None: # only expire events every 5 minutes @@ -223,10 +228,20 @@ class EventCleanup(threading.Thread): self.expire(EventCleanupType.snapshots) self.purge_duplicates() - # drop events from db where has_clip and has_snapshot are false - delete_query = Event.delete().where( - Event.has_clip == False, Event.has_snapshot == False + # get list of ids that have both expired clips and snapshots + # so we can delete them from the embeddings db (and the events table) + events = ( + Event.select(Event.id) + .where(Event.has_clip == False, Event.has_snapshot == False) + .iterator() ) - delete_query.execute() + events_to_delete = [e.id for e in events] + + if self.embeddings is not None and len(events_to_delete) > 0: + self.embeddings.thumbnail.delete(events_to_delete) + self.embeddings.description.delete(events_to_delete) + + # drop events from db where has_clip and has_snapshot are false + Event.delete().where(Event.id << events_to_delete).execute() logger.info("Exiting event cleanup...") diff --git a/frigate/http.py b/frigate/http.py index 2be2a3961..065340ee1 100644 --- a/frigate/http.py +++ b/frigate/http.py @@ -1,6 +1,7 @@ import base64 import copy import glob +import io import json import logging import os @@ -18,8 +19,6 @@ import numpy as np import pytz import requests from chromadb import Collection, QueryResult -from chromadb import HttpClient as ChromaClient -from chromadb.config import Settings from flask import ( Blueprint, Flask, @@ -31,6 +30,7 @@ from flask import ( request, ) from peewee import DoesNotExist, fn, operator +from PIL import Image from playhouse.shortcuts import model_to_dict from playhouse.sqliteq import SqliteQueueDatabase from tzlocal import get_localzone_name @@ -45,8 +45,7 @@ from frigate.const import ( MAX_SEGMENT_DURATION, RECORD_DIR, ) -from frigate.embeddings.functions.clip import ClipEmbedding -from frigate.embeddings.functions.minilm_l6_v2 import MiniLMEmbedding +from frigate.embeddings import Embeddings from frigate.events.external import ExternalEventProcessor from frigate.models import Event, Previews, Recordings, Regions, Timeline from frigate.object_processing import TrackedObject @@ -79,6 +78,7 @@ def create_app( onvif: OnvifController, external_processor: ExternalEventProcessor, plus_api: PlusApi, + embeddings: Embeddings, ): app = Flask(__name__) @@ -108,15 +108,7 @@ def create_app( app.plus_api = plus_api app.camera_error_image = None app.hwaccel_errors = [] - app.chroma = ChromaClient( - host="127.0.0.1", settings=Settings(anonymized_telemetry=False) - ) - app.thumbnail_collection = app.chroma.get_or_create_collection( - name="event_thumbnail", embedding_function=ClipEmbedding() - ) - app.description_collection = app.chroma.get_or_create_collection( - name="event_description", embedding_function=MiniLMEmbedding() - ) + app.embeddings = embeddings app.register_blueprint(bp) @@ -529,6 +521,9 @@ def delete_event(id): event.delete_instance() Timeline.delete().where(Timeline.source_id == id).execute() + if current_app.embeddings is not None: + current_app.embeddings.thumbnail.delete(ids=[id]) + current_app.embeddings.description.delete(ids=[id]) return make_response( jsonify({"success": True, "message": "Event " + id + " deleted"}), 200 ) @@ -1013,6 +1008,7 @@ def events(): min_length = request.args.get("min_length", type=float) max_length = request.args.get("max_length", type=float) search = request.args.get("search", type=str) or None + like = request.args.get("like", type=str) or None clauses = [] @@ -1168,37 +1164,57 @@ def events(): # Handle semantic search event_order = None - if search is not None: + if current_app.embeddings is not None: where = None if len(embeddings_filters) > 1: where = {"$and": embeddings_filters} elif len(embeddings_filters) == 1: where = embeddings_filters[0] - # Grab the ids of the events that match based on CLIP embeddings - thumbnails: Collection = current_app.thumbnail_collection - thumb_result: QueryResult = thumbnails.query( - query_texts=[search], - n_results=int(limit), - where=where, - ) - thumb_ids = dict(zip(thumb_result["ids"][0], thumb_result["distances"][0])) + if like is not None: + # Grab the ids of events that match the thumbnail image embeddings + thumbnails: Collection = current_app.embeddings.thumbnail + search_event = Event.get(Event.id == like) + thumbnail = base64.b64decode(search_event.thumbnail) + img = np.array(Image.open(io.BytesIO(thumbnail)).convert("RGB")) + thumb_result: QueryResult = thumbnails.query( + query_images=[img], + n_results=int(limit), + where=where, + ) + event_order = dict( + zip(thumb_result["ids"][0], thumb_result["distances"][0]) + ) - # Grab the ids of the events that match based on MiniLM embeddings - descriptions: Collection = current_app.description_collection - desc_result: QueryResult = descriptions.query( - query_texts=[search], - n_results=int(limit), - where=where, - ) - desc_ids = dict(zip(desc_result["ids"][0], desc_result["distances"][0])) + # For like, we want to remove all other filters + clauses = [(Event.id << list(event_order.keys()))] - event_order = { - k: min(i for i in (thumb_ids.get(k), desc_ids.get(k)) if i is not None) - for k in thumb_ids.keys() | desc_ids - } + elif search is not None: + # Grab the ids of the events that match based on CLIP embeddings + thumbnails: Collection = current_app.embeddings.thumbnail + thumb_result: QueryResult = thumbnails.query( + query_texts=[search], + n_results=int(limit), + where=where, + ) + thumb_ids = dict(zip(thumb_result["ids"][0], thumb_result["distances"][0])) - clauses.append((Event.id << list(event_order.keys()))) + # Grab the ids of the events that match based on MiniLM embeddings + descriptions: Collection = current_app.embeddings.description + desc_result: QueryResult = descriptions.query( + query_texts=[search], + n_results=int(limit), + where=where, + ) + desc_ids = dict(zip(desc_result["ids"][0], desc_result["distances"][0])) + + event_order = { + k: min(i for i in (thumb_ids.get(k), desc_ids.get(k)) if i is not None) + for k in thumb_ids.keys() | desc_ids + } + + # For search, we want to keep all the other clauses and filters + clauses.append((Event.id << list(event_order.keys()))) events = ( Event.select(*selected_columns) diff --git a/web-old/src/routes/Events.jsx b/web-old/src/routes/Events.jsx index 8261badd0..d0dd86b57 100644 --- a/web-old/src/routes/Events.jsx +++ b/web-old/src/routes/Events.jsx @@ -259,6 +259,16 @@ export default function Events({ path, ...props }) { setState({ ...state, showDownloadMenu: true }); }; + const showSimilarEvents = (event_id, e) => { + if (e) { + e.stopPropagation(); + } + if (searchParams?.like == event_id) { + return; + } + onFilter('like', event_id); + }; + const showSubmitToPlus = (event_id, label, box, e) => { if (e) { e.stopPropagation(); @@ -289,6 +299,7 @@ export default function Events({ path, ...props }) { (name, value) => { setShowInProgress(false); const updatedParams = { ...searchParams, [name]: value }; + if (name !== 'like') delete updatedParams['like']; setSearchParams(updatedParams); const queryString = Object.keys(updatedParams) .map((key) => { @@ -311,7 +322,10 @@ export default function Events({ path, ...props }) { onFilter('is_submitted', searchParams.is_submitted); }, [searchParams, onFilter]); - const isDone = (eventPages?.[eventPages.length - 1]?.length ?? 0) < API_LIMIT; + const isDone = + (eventPages?.[eventPages.length - 1]?.length ?? 0) < API_LIMIT || + (searchParams?.search?.length ?? 0) > 0 || + (searchParams?.like?.length ?? 0) > 0; // hooks for infinite scroll const observer = useRef(); @@ -471,7 +485,8 @@ export default function Events({ path, ...props }) { download /> )} - {(event?.data?.type || 'object') == 'object' && + {config.plus.enabled && + (event?.data?.type || 'object') == 'object' && downloadEvent.end_time && downloadEvent.has_snapshot && !downloadEvent.plus_id && ( @@ -482,7 +497,7 @@ export default function Events({ path, ...props }) { onSelect={() => showSubmitToPlus(downloadEvent.id, downloadEvent.label, downloadEvent.box)} /> )} - {downloadEvent.plus_id && ( + {config.plus.enabled && downloadEvent.plus_id && ( ); @@ -750,6 +766,7 @@ export default function Events({ path, ...props }) { }); }} onSave={onSave} + showSimilarEvents={showSimilarEvents} showSubmitToPlus={showSubmitToPlus} /> ); @@ -782,6 +799,7 @@ function Event({ onDownloadClick, onReady, onSave, + showSimilarEvents, showSubmitToPlus, }) { const apiHost = useApiHost(); @@ -851,30 +869,39 @@ function Event({