delete old vectors as events removed from DB, add image similarity search

This commit is contained in:
Jason Hunter 2023-12-19 01:19:26 -05:00
parent 343136341b
commit dbf9f0cb05
8 changed files with 258 additions and 112 deletions

View File

@ -10,7 +10,7 @@
"features": {
"ghcr.io/devcontainers/features/common-utils:1": {}
},
"forwardPorts": [5000, 5001, 5173, 1935, 8000, 8554, 8555],
"forwardPorts": [5000, 5001, 5173, 1935, 8554, 8555],
"portsAttributes": {
"5000": {
"label": "NGINX",
@ -28,10 +28,6 @@
"label": "RTMP",
"onAutoForward": "silent"
},
"8000": {
"label": "Chroma",
"onAutoForward": "silent"
},
"8554": {
"label": "gortc RTSP",
"onAutoForward": "silent"

View File

@ -178,6 +178,10 @@ ENV NVIDIA_DRIVER_CAPABILITIES="compute,video,utility"
# Turn off Chroma Telemetry: https://docs.trychroma.com/telemetry#opting-out
ENV ANONYMIZED_TELEMETRY=False
# Allow resetting the chroma database
ENV ALLOW_RESET=True
# Disable tokenizer parallelism warning
ENV TOKENIZERS_PARALLELISM=true
ENV PATH="/usr/lib/btbn-ffmpeg/bin:/usr/local/go2rtc/bin:/usr/local/nginx/sbin:${PATH}"
@ -243,10 +247,10 @@ CMD ["sleep", "infinity"]
FROM --platform=$BUILDPLATFORM node:20 AS web-build
WORKDIR /work
COPY web/package.json web/package-lock.json ./
COPY web-old/package.json web-old/package-lock.json ./
RUN npm install
COPY web/ ./
COPY web-old/ ./
RUN npm run build \
&& mv dist/BASE_PATH/monacoeditorwork/* dist/assets/ \
&& rm -rf dist/BASE_PATH

View File

@ -31,6 +31,7 @@ from frigate.const import (
MODEL_CACHE_DIR,
RECORD_DIR,
)
from frigate.embeddings import Embeddings
from frigate.embeddings.processor import EmbeddingProcessor
from frigate.events.audio import listen_to_audio
from frigate.events.cleanup import EventCleanup
@ -341,6 +342,13 @@ class FrigateApp:
migrate_db.close()
def init_embeddings(self) -> None:
self.embeddings: Embeddings = None
try:
self.embeddings = Embeddings(self.config)
except ValueError:
pass
def init_go2rtc(self) -> None:
for proc in psutil.process_iter(["pid", "name"]):
if proc.info["name"] == "go2rtc":
@ -407,6 +415,7 @@ class FrigateApp:
self.onvif_controller,
self.external_event_processor,
self.plus_api,
self.embeddings,
)
def init_onvif(self) -> None:
@ -590,7 +599,7 @@ class FrigateApp:
def start_embeddings_processor(self) -> None:
self.embeddings_processor = EmbeddingProcessor(
self.config, self.embeddings_queue, self.stop_event
self.config, self.embeddings, self.embeddings_queue, self.stop_event
)
self.embeddings_processor.start()
@ -607,7 +616,7 @@ class FrigateApp:
self.event_processor.start()
def start_event_cleanup(self) -> None:
self.event_cleanup = EventCleanup(self.config, self.stop_event)
self.event_cleanup = EventCleanup(self.config, self.embeddings, self.stop_event)
self.event_cleanup.start()
def start_record_cleanup(self) -> None:
@ -688,10 +697,11 @@ class FrigateApp:
self.set_log_levels()
self.init_queues()
self.init_database()
self.bind_database()
self.init_embeddings()
self.init_onvif()
self.init_recording_manager()
self.init_go2rtc()
self.bind_database()
self.init_inter_process_communicator()
self.init_dispatcher()
except Exception as e:

View File

@ -0,0 +1,103 @@
"""ChromaDB embeddings database."""
import base64
import io
import logging
import os
import time
import numpy as np
from chromadb import Collection
from chromadb import HttpClient as ChromaClient
from chromadb.config import Settings
from PIL import Image
from playhouse.shortcuts import model_to_dict
from frigate.config import FrigateConfig
from frigate.const import CONFIG_DIR
from frigate.models import Event
from .functions.clip import ClipEmbedding
from .functions.minilm_l6_v2 import MiniLMEmbedding
logger = logging.getLogger(__name__)
def get_metadata(event: Event) -> dict:
"""Extract valid event metadata."""
return {
k: v
for k, v in model_to_dict(event).items()
if k not in ["id", "thumbnail"]
and v is not None
and isinstance(v, (str, int, float, bool))
}
class Embeddings:
"""ChromaDB embeddings database."""
def __init__(self, config: FrigateConfig) -> None:
if not config.semantic_search.enabled:
raise ValueError("Semantic search is not enabled.")
self.config: FrigateConfig = config
self.client: ChromaClient = ChromaClient(
host="127.0.0.1",
settings=Settings(anonymized_telemetry=False),
)
reindex = os.path.exists(f"{CONFIG_DIR}/.reindex_events")
if reindex:
logger.info("Indexing event embeddings...")
self.client.reset()
self.thumbnail: Collection = self.client.get_or_create_collection(
name="event_thumbnail", embedding_function=ClipEmbedding()
)
self.description: Collection = self.client.get_or_create_collection(
name="event_description", embedding_function=MiniLMEmbedding()
)
if reindex:
# On startup, embed all existing events
st = time.time()
thumbnails = {"ids": [], "images": [], "metadatas": []}
descriptions = {"ids": [], "documents": [], "metadatas": []}
events = Event.select().where(
(Event.has_clip == True | Event.has_snapshot == True)
& Event.thumbnail.is_null(False)
)
event: Event
for event in events.iterator():
metadata = get_metadata(event)
thumbnail = base64.b64decode(event.thumbnail)
img = np.array(Image.open(io.BytesIO(thumbnail)).convert("RGB"))
thumbnails["ids"].append(event.id)
thumbnails["images"].append(img)
thumbnails["metadatas"].append(metadata)
if event.data.get("description") is not None:
descriptions["ids"].append(event.id)
descriptions["documents"].append(event.data["description"])
descriptions["metadatas"].append(metadata)
self.thumbnail.upsert(
images=thumbnails["images"],
metadatas=thumbnails["metadatas"],
ids=thumbnails["ids"],
)
self.description.upsert(
documents=descriptions["documents"],
metadatas=descriptions["metadatas"],
ids=descriptions["ids"],
)
logger.info(
"Embedded %d thumbnails and %d descriptions in %s seconds",
len(thumbnails["ids"]),
len(descriptions["ids"]),
time.time() - st,
)
os.remove(f"{CONFIG_DIR}/.reindex_events")

View File

@ -10,18 +10,13 @@ from multiprocessing.synchronize import Event as MpEvent
import google.generativeai as genai
import numpy as np
from chromadb import Collection
from chromadb import HttpClient as ChromaClient
from chromadb.config import Settings
from peewee import DoesNotExist
from PIL import Image
from playhouse.shortcuts import model_to_dict
from frigate.config import FrigateConfig
from frigate.models import Event
from .functions.clip import ClipEmbedding
from .functions.minilm_l6_v2 import MiniLMEmbedding
from . import Embeddings, get_metadata
logger = logging.getLogger(__name__)
@ -32,39 +27,25 @@ class EmbeddingProcessor(threading.Thread):
def __init__(
self,
config: FrigateConfig,
embeddings: Embeddings,
queue: Queue,
stop_event: MpEvent,
) -> None:
threading.Thread.__init__(self)
self.name = "chroma"
self.config = config
self.embeddings = embeddings
self.queue = queue
self.stop_event = stop_event
self.chroma: ChromaClient = None
self.thumbnail: Collection = None
self.description: Collection = None
self.gemini: genai.GenerativeModel = None
def run(self) -> None:
"""Maintain a Chroma vector database for semantic search."""
# Exit if disabled
if not self.config.semantic_search.enabled:
if self.embeddings is None:
return
# Create the database
self.chroma = ChromaClient(
host="127.0.0.1", settings=Settings(anonymized_telemetry=False)
)
# Create/Load the collection(s)
self.thumbnail = self.chroma.get_or_create_collection(
name="event_thumbnail", embedding_function=ClipEmbedding()
)
self.description = self.chroma.get_or_create_collection(
name="event_description", embedding_function=MiniLMEmbedding()
)
## Initialize Gemini
if self.config.gemini.enabled:
genai.configure(api_key=self.config.gemini.api_key)
@ -88,13 +69,7 @@ class EmbeddingProcessor(threading.Thread):
continue
# Extract valid event metadata
metadata = {
k: v
for k, v in model_to_dict(event).items()
if k not in ["id", "thumbnail"]
and v is not None
and isinstance(v, (str, int, float, bool))
}
metadata = get_metadata(event)
thumbnail = base64.b64decode(event.thumbnail)
# Encode the thumbnail
@ -124,7 +99,7 @@ class EmbeddingProcessor(threading.Thread):
# Encode the thumbnail
img = np.array(Image.open(io.BytesIO(thumbnail)).convert("RGB"))
self.thumbnail.add(
self.embeddings.thumbnail.upsert(
images=[img],
metadatas=[metadata],
ids=[event_id],
@ -163,7 +138,7 @@ class EmbeddingProcessor(threading.Thread):
event.save()
# Encode the description
self.description.add(
self.embeddings.description.upsert(
documents=[description],
metadatas=[metadata],
ids=[event.id],

View File

@ -10,6 +10,7 @@ from pathlib import Path
from frigate.config import FrigateConfig
from frigate.const import CLIPS_DIR
from frigate.embeddings import Embeddings
from frigate.models import Event, Timeline
logger = logging.getLogger(__name__)
@ -21,10 +22,13 @@ class EventCleanupType(str, Enum):
class EventCleanup(threading.Thread):
def __init__(self, config: FrigateConfig, stop_event: MpEvent):
def __init__(
self, config: FrigateConfig, embeddings: Embeddings, stop_event: MpEvent
):
threading.Thread.__init__(self)
self.name = "event_cleanup"
self.config = config
self.embeddings = embeddings
self.stop_event = stop_event
self.camera_keys = list(self.config.cameras.keys())
self.removed_camera_labels: list[str] = None
@ -204,11 +208,12 @@ class EventCleanup(threading.Thread):
media_path = Path(f"{os.path.join(CLIPS_DIR, media_name)}-clean.png")
media_path.unlink(missing_ok=True)
(
Event.delete()
.where(Event.id << [event.id for event in duplicate_events])
.execute()
)
duplicate_ids = [event.id for event in duplicate_events]
Event.delete().where(Event.id << duplicate_ids).execute()
# Also remove from embeddings database
if self.embeddings is not None and len(duplicate_ids) > 0:
self.embeddings.thumbnail.delete(duplicate_ids)
self.embeddings.description.delete(duplicate_ids)
def run(self) -> None:
# only expire events every 5 minutes
@ -223,10 +228,20 @@ class EventCleanup(threading.Thread):
self.expire(EventCleanupType.snapshots)
self.purge_duplicates()
# drop events from db where has_clip and has_snapshot are false
delete_query = Event.delete().where(
Event.has_clip == False, Event.has_snapshot == False
# get list of ids that have both expired clips and snapshots
# so we can delete them from the embeddings db (and the events table)
events = (
Event.select(Event.id)
.where(Event.has_clip == False, Event.has_snapshot == False)
.iterator()
)
delete_query.execute()
events_to_delete = [e.id for e in events]
if self.embeddings is not None and len(events_to_delete) > 0:
self.embeddings.thumbnail.delete(events_to_delete)
self.embeddings.description.delete(events_to_delete)
# drop events from db where has_clip and has_snapshot are false
Event.delete().where(Event.id << events_to_delete).execute()
logger.info("Exiting event cleanup...")

View File

@ -1,6 +1,7 @@
import base64
import copy
import glob
import io
import json
import logging
import os
@ -18,8 +19,6 @@ import numpy as np
import pytz
import requests
from chromadb import Collection, QueryResult
from chromadb import HttpClient as ChromaClient
from chromadb.config import Settings
from flask import (
Blueprint,
Flask,
@ -31,6 +30,7 @@ from flask import (
request,
)
from peewee import DoesNotExist, fn, operator
from PIL import Image
from playhouse.shortcuts import model_to_dict
from playhouse.sqliteq import SqliteQueueDatabase
from tzlocal import get_localzone_name
@ -45,8 +45,7 @@ from frigate.const import (
MAX_SEGMENT_DURATION,
RECORD_DIR,
)
from frigate.embeddings.functions.clip import ClipEmbedding
from frigate.embeddings.functions.minilm_l6_v2 import MiniLMEmbedding
from frigate.embeddings import Embeddings
from frigate.events.external import ExternalEventProcessor
from frigate.models import Event, Previews, Recordings, Regions, Timeline
from frigate.object_processing import TrackedObject
@ -79,6 +78,7 @@ def create_app(
onvif: OnvifController,
external_processor: ExternalEventProcessor,
plus_api: PlusApi,
embeddings: Embeddings,
):
app = Flask(__name__)
@ -108,15 +108,7 @@ def create_app(
app.plus_api = plus_api
app.camera_error_image = None
app.hwaccel_errors = []
app.chroma = ChromaClient(
host="127.0.0.1", settings=Settings(anonymized_telemetry=False)
)
app.thumbnail_collection = app.chroma.get_or_create_collection(
name="event_thumbnail", embedding_function=ClipEmbedding()
)
app.description_collection = app.chroma.get_or_create_collection(
name="event_description", embedding_function=MiniLMEmbedding()
)
app.embeddings = embeddings
app.register_blueprint(bp)
@ -529,6 +521,9 @@ def delete_event(id):
event.delete_instance()
Timeline.delete().where(Timeline.source_id == id).execute()
if current_app.embeddings is not None:
current_app.embeddings.thumbnail.delete(ids=[id])
current_app.embeddings.description.delete(ids=[id])
return make_response(
jsonify({"success": True, "message": "Event " + id + " deleted"}), 200
)
@ -1013,6 +1008,7 @@ def events():
min_length = request.args.get("min_length", type=float)
max_length = request.args.get("max_length", type=float)
search = request.args.get("search", type=str) or None
like = request.args.get("like", type=str) or None
clauses = []
@ -1168,37 +1164,57 @@ def events():
# Handle semantic search
event_order = None
if search is not None:
if current_app.embeddings is not None:
where = None
if len(embeddings_filters) > 1:
where = {"$and": embeddings_filters}
elif len(embeddings_filters) == 1:
where = embeddings_filters[0]
# Grab the ids of the events that match based on CLIP embeddings
thumbnails: Collection = current_app.thumbnail_collection
thumb_result: QueryResult = thumbnails.query(
query_texts=[search],
n_results=int(limit),
where=where,
)
thumb_ids = dict(zip(thumb_result["ids"][0], thumb_result["distances"][0]))
if like is not None:
# Grab the ids of events that match the thumbnail image embeddings
thumbnails: Collection = current_app.embeddings.thumbnail
search_event = Event.get(Event.id == like)
thumbnail = base64.b64decode(search_event.thumbnail)
img = np.array(Image.open(io.BytesIO(thumbnail)).convert("RGB"))
thumb_result: QueryResult = thumbnails.query(
query_images=[img],
n_results=int(limit),
where=where,
)
event_order = dict(
zip(thumb_result["ids"][0], thumb_result["distances"][0])
)
# Grab the ids of the events that match based on MiniLM embeddings
descriptions: Collection = current_app.description_collection
desc_result: QueryResult = descriptions.query(
query_texts=[search],
n_results=int(limit),
where=where,
)
desc_ids = dict(zip(desc_result["ids"][0], desc_result["distances"][0]))
# For like, we want to remove all other filters
clauses = [(Event.id << list(event_order.keys()))]
event_order = {
k: min(i for i in (thumb_ids.get(k), desc_ids.get(k)) if i is not None)
for k in thumb_ids.keys() | desc_ids
}
elif search is not None:
# Grab the ids of the events that match based on CLIP embeddings
thumbnails: Collection = current_app.embeddings.thumbnail
thumb_result: QueryResult = thumbnails.query(
query_texts=[search],
n_results=int(limit),
where=where,
)
thumb_ids = dict(zip(thumb_result["ids"][0], thumb_result["distances"][0]))
clauses.append((Event.id << list(event_order.keys())))
# Grab the ids of the events that match based on MiniLM embeddings
descriptions: Collection = current_app.embeddings.description
desc_result: QueryResult = descriptions.query(
query_texts=[search],
n_results=int(limit),
where=where,
)
desc_ids = dict(zip(desc_result["ids"][0], desc_result["distances"][0]))
event_order = {
k: min(i for i in (thumb_ids.get(k), desc_ids.get(k)) if i is not None)
for k in thumb_ids.keys() | desc_ids
}
# For search, we want to keep all the other clauses and filters
clauses.append((Event.id << list(event_order.keys())))
events = (
Event.select(*selected_columns)

View File

@ -259,6 +259,16 @@ export default function Events({ path, ...props }) {
setState({ ...state, showDownloadMenu: true });
};
const showSimilarEvents = (event_id, e) => {
if (e) {
e.stopPropagation();
}
if (searchParams?.like == event_id) {
return;
}
onFilter('like', event_id);
};
const showSubmitToPlus = (event_id, label, box, e) => {
if (e) {
e.stopPropagation();
@ -289,6 +299,7 @@ export default function Events({ path, ...props }) {
(name, value) => {
setShowInProgress(false);
const updatedParams = { ...searchParams, [name]: value };
if (name !== 'like') delete updatedParams['like'];
setSearchParams(updatedParams);
const queryString = Object.keys(updatedParams)
.map((key) => {
@ -311,7 +322,10 @@ export default function Events({ path, ...props }) {
onFilter('is_submitted', searchParams.is_submitted);
}, [searchParams, onFilter]);
const isDone = (eventPages?.[eventPages.length - 1]?.length ?? 0) < API_LIMIT;
const isDone =
(eventPages?.[eventPages.length - 1]?.length ?? 0) < API_LIMIT ||
(searchParams?.search?.length ?? 0) > 0 ||
(searchParams?.like?.length ?? 0) > 0;
// hooks for infinite scroll
const observer = useRef();
@ -471,7 +485,8 @@ export default function Events({ path, ...props }) {
download
/>
)}
{(event?.data?.type || 'object') == 'object' &&
{config.plus.enabled &&
(event?.data?.type || 'object') == 'object' &&
downloadEvent.end_time &&
downloadEvent.has_snapshot &&
!downloadEvent.plus_id && (
@ -482,7 +497,7 @@ export default function Events({ path, ...props }) {
onSelect={() => showSubmitToPlus(downloadEvent.id, downloadEvent.label, downloadEvent.box)}
/>
)}
{downloadEvent.plus_id && (
{config.plus.enabled && downloadEvent.plus_id && (
<MenuItem
icon={UploadPlus}
label={'Sent to Frigate+'}
@ -710,6 +725,7 @@ export default function Events({ path, ...props }) {
});
}}
onSave={onSave}
showSimilarEvents={showSimilarEvents}
showSubmitToPlus={showSubmitToPlus}
/>
);
@ -750,6 +766,7 @@ export default function Events({ path, ...props }) {
});
}}
onSave={onSave}
showSimilarEvents={showSimilarEvents}
showSubmitToPlus={showSubmitToPlus}
/>
);
@ -782,6 +799,7 @@ function Event({
onDownloadClick,
onReady,
onSave,
showSimilarEvents,
showSubmitToPlus,
}) {
const apiHost = useApiHost();
@ -851,30 +869,39 @@ function Event({
</div>
</div>
<div class="hidden sm:flex flex-col justify-end mr-2">
{event.end_time && event.has_snapshot && (event?.data?.type || 'object') == 'object' && (
<Fragment>
{event.plus_id ? (
<div className="uppercase text-xs underline">
<Link
href={`https://plus.frigate.video/dashboard/edit-image/?id=${event.plus_id}`}
target="_blank"
rel="nofollow"
>
Edit in Frigate+
</Link>
</div>
) : (
<Button
color="gray"
disabled={uploading.includes(event.id)}
onClick={(e) => showSubmitToPlus(event.id, event.label, event?.data?.box || event.box, e)}
>
{uploading.includes(event.id) ? 'Uploading...' : 'Send to Frigate+'}
</Button>
)}
</Fragment>
{event.id && (
<Button color="gray" onClick={(e) => showSimilarEvents(event.id, e)}>
Find Similar
</Button>
)}
</div>
{config.plus.enabled && (
<div class="hidden sm:flex flex-col justify-end mr-2">
{event.end_time && event.has_snapshot && (event?.data?.type || 'object') == 'object' && (
<Fragment>
{event.plus_id ? (
<div className="uppercase text-xs underline">
<Link
href={`https://plus.frigate.video/dashboard/edit-image/?id=${event.plus_id}`}
target="_blank"
rel="nofollow"
>
Edit in Frigate+
</Link>
</div>
) : (
<Button
color="gray"
disabled={uploading.includes(event.id)}
onClick={(e) => showSubmitToPlus(event.id, event.label, event?.data?.box || event.box, e)}
>
{uploading.includes(event.id) ? 'Uploading...' : 'Send to Frigate+'}
</Button>
)}
</Fragment>
)}
</div>
)}
<div class="flex flex-col">
<Delete
className="h-6 w-6 cursor-pointer"