diff --git a/frigate/api/event.py b/frigate/api/event.py
index 285c03850..8592deb3f 100644
--- a/frigate/api/event.py
+++ b/frigate/api/event.py
@@ -1255,6 +1255,42 @@ def regenerate_description(
     )
 
 
+@router.post(
+    "/description/generate",
+    response_model=GenericResponse,
+    # dependencies=[Depends(require_role(["admin"]))],
+)
+def generate_description_embedding(
+    request: Request,
+    body: EventsDescriptionBody,
+):
+    """Generate an embedding for the description in the request body.
+
+    An embedding is only requested when semantic search is enabled and the
+    description is non-empty; otherwise the response reports a failure.
+    """
+    new_description = body.description
+    # Bound up front so the response never reads an unassigned name
+    result = None
+
+    # Only request an embedding if semantic search is enabled
+    if request.app.frigate_config.semantic_search.enabled and new_description:
+        context: EmbeddingsContext = request.app.embeddings
+        result = context.generate_description_embedding(new_description)
+
+    generated = bool(result)
+
+    return JSONResponse(
+        content={
+            "success": generated,
+            "message": f"Embedding for description is {result}"
+            if generated
+            else "Failed to generate embedding",
+        },
+        status_code=200,
+    )
+
+
 def delete_single_event(event_id: str, request: Request) -> dict:
     try:
         event = Event.get(Event.id == event_id)
diff --git a/frigate/config/camera/camera.py b/frigate/config/camera/camera.py
index 33ad312a2..c356984f3 100644
--- a/frigate/config/camera/camera.py
+++ b/frigate/config/camera/camera.py
@@ -22,6 +22,7 @@ from ..classification import (
     AudioTranscriptionConfig,
     CameraFaceRecognitionConfig,
     CameraLicensePlateRecognitionConfig,
+    CameraSemanticSearchConfig,
 )
 from .audio import AudioConfig
 from .birdseye import BirdseyeCameraConfig
@@ -91,6 +92,10 @@ class CameraConfig(FrigateBaseModel):
     review: ReviewConfig = Field(
         default_factory=ReviewConfig, title="Review configuration."
     )
+    semantic_search: CameraSemanticSearchConfig = Field(
+        default_factory=CameraSemanticSearchConfig,
+        title="Semantic search configuration.",
+    )
     snapshots: SnapshotsConfig = Field(
         default_factory=SnapshotsConfig, title="Snapshot configuration."
) diff --git a/frigate/config/classification.py b/frigate/config/classification.py index 6430c96fa..66b6c20ea 100644 --- a/frigate/config/classification.py +++ b/frigate/config/classification.py @@ -10,6 +10,7 @@ __all__ = [ "CameraLicensePlateRecognitionConfig", "FaceRecognitionConfig", "SemanticSearchConfig", + "CameraSemanticSearchConfig", "LicensePlateRecognitionConfig", ] @@ -113,6 +114,14 @@ class SemanticSearchConfig(FrigateBaseModel): ) +class CameraSemanticSearchConfig(FrigateBaseModel): + triggers: Optional[list[str]] = Field( + default=None, title="Text phrases to elevate tracked objects to review alerts." + ) + + model_config = ConfigDict(extra="forbid", protected_namespaces=()) + + class FaceRecognitionConfig(FrigateBaseModel): enabled: bool = Field(default=False, title="Enable face recognition.") model_size: str = Field( diff --git a/frigate/data_processing/real_time/semantic_trigger.py b/frigate/data_processing/real_time/semantic_trigger.py new file mode 100644 index 000000000..c37076e4a --- /dev/null +++ b/frigate/data_processing/real_time/semantic_trigger.py @@ -0,0 +1,81 @@ +"""Real time processor to trigger alerts by matching embeddings.""" + +import datetime +import logging +from typing import Any + +import cv2 +import numpy as np + +from frigate.comms.inter_process import InterProcessRequestor +from frigate.config import FrigateConfig +from frigate.config.classification import CameraSemanticSearchConfig +from frigate.util.builtin import EventsPerSecond, InferenceSpeed + +from ..types import DataProcessorMetrics +from .api import RealTimeProcessorApi + +logger = logging.getLogger(__name__) + + +class SemanticTriggerProcessor(RealTimeProcessorApi): + def __init__( + self, + config: FrigateConfig, + trigger_config: CameraSemanticSearchConfig, + requestor: InterProcessRequestor, + metrics: DataProcessorMetrics, + embeddings, + ): + super().__init__(config, metrics) + self.embeddings = embeddings + self.trigger_config = trigger_config + 
self.requestor = requestor + self.image_inference_speed = InferenceSpeed(self.metrics.image_embeddings_speed) + self.image_eps = EventsPerSecond() + self.text_inference_speed = InferenceSpeed(self.metrics.text_embeddings_speed) + self.text_eps = EventsPerSecond() + self.trigger_embeddings: list[np.ndarray] = [] + self.last_run = datetime.datetime.now().timestamp() + self.__generate_trigger_embeddings() + + def __generate_trigger_embeddings(self) -> None: + self.image_eps.start() + self.text_eps.start() + for trigger in self.trigger_config.triggers: + embedding = self.embeddings.embed_description(None, trigger, upsert=False) + self.trigger_embeddings.append(embedding) + + def __update_metrics(self, duration: float) -> None: + self.image_eps.update() + self.image_inference_speed.update(duration) + + def process_frame(self, frame_data: dict[str, Any], frame: np.ndarray): + # self.metrics.classification_cps[ + # self.model_config.name + # ].value = self.classifications_per_second.eps() + camera = frame_data.get("camera") + + now = datetime.datetime.now().timestamp() + + rgb = cv2.cvtColor(frame, cv2.COLOR_YUV2RGB_I420) + img_embedding = self.embeddings.embed_thumbnail(None, rgb, upsert=False) + self.__update_metrics(datetime.datetime.now().timestamp() - now) + + if camera != "framecache": + return + + for trigger_embedding in self.trigger_embeddings: + for trigger in self.trigger_config.triggers: + dot_product = np.dot(img_embedding, trigger_embedding) + norm_img_embedding = np.linalg.norm(img_embedding) + norm_trigger_embedding = np.linalg.norm(trigger_embedding) + logger.info( + f"{camera}: Cosine similarity is {dot_product / (norm_img_embedding * norm_trigger_embedding)}" + ) + + def handle_request(self, topic, request_data): + return None + + def expire_object(self, object_id, camera): + pass diff --git a/frigate/embeddings/__init__.py b/frigate/embeddings/__init__.py index c44227a72..12b42c7bb 100644 --- a/frigate/embeddings/__init__.py +++ 
b/frigate/embeddings/__init__.py @@ -287,3 +287,9 @@ class EmbeddingsContext: return self.requestor.send_data( EmbeddingsRequestEnum.transcribe_audio.value, {"event": event} ) + + def generate_description_embedding(self, text: str) -> None: + return self.requestor.send_data( + EmbeddingsRequestEnum.embed_description.value, + {"id": None, "description": text, "upsert": False}, + ) diff --git a/frigate/embeddings/maintainer.py b/frigate/embeddings/maintainer.py index c659d04fe..6df167495 100644 --- a/frigate/embeddings/maintainer.py +++ b/frigate/embeddings/maintainer.py @@ -56,6 +56,7 @@ from frigate.data_processing.real_time.face import FaceRealTimeProcessor from frigate.data_processing.real_time.license_plate import ( LicensePlateRealTimeProcessor, ) +from frigate.data_processing.real_time.semantic_trigger import SemanticTriggerProcessor from frigate.data_processing.types import DataProcessorMetrics, PostProcessDataEnum from frigate.db.sqlitevecq import SqliteVecQueueDatabase from frigate.events.types import EventTypeEnum, RegenerateDescriptionEnum @@ -188,6 +189,16 @@ class EmbeddingMaintainer(threading.Thread): ) ) + self.realtime_processors.append( + SemanticTriggerProcessor( + self.config, + self.config.cameras["orlandocam"].semantic_search, + self.requestor, + metrics, + self.embeddings, + ) + ) + # post processors self.post_processors: list[PostProcessorApi] = []