From f0812d1af1faac1e7c119b1fae26b1b5c0074a06 Mon Sep 17 00:00:00 2001 From: Nicolas Mowen Date: Tue, 22 Oct 2024 15:59:33 -0600 Subject: [PATCH 01/18] Update version --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b7c6ab821..5500174af 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ default_target: local COMMIT_HASH := $(shell git log -1 --pretty=format:"%h"|tail -1) -VERSION = 0.15.0 +VERSION = 0.16.0 IMAGE_REPO ?= ghcr.io/blakeblackshear/frigate GITHUB_REF_NAME ?= $(shell git rev-parse --abbrev-ref HEAD) BOARDS= #Initialized empty From 4c2ac0492b4bfdc5109bb194fe5a0da01d130023 Mon Sep 17 00:00:00 2001 From: Nicolas Mowen Date: Tue, 22 Oct 2024 16:05:48 -0600 Subject: [PATCH 02/18] Face recognition backend (#14495) * Add basic config and face recognition table * Reconfigure updates processing to handle face * Crop frame to face box * Implement face embedding calculation * Get matching face embeddings * Add support face recognition based on existing faces * Use arcface face embeddings instead of generic embeddings model * Add apis for managing faces * Implement face uploading API * Build out more APIs * Add min area config * Handle larger images * Add more debug logs * fix calculation * Reduce timeout * Small tweaks * Use webp images * Use facenet model --- docker/main/requirements-wheels.txt | 2 + frigate/api/classification.py | 56 +++++++++ frigate/api/defs/tags.py | 1 + frigate/api/fastapi_app.py | 12 +- frigate/comms/embeddings_updater.py | 3 +- frigate/config/semantic_search.py | 15 ++- frigate/const.py | 3 +- frigate/db/sqlitevecq.py | 17 ++- frigate/embeddings/__init__.py | 23 ++++ frigate/embeddings/embeddings.py | 50 +++++++- frigate/embeddings/functions/onnx.py | 54 ++++++++- frigate/embeddings/maintainer.py | 170 ++++++++++++++++++++++----- frigate/util/downloader.py | 4 +- 13 files changed, 365 insertions(+), 45 deletions(-) create mode 100644 frigate/api/classification.py diff --git a/docker/main/requirements-wheels.txt b/docker/main/requirements-wheels.txt index 4db88ccd2..804f6135e 100644 --- a/docker/main/requirements-wheels.txt +++ b/docker/main/requirements-wheels.txt @@ -10,6 +10,8 @@ imutils == 0.5.* joserfc == 1.0.* pathvalidate == 3.2.* markupsafe == 2.1.* +python-multipart == 0.0.12 +# General mypy == 1.6.1 numpy == 1.26.* onvif_zeep == 0.2.12 diff --git a/frigate/api/classification.py b/frigate/api/classification.py new file mode 100644 index 000000000..d862008c8 --- /dev/null +++ b/frigate/api/classification.py @@ -0,0 +1,56 @@ +"""Object classification APIs.""" + +import logging + +from fastapi import APIRouter, Request, UploadFile +from fastapi.responses import JSONResponse + +from frigate.api.defs.tags import Tags +from frigate.embeddings import EmbeddingsContext + +logger = logging.getLogger(__name__) + +router = APIRouter(tags=[Tags.events]) + + +@router.get("/faces") +def get_faces(): + return JSONResponse(content={"message": "there are faces"}) + + +@router.post("/faces/{name}") +async def register_face(request: Request, name: str, file: UploadFile): + # if not file.content_type.startswith("image"): + # return JSONResponse( + # status_code=400, + # content={ + # "success": False, + # "message": "Only an image can be used to register a face.", + # }, + # ) + + context: EmbeddingsContext = request.app.embeddings + context.register_face(name, await file.read()) + return JSONResponse( + status_code=200, + content={"success": True, "message": "Successfully registered face."}, + ) + + 
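The registration endpoint above accepts a single multipart image upload and hands it to the embeddings process. A minimal client-side sketch (the base URL and the `/api` prefix are assumptions about a typical deployment; the route `/faces/{name}` and the `file` multipart field name come from the handler above):

```python
import requests

# Hypothetical call against a local Frigate instance (host/port assumed from
# FRIGATE_LOCALHOST; the "/api" prefix is how the web server normally exposes routes).
with open("jane.jpg", "rb") as image:
    response = requests.post(
        "http://127.0.0.1:5000/api/faces/jane",
        files={"file": ("jane.jpg", image, "image/jpeg")},
    )

# Expected on success, per the handler above:
# {"success": true, "message": "Successfully registered face."}
print(response.json())
```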
+@router.delete("/faces") +def deregister_faces(request: Request, body: dict = None): + json: dict[str, any] = body or {} + list_of_ids = json.get("ids", "") + + if not list_of_ids or len(list_of_ids) == 0: + return JSONResponse( + content=({"success": False, "message": "Not a valid list of ids"}), + status_code=404, + ) + + context: EmbeddingsContext = request.app.embeddings + context.delete_face_ids(list_of_ids) + return JSONResponse( + content=({"success": True, "message": "Successfully deleted faces."}), + status_code=200, + ) diff --git a/frigate/api/defs/tags.py b/frigate/api/defs/tags.py index 80faf255c..9e61da9e9 100644 --- a/frigate/api/defs/tags.py +++ b/frigate/api/defs/tags.py @@ -10,4 +10,5 @@ class Tags(Enum): review = "Review" export = "Export" events = "Events" + classification = "classification" auth = "Auth" diff --git a/frigate/api/fastapi_app.py b/frigate/api/fastapi_app.py index 168404ea6..9aa267000 100644 --- a/frigate/api/fastapi_app.py +++ b/frigate/api/fastapi_app.py @@ -11,7 +11,16 @@ from starlette_context import middleware, plugins from starlette_context.plugins import Plugin from frigate.api import app as main_app -from frigate.api import auth, event, export, media, notification, preview, review +from frigate.api import ( + auth, + classification, + event, + export, + media, + notification, + preview, + review, +) from frigate.api.auth import get_jwt_secret, limiter from frigate.comms.event_metadata_updater import ( EventMetadataPublisher, @@ -99,6 +108,7 @@ def create_fastapi_app( # Routes # Order of include_router matters: https://fastapi.tiangolo.com/tutorial/path-params/#order-matters app.include_router(auth.router) + app.include_router(classification.router) app.include_router(review.router) app.include_router(main_app.router) app.include_router(preview.router) diff --git a/frigate/comms/embeddings_updater.py b/frigate/comms/embeddings_updater.py index 9a13525f8..095f33fde 100644 --- a/frigate/comms/embeddings_updater.py +++ b/frigate/comms/embeddings_updater.py @@ -12,6 +12,7 @@ class EmbeddingsRequestEnum(Enum): embed_description = "embed_description" embed_thumbnail = "embed_thumbnail" generate_search = "generate_search" + register_face = "register_face" class EmbeddingsResponder: @@ -22,7 +23,7 @@ class EmbeddingsResponder: def check_for_request(self, process: Callable) -> None: while True: # load all messages that are queued - has_message, _, _ = zmq.select([self.socket], [], [], 0.1) + has_message, _, _ = zmq.select([self.socket], [], [], 0.01) if not has_message: break diff --git a/frigate/config/semantic_search.py b/frigate/config/semantic_search.py index 2891050a1..32ff8cf3c 100644 --- a/frigate/config/semantic_search.py +++ b/frigate/config/semantic_search.py @@ -4,7 +4,17 @@ from pydantic import Field from .base import FrigateBaseModel -__all__ = ["SemanticSearchConfig"] +__all__ = ["FaceRecognitionConfig", "SemanticSearchConfig"] + + +class FaceRecognitionConfig(FrigateBaseModel): + enabled: bool = Field(default=False, title="Enable face recognition.") + threshold: float = Field( + default=0.9, title="Face similarity score required to be considered a match." + ) + min_area: int = Field( + default=500, title="Min area of face box to consider running face recognition." + ) class SemanticSearchConfig(FrigateBaseModel): @@ -12,6 +22,9 @@ class SemanticSearchConfig(FrigateBaseModel): reindex: Optional[bool] = Field( default=False, title="Reindex all detections on startup." 
) + face_recognition: FaceRecognitionConfig = Field( + default_factory=FaceRecognitionConfig, title="Face recognition config." + ) model_size: str = Field( default="small", title="The size of the embeddings model used." ) diff --git a/frigate/const.py b/frigate/const.py index 5976f47b1..4f71f1382 100644 --- a/frigate/const.py +++ b/frigate/const.py @@ -5,8 +5,9 @@ DEFAULT_DB_PATH = f"{CONFIG_DIR}/frigate.db" MODEL_CACHE_DIR = f"{CONFIG_DIR}/model_cache" BASE_DIR = "/media/frigate" CLIPS_DIR = f"{BASE_DIR}/clips" -RECORD_DIR = f"{BASE_DIR}/recordings" EXPORT_DIR = f"{BASE_DIR}/exports" +FACE_DIR = f"{CLIPS_DIR}/faces" +RECORD_DIR = f"{BASE_DIR}/recordings" BIRDSEYE_PIPE = "/tmp/cache/birdseye" CACHE_DIR = "/tmp/cache" FRIGATE_LOCALHOST = "http://127.0.0.1:5000" diff --git a/frigate/db/sqlitevecq.py b/frigate/db/sqlitevecq.py index ccb75ae54..1447fd48f 100644 --- a/frigate/db/sqlitevecq.py +++ b/frigate/db/sqlitevecq.py @@ -29,6 +29,10 @@ class SqliteVecQueueDatabase(SqliteQueueDatabase): ids = ",".join(["?" for _ in event_ids]) self.execute_sql(f"DELETE FROM vec_descriptions WHERE id IN ({ids})", event_ids) + def delete_embeddings_face(self, face_ids: list[str]) -> None: + ids = ",".join(["?" for _ in face_ids]) + self.execute_sql(f"DELETE FROM vec_faces WHERE id IN ({ids})", face_ids) + def drop_embeddings_tables(self) -> None: self.execute_sql(""" DROP TABLE vec_descriptions; @@ -36,8 +40,11 @@ class SqliteVecQueueDatabase(SqliteQueueDatabase): self.execute_sql(""" DROP TABLE vec_thumbnails; """) + self.execute_sql(""" + DROP TABLE vec_faces; + """) - def create_embeddings_tables(self) -> None: + def create_embeddings_tables(self, face_recognition: bool) -> None: """Create vec0 virtual table for embeddings""" self.execute_sql(""" CREATE VIRTUAL TABLE IF NOT EXISTS vec_thumbnails USING vec0( @@ -51,3 +58,11 @@ class SqliteVecQueueDatabase(SqliteQueueDatabase): description_embedding FLOAT[768] distance_metric=cosine ); """) + + if face_recognition: + self.execute_sql(""" + CREATE VIRTUAL TABLE IF NOT EXISTS vec_faces USING vec0( + id TEXT PRIMARY KEY, + face_embedding FLOAT[128] distance_metric=cosine + ); + """) diff --git a/frigate/embeddings/__init__.py b/frigate/embeddings/__init__.py index 7f2e1a10c..235b15df3 100644 --- a/frigate/embeddings/__init__.py +++ b/frigate/embeddings/__init__.py @@ -1,5 +1,6 @@ """SQLite-vec embeddings database.""" +import base64 import json import logging import multiprocessing as mp @@ -189,6 +190,28 @@ class EmbeddingsContext: return results + def register_face(self, face_name: str, image_data: bytes) -> None: + self.requestor.send_data( + EmbeddingsRequestEnum.register_face.value, + { + "face_name": face_name, + "image": base64.b64encode(image_data).decode("ASCII"), + }, + ) + + def get_face_ids(self, name: str) -> list[str]: + sql_query = f""" + SELECT + id + FROM vec_descriptions + WHERE id LIKE '%{name}%' + """ + + return self.db.execute_sql(sql_query).fetchall() + + def delete_face_ids(self, ids: list[str]) -> None: + self.db.delete_embeddings_face(ids) + def update_description(self, event_id: str, description: str) -> None: self.requestor.send_data( EmbeddingsRequestEnum.embed_description.value, diff --git a/frigate/embeddings/embeddings.py b/frigate/embeddings/embeddings.py index d77a9eecf..6b0f94ca9 100644 --- a/frigate/embeddings/embeddings.py +++ b/frigate/embeddings/embeddings.py @@ -3,6 +3,8 @@ import base64 import logging import os +import random +import string import time from numpy import ndarray @@ -12,6 +14,7 @@ from 
frigate.comms.inter_process import InterProcessRequestor from frigate.config.semantic_search import SemanticSearchConfig from frigate.const import ( CONFIG_DIR, + FACE_DIR, UPDATE_EMBEDDINGS_REINDEX_PROGRESS, UPDATE_MODEL_STATE, ) @@ -67,7 +70,7 @@ class Embeddings: self.requestor = InterProcessRequestor() # Create tables if they don't exist - self.db.create_embeddings_tables() + self.db.create_embeddings_tables(self.config.face_recognition.enabled) models = [ "jinaai/jina-clip-v1-text_model_fp16.onnx", @@ -121,6 +124,21 @@ class Embeddings: device="GPU" if config.model_size == "large" else "CPU", ) + self.face_embedding = None + + if self.config.face_recognition.enabled: + self.face_embedding = GenericONNXEmbedding( + model_name="facenet", + model_file="facenet.onnx", + download_urls={ + "facenet.onnx": "https://github.com/NicolasSM-001/faceNet.onnx-/raw/refs/heads/main/faceNet.onnx" + }, + model_size="large", + model_type=ModelTypeEnum.face, + requestor=self.requestor, + device="GPU", + ) + def embed_thumbnail( self, event_id: str, thumbnail: bytes, upsert: bool = True ) -> ndarray: @@ -215,12 +233,40 @@ class Embeddings: return embeddings + def embed_face(self, label: str, thumbnail: bytes, upsert: bool = False) -> ndarray: + embedding = self.face_embedding(thumbnail)[0] + + if upsert: + rand_id = "".join( + random.choices(string.ascii_lowercase + string.digits, k=6) + ) + id = f"{label}-{rand_id}" + + # write face to library + folder = os.path.join(FACE_DIR, label) + file = os.path.join(folder, f"{id}.webp") + os.makedirs(folder, exist_ok=True) + + # save face image + with open(file, "wb") as output: + output.write(thumbnail) + + self.db.execute_sql( + """ + INSERT OR REPLACE INTO vec_faces(id, face_embedding) + VALUES(?, ?) + """, + (id, serialize(embedding)), + ) + + return embedding + def reindex(self) -> None: logger.info("Indexing tracked object embeddings...") self.db.drop_embeddings_tables() logger.debug("Dropped embeddings tables.") - self.db.create_embeddings_tables() + self.db.create_embeddings_tables(self.config.face_recognition.enabled) logger.debug("Created embeddings tables.") # Delete the saved stats file diff --git a/frigate/embeddings/functions/onnx.py b/frigate/embeddings/functions/onnx.py index 6ea495a30..9fc71d502 100644 --- a/frigate/embeddings/functions/onnx.py +++ b/frigate/embeddings/functions/onnx.py @@ -31,6 +31,8 @@ warnings.filterwarnings( disable_progress_bar() logger = logging.getLogger(__name__) +FACE_EMBEDDING_SIZE = 160 + class ModelTypeEnum(str, Enum): face = "face" @@ -47,7 +49,7 @@ class GenericONNXEmbedding: model_file: str, download_urls: Dict[str, str], model_size: str, - model_type: str, + model_type: ModelTypeEnum, requestor: InterProcessRequestor, tokenizer_file: Optional[str] = None, device: str = "AUTO", @@ -57,7 +59,7 @@ class GenericONNXEmbedding: self.tokenizer_file = tokenizer_file self.requestor = requestor self.download_urls = download_urls - self.model_type = model_type # 'text' or 'vision' + self.model_type = model_type self.model_size = model_size self.device = device self.download_path = os.path.join(MODEL_CACHE_DIR, self.model_name) @@ -93,6 +95,7 @@ class GenericONNXEmbedding: def _download_model(self, path: str): try: file_name = os.path.basename(path) + if file_name in self.download_urls: ModelDownloader.download_from_url(self.download_urls[file_name], path) elif ( @@ -101,6 +104,7 @@ class GenericONNXEmbedding: ): if not os.path.exists(path + "/" + self.model_name): logger.info(f"Downloading {self.model_name} tokenizer") + 
tokenizer = AutoTokenizer.from_pretrained( self.model_name, trust_remote_code=True, @@ -131,8 +135,11 @@ class GenericONNXEmbedding: self.downloader.wait_for_download() if self.model_type == ModelTypeEnum.text: self.tokenizer = self._load_tokenizer() - else: + elif self.model_type == ModelTypeEnum.vision: self.feature_extractor = self._load_feature_extractor() + elif self.model_type == ModelTypeEnum.face: + self.feature_extractor = [] + self.runner = ONNXModelRunner( os.path.join(self.download_path, self.model_file), self.device, @@ -172,16 +179,51 @@ class GenericONNXEmbedding: self.feature_extractor(images=image, return_tensors="np") for image in processed_images ] + elif self.model_type == ModelTypeEnum.face: + if isinstance(raw_inputs, list): + raise ValueError("Face embedding does not support batch inputs.") + + pil = self._process_image(raw_inputs) + + # handle images larger than input size + width, height = pil.size + if width != FACE_EMBEDDING_SIZE or height != FACE_EMBEDDING_SIZE: + if width > height: + new_height = int(((height / width) * FACE_EMBEDDING_SIZE) // 4 * 4) + pil = pil.resize((FACE_EMBEDDING_SIZE, new_height)) + else: + new_width = int(((width / height) * FACE_EMBEDDING_SIZE) // 4 * 4) + pil = pil.resize((new_width, FACE_EMBEDDING_SIZE)) + + og = np.array(pil).astype(np.float32) + + # Image must be FACE_EMBEDDING_SIZExFACE_EMBEDDING_SIZE + og_h, og_w, channels = og.shape + frame = np.full( + (FACE_EMBEDDING_SIZE, FACE_EMBEDDING_SIZE, channels), + (0, 0, 0), + dtype=np.float32, + ) + + # compute center offset + x_center = (FACE_EMBEDDING_SIZE - og_w) // 2 + y_center = (FACE_EMBEDDING_SIZE - og_h) // 2 + + # copy img image into center of result image + frame[y_center : y_center + og_h, x_center : x_center + og_w] = og + + frame = np.expand_dims(frame, axis=0) + return [{"image_input": frame}] else: raise ValueError(f"Unable to preprocess inputs for {self.model_type}") - def _process_image(self, image): + def _process_image(self, image, output: str = "RGB") -> Image.Image: if isinstance(image, str): if image.startswith("http"): response = requests.get(image) - image = Image.open(BytesIO(response.content)).convert("RGB") + image = Image.open(BytesIO(response.content)).convert(output) elif isinstance(image, bytes): - image = Image.open(BytesIO(image)).convert("RGB") + image = Image.open(BytesIO(image)).convert(output) return image diff --git a/frigate/embeddings/maintainer.py b/frigate/embeddings/maintainer.py index d58a7f431..104d44bbc 100644 --- a/frigate/embeddings/maintainer.py +++ b/frigate/embeddings/maintainer.py @@ -9,6 +9,7 @@ from typing import Optional import cv2 import numpy as np +import requests from peewee import DoesNotExist from playhouse.sqliteq import SqliteQueueDatabase @@ -20,13 +21,13 @@ from frigate.comms.event_metadata_updater import ( from frigate.comms.events_updater import EventEndSubscriber, EventUpdateSubscriber from frigate.comms.inter_process import InterProcessRequestor from frigate.config import FrigateConfig -from frigate.const import CLIPS_DIR, UPDATE_EVENT_DESCRIPTION +from frigate.const import CLIPS_DIR, FRIGATE_LOCALHOST, UPDATE_EVENT_DESCRIPTION from frigate.events.types import EventTypeEnum from frigate.genai import get_genai_client from frigate.models import Event from frigate.types import TrackedObjectUpdateTypesEnum from frigate.util.builtin import serialize -from frigate.util.image import SharedMemoryFrameManager, calculate_region +from frigate.util.image import SharedMemoryFrameManager, area, calculate_region from .embeddings 
import Embeddings @@ -59,10 +60,17 @@ class EmbeddingMaintainer(threading.Thread): ) self.embeddings_responder = EmbeddingsResponder() self.frame_manager = SharedMemoryFrameManager() + + # set face recognition conditions + self.face_recognition_enabled = ( + self.config.semantic_search.face_recognition.enabled + ) + self.requires_face_detection = "face" not in self.config.model.all_attributes + # create communication for updating event descriptions self.requestor = InterProcessRequestor() self.stop_event = stop_event - self.tracked_events = {} + self.tracked_events: dict[str, list[any]] = {} self.genai_client = get_genai_client(config) def run(self) -> None: @@ -102,6 +110,13 @@ class EmbeddingMaintainer(threading.Thread): return serialize( self.embeddings.text_embedding([data])[0], pack=False ) + elif topic == EmbeddingsRequestEnum.register_face.value: + self.embeddings.embed_face( + data["face_name"], + base64.b64decode(data["image"]), + upsert=True, + ) + return None except Exception as e: logger.error(f"Unable to handle embeddings request {e}") @@ -109,7 +124,7 @@ class EmbeddingMaintainer(threading.Thread): def _process_updates(self) -> None: """Process event updates""" - update = self.event_subscriber.check_for_update(timeout=0.1) + update = self.event_subscriber.check_for_update(timeout=0.01) if update is None: return @@ -120,42 +135,47 @@ class EmbeddingMaintainer(threading.Thread): return camera_config = self.config.cameras[camera] - # no need to save our own thumbnails if genai is not enabled - # or if the object has become stationary - if ( - not camera_config.genai.enabled - or self.genai_client is None - or data["stationary"] - ): - return - if data["id"] not in self.tracked_events: - self.tracked_events[data["id"]] = [] + # no need to process updated objects if face recognition and genai are disabled + if not camera_config.genai.enabled and not self.face_recognition_enabled: + return # Create our own thumbnail based on the bounding box and the frame time try: - yuv_frame = self.frame_manager.get( - frame_name, camera_config.frame_shape_yuv - ) - - if yuv_frame is not None: - data["thumbnail"] = self._create_thumbnail(yuv_frame, data["box"]) - - # Limit the number of thumbnails saved - if len(self.tracked_events[data["id"]]) >= MAX_THUMBNAILS: - # Always keep the first thumbnail for the event - self.tracked_events[data["id"]].pop(1) - - self.tracked_events[data["id"]].append(data) - - self.frame_manager.close(frame_name) + yuv_frame = self.frame_manager.get(frame_name, camera_config.frame_shape_yuv) except FileNotFoundError: pass + if yuv_frame is None: + logger.debug( + "Unable to process object update because frame is unavailable." 
+ ) + return + + if self.face_recognition_enabled: + self._process_face(data, yuv_frame) + + # no need to save our own thumbnails if genai is not enabled + # or if the object has become stationary + if self.genai_client is not None and not data["stationary"]: + if data["id"] not in self.tracked_events: + self.tracked_events[data["id"]] = [] + + data["thumbnail"] = self._create_thumbnail(yuv_frame, data["box"]) + + # Limit the number of thumbnails saved + if len(self.tracked_events[data["id"]]) >= MAX_THUMBNAILS: + # Always keep the first thumbnail for the event + self.tracked_events[data["id"]].pop(1) + + self.tracked_events[data["id"]].append(data) + + self.frame_manager.close(frame_name) + def _process_finalized(self) -> None: """Process the end of an event.""" while True: - ended = self.event_end_subscriber.check_for_update(timeout=0.1) + ended = self.event_end_subscriber.check_for_update(timeout=0.01) if ended == None: break @@ -245,7 +265,7 @@ class EmbeddingMaintainer(threading.Thread): def _process_event_metadata(self): # Check for regenerate description requests (topic, event_id, source) = self.event_metadata_subscriber.check_for_update( - timeout=0.1 + timeout=0.01 ) if topic is None: @@ -254,6 +274,94 @@ class EmbeddingMaintainer(threading.Thread): if event_id: self.handle_regenerate_description(event_id, source) + def _search_face(self, query_embedding: bytes) -> list: + """Search for the face most closely matching the embedding.""" + sql_query = """ + SELECT + id, + distance + FROM vec_faces + WHERE face_embedding MATCH ? + AND k = 10 ORDER BY distance + """ + return self.embeddings.db.execute_sql(sql_query, [query_embedding]).fetchall() + + def _process_face(self, obj_data: dict[str, any], frame: np.ndarray) -> None: + """Look for faces in image.""" + # don't run for non person objects + if obj_data.get("label") != "person": + logger.debug("Not a processing face for non person object.") + return + + # don't overwrite sub label for objects that have one + if obj_data.get("sub_label"): + logger.debug( + f"Not processing face due to existing sub label: {obj_data.get('sub_label')}." 
+ ) + return + + face: Optional[dict[str, any]] = None + + if self.requires_face_detection: + # TODO run cv2 face detection + pass + else: + # don't run for object without attributes + if not obj_data.get("current_attributes"): + logger.debug("No attributes to parse.") + return + + attributes: list[dict[str, any]] = obj_data.get("current_attributes", []) + for attr in attributes: + if attr.get("label") != "face": + continue + + if face is None or attr.get("score", 0.0) > face.get("score", 0.0): + face = attr + + # no faces detected in this frame + if not face: + return + + face_box = face.get("box") + + # check that face is valid + if ( + not face_box + or area(face_box) < self.config.semantic_search.face_recognition.min_area + ): + logger.debug(f"Invalid face box {face}") + return + + face_frame = cv2.cvtColor(frame, cv2.COLOR_YUV2BGR_I420) + face_frame = face_frame[face_box[1] : face_box[3], face_box[0] : face_box[2]] + ret, jpg = cv2.imencode( + ".webp", face_frame, [int(cv2.IMWRITE_WEBP_QUALITY), 100] + ) + + if not ret: + logger.debug("Not processing face due to error creating cropped image.") + return + + embedding = self.embeddings.embed_face("unknown", jpg.tobytes(), upsert=False) + query_embedding = serialize(embedding) + best_faces = self._search_face(query_embedding) + logger.debug(f"Detected best faces for person as: {best_faces}") + + if not best_faces: + return + + sub_label = str(best_faces[0][0]).split("-")[0] + score = 1.0 - best_faces[0][1] + + if score < self.config.semantic_search.face_recognition.threshold: + return None + + requests.post( + f"{FRIGATE_LOCALHOST}/api/events/{obj_data['id']}/sub_label", + json={"subLabel": sub_label, "subLabelScore": score}, + ) + def _create_thumbnail(self, yuv_frame, box, height=500) -> Optional[bytes]: """Return jpg thumbnail of a region of the frame.""" frame = cv2.cvtColor(yuv_frame, cv2.COLOR_YUV2BGR_I420) diff --git a/frigate/util/downloader.py b/frigate/util/downloader.py index 6685b0bb8..18c577fb0 100644 --- a/frigate/util/downloader.py +++ b/frigate/util/downloader.py @@ -101,7 +101,7 @@ class ModelDownloader: self.download_complete.set() @staticmethod - def download_from_url(url: str, save_path: str, silent: bool = False): + def download_from_url(url: str, save_path: str, silent: bool = False) -> Path: temporary_filename = Path(save_path).with_name( os.path.basename(save_path) + ".part" ) @@ -125,6 +125,8 @@ class ModelDownloader: if not silent: logger.info(f"Downloading complete: {url}") + return Path(save_path) + @staticmethod def mark_files_state( requestor: InterProcessRequestor, From b455bfcf233448d145a377838eea987046b9ec53 Mon Sep 17 00:00:00 2001 From: Nicolas Mowen Date: Wed, 23 Oct 2024 09:03:18 -0600 Subject: [PATCH 03/18] Improve face recognition (#14537) * Increase requirements for face to be set * Manage faces properly * Add basic docs * Simplify * Separate out face recognition frome semantic search * Update docs * Formatting --- docs/docs/configuration/face_recognition.md | 21 +++++++++ docs/docs/configuration/reference.md | 8 ++++ docs/sidebars.ts | 1 + frigate/config/config.py | 16 ++++++- frigate/config/semantic_search.py | 23 +++++----- frigate/embeddings/embeddings.py | 12 +++-- frigate/embeddings/maintainer.py | 49 +++++++++++++++------ 7 files changed, 96 insertions(+), 34 deletions(-) create mode 100644 docs/docs/configuration/face_recognition.md diff --git a/docs/docs/configuration/face_recognition.md b/docs/docs/configuration/face_recognition.md new file mode 100644 index 000000000..af079a226 --- 
/dev/null +++ b/docs/docs/configuration/face_recognition.md @@ -0,0 +1,21 @@ +--- +id: face_recognition +title: Face Recognition +--- + +Face recognition allows people to be assigned names and when their face is recognized Frigate will assign the person's name as a sub label. This information is included in the UI, filters, as well as in notifications. + +Frigate has support for FaceNet to create face embeddings, which runs locally. Embeddings are then saved to Frigate's database. + +## Minimum System Requirements + +Face recognition works by running a large AI model locally on your system. Systems without a GPU will not run Face Recognition reliably or at all. + +## Configuration + +Face recognition is disabled by default and requires semantic search to be enabled, face recognition must be enabled in your config file before it can be used. Semantic Search and face recognition are global configuration settings. + +```yaml +face_recognition: + enabled: true +``` \ No newline at end of file diff --git a/docs/docs/configuration/reference.md b/docs/docs/configuration/reference.md index bb7ae49a3..915ac0981 100644 --- a/docs/docs/configuration/reference.md +++ b/docs/docs/configuration/reference.md @@ -522,6 +522,14 @@ semantic_search: # NOTE: small model runs on CPU and large model runs on GPU model_size: "small" +# Optional: Configuration for face recognition capability +face_recognition: + # Optional: Enable semantic search (default: shown below) + enabled: False + # Optional: Set the model size used for embeddings. (default: shown below) + # NOTE: small model runs on CPU and large model runs on GPU + model_size: "small" + # Optional: Configuration for AI generated tracked object descriptions # NOTE: Semantic Search must be enabled for this to do anything. 
# WARNING: Depending on the provider, this will send thumbnails over the internet diff --git a/docs/sidebars.ts b/docs/sidebars.ts index 4ed41d2ad..e214dde88 100644 --- a/docs/sidebars.ts +++ b/docs/sidebars.ts @@ -36,6 +36,7 @@ const sidebars: SidebarsConfig = { 'Semantic Search': [ 'configuration/semantic_search', 'configuration/genai', + 'configuration/face_recognition', ], Cameras: [ 'configuration/cameras', diff --git a/frigate/config/config.py b/frigate/config/config.py index 770588b93..88299767e 100644 --- a/frigate/config/config.py +++ b/frigate/config/config.py @@ -57,7 +57,7 @@ from .logger import LoggerConfig from .mqtt import MqttConfig from .notification import NotificationConfig from .proxy import ProxyConfig -from .semantic_search import SemanticSearchConfig +from .semantic_search import FaceRecognitionConfig, SemanticSearchConfig from .telemetry import TelemetryConfig from .tls import TlsConfig from .ui import UIConfig @@ -159,6 +159,16 @@ class RestreamConfig(BaseModel): model_config = ConfigDict(extra="allow") +def verify_semantic_search_dependent_configs(config: FrigateConfig) -> None: + """Verify that semantic search is enabled if required features are enabled.""" + if not config.semantic_search.enabled: + if config.genai.enabled: + raise ValueError("Genai requires semantic search to be enabled.") + + if config.face_recognition.enabled: + raise ValueError("Face recognition requires semantic to be enabled.") + + def verify_config_roles(camera_config: CameraConfig) -> None: """Verify that roles are setup in the config correctly.""" assigned_roles = list( @@ -320,6 +330,9 @@ class FrigateConfig(FrigateBaseModel): semantic_search: SemanticSearchConfig = Field( default_factory=SemanticSearchConfig, title="Semantic search configuration." ) + face_recognition: FaceRecognitionConfig = Field( + default_factory=FaceRecognitionConfig, title="Face recognition config." + ) ui: UIConfig = Field(default_factory=UIConfig, title="UI configuration.") # Detector config @@ -625,6 +638,7 @@ class FrigateConfig(FrigateBaseModel): detector_config.model.compute_model_hash() self.detectors[key] = detector_config + verify_semantic_search_dependent_configs(self) return self @field_validator("cameras") diff --git a/frigate/config/semantic_search.py b/frigate/config/semantic_search.py index 32ff8cf3c..7ffdbd137 100644 --- a/frigate/config/semantic_search.py +++ b/frigate/config/semantic_search.py @@ -7,6 +7,16 @@ from .base import FrigateBaseModel __all__ = ["FaceRecognitionConfig", "SemanticSearchConfig"] +class SemanticSearchConfig(FrigateBaseModel): + enabled: bool = Field(default=False, title="Enable semantic search.") + reindex: Optional[bool] = Field( + default=False, title="Reindex all detections on startup." + ) + model_size: str = Field( + default="small", title="The size of the embeddings model used." + ) + + class FaceRecognitionConfig(FrigateBaseModel): enabled: bool = Field(default=False, title="Enable face recognition.") threshold: float = Field( @@ -15,16 +25,3 @@ class FaceRecognitionConfig(FrigateBaseModel): min_area: int = Field( default=500, title="Min area of face box to consider running face recognition." ) - - -class SemanticSearchConfig(FrigateBaseModel): - enabled: bool = Field(default=False, title="Enable semantic search.") - reindex: Optional[bool] = Field( - default=False, title="Reindex all detections on startup." - ) - face_recognition: FaceRecognitionConfig = Field( - default_factory=FaceRecognitionConfig, title="Face recognition config." 
- ) - model_size: str = Field( - default="small", title="The size of the embeddings model used." - ) diff --git a/frigate/embeddings/embeddings.py b/frigate/embeddings/embeddings.py index 6b0f94ca9..3fb6d5d26 100644 --- a/frigate/embeddings/embeddings.py +++ b/frigate/embeddings/embeddings.py @@ -11,7 +11,7 @@ from numpy import ndarray from playhouse.shortcuts import model_to_dict from frigate.comms.inter_process import InterProcessRequestor -from frigate.config.semantic_search import SemanticSearchConfig +from frigate.config import FrigateConfig from frigate.const import ( CONFIG_DIR, FACE_DIR, @@ -62,9 +62,7 @@ def get_metadata(event: Event) -> dict: class Embeddings: """SQLite-vec embeddings database.""" - def __init__( - self, config: SemanticSearchConfig, db: SqliteVecQueueDatabase - ) -> None: + def __init__(self, config: FrigateConfig, db: SqliteVecQueueDatabase) -> None: self.config = config self.db = db self.requestor = InterProcessRequestor() @@ -76,7 +74,7 @@ class Embeddings: "jinaai/jina-clip-v1-text_model_fp16.onnx", "jinaai/jina-clip-v1-tokenizer", "jinaai/jina-clip-v1-vision_model_fp16.onnx" - if config.model_size == "large" + if config.semantic_search.model_size == "large" else "jinaai/jina-clip-v1-vision_model_quantized.onnx", "jinaai/jina-clip-v1-preprocessor_config.json", ] @@ -97,7 +95,7 @@ class Embeddings: download_urls={ "text_model_fp16.onnx": "https://huggingface.co/jinaai/jina-clip-v1/resolve/main/onnx/text_model_fp16.onnx", }, - model_size=config.model_size, + model_size=config.semantic_search.model_size, model_type=ModelTypeEnum.text, requestor=self.requestor, device="CPU", @@ -105,7 +103,7 @@ class Embeddings: model_file = ( "vision_model_fp16.onnx" - if self.config.model_size == "large" + if self.config.semantic_search.model_size == "large" else "vision_model_quantized.onnx" ) diff --git a/frigate/embeddings/maintainer.py b/frigate/embeddings/maintainer.py index 104d44bbc..f7947932a 100644 --- a/frigate/embeddings/maintainer.py +++ b/frigate/embeddings/maintainer.py @@ -33,6 +33,7 @@ from .embeddings import Embeddings logger = logging.getLogger(__name__) +REQUIRED_FACES = 2 MAX_THUMBNAILS = 10 @@ -47,7 +48,7 @@ class EmbeddingMaintainer(threading.Thread): ) -> None: super().__init__(name="embeddings_maintainer") self.config = config - self.embeddings = Embeddings(config.semantic_search, db) + self.embeddings = Embeddings(config, db) # Check if we need to re-index events if config.semantic_search.reindex: @@ -62,10 +63,9 @@ class EmbeddingMaintainer(threading.Thread): self.frame_manager = SharedMemoryFrameManager() # set face recognition conditions - self.face_recognition_enabled = ( - self.config.semantic_search.face_recognition.enabled - ) + self.face_recognition_enabled = self.config.face_recognition.enabled self.requires_face_detection = "face" not in self.config.model.all_attributes + self.detected_faces: dict[str, float] = {} # create communication for updating event descriptions self.requestor = InterProcessRequestor() @@ -183,6 +183,9 @@ class EmbeddingMaintainer(threading.Thread): event_id, camera, updated_db = ended camera_config = self.config.cameras[camera] + if event_id in self.detected_faces: + self.detected_faces.pop(event_id) + if updated_db: try: event: Event = Event.get(Event.id == event_id) @@ -276,25 +279,28 @@ class EmbeddingMaintainer(threading.Thread): def _search_face(self, query_embedding: bytes) -> list: """Search for the face most closely matching the embedding.""" - sql_query = """ + sql_query = f""" SELECT id, distance FROM 
vec_faces WHERE face_embedding MATCH ? - AND k = 10 ORDER BY distance + AND k = {REQUIRED_FACES} ORDER BY distance """ return self.embeddings.db.execute_sql(sql_query, [query_embedding]).fetchall() def _process_face(self, obj_data: dict[str, any], frame: np.ndarray) -> None: """Look for faces in image.""" + id = obj_data["id"] + # don't run for non person objects if obj_data.get("label") != "person": logger.debug("Not a processing face for non person object.") return - # don't overwrite sub label for objects that have one - if obj_data.get("sub_label"): + # don't overwrite sub label for objects that have a sub label + # that is not a face + if obj_data.get("sub_label") and id not in self.detected_faces: logger.debug( f"Not processing face due to existing sub label: {obj_data.get('sub_label')}." ) @@ -348,18 +354,35 @@ class EmbeddingMaintainer(threading.Thread): best_faces = self._search_face(query_embedding) logger.debug(f"Detected best faces for person as: {best_faces}") - if not best_faces: + if not best_faces or len(best_faces) < REQUIRED_FACES: return sub_label = str(best_faces[0][0]).split("-")[0] - score = 1.0 - best_faces[0][1] + avg_score = 0 - if score < self.config.semantic_search.face_recognition.threshold: + for face in best_faces: + score = 1.0 - face[1] + + if face[0] != sub_label: + logger.debug("Detected multiple faces, result is not valid.") + return None + + avg_score += score + + avg_score = avg_score / REQUIRED_FACES + + if avg_score < self.config.semantic_search.face_recognition.threshold or ( + id in self.detected_faces and avg_score <= self.detected_faces[id] + ): + logger.debug( + "Detected face does not score higher than threshold / previous face." + ) return None + self.detected_faces[id] = avg_score requests.post( - f"{FRIGATE_LOCALHOST}/api/events/{obj_data['id']}/sub_label", - json={"subLabel": sub_label, "subLabelScore": score}, + f"{FRIGATE_LOCALHOST}/api/events/{id}/sub_label", + json={"subLabel": sub_label, "subLabelScore": avg_score}, ) def _create_thumbnail(self, yuv_frame, box, height=500) -> Optional[bytes]: From 31f65ceab5a890ff8bc27e6e10b3dc8c1c1152bf Mon Sep 17 00:00:00 2001 From: Nicolas Mowen Date: Wed, 23 Oct 2024 09:26:03 -0600 Subject: [PATCH 04/18] Fix access (#14540) --- frigate/embeddings/embeddings.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/frigate/embeddings/embeddings.py b/frigate/embeddings/embeddings.py index 3fb6d5d26..4bb1afcd6 100644 --- a/frigate/embeddings/embeddings.py +++ b/frigate/embeddings/embeddings.py @@ -116,10 +116,10 @@ class Embeddings: model_name="jinaai/jina-clip-v1", model_file=model_file, download_urls=download_urls, - model_size=config.model_size, + model_size=config.semantic_search.model_size, model_type=ModelTypeEnum.vision, requestor=self.requestor, - device="GPU" if config.model_size == "large" else "CPU", + device="GPU" if config.semantic_search.model_size == "large" else "CPU", ) self.face_embedding = None From 8d98fdfee10d1fa47f1cd7a28cfa6bdefaaad11c Mon Sep 17 00:00:00 2001 From: Nicolas Mowen Date: Wed, 23 Oct 2024 13:50:58 -0600 Subject: [PATCH 05/18] Face detection (#14544) * Add support for face detection * Add support for detecting faces during registration * Set body size to be larger * Undo --- .../rootfs/usr/local/nginx/conf/nginx.conf | 2 + frigate/api/defs/request/events_body.py | 3 + frigate/api/event.py | 55 +++++-- frigate/embeddings/embeddings.py | 3 +- frigate/embeddings/maintainer.py | 148 ++++++++++++++---- 5 files changed, 159 insertions(+), 52 deletions(-) 
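The matching flow added above can be read as a small voting scheme over the nearest stored face embeddings: fetch the top `REQUIRED_FACES` rows from `vec_faces`, require them all to carry the same name prefix, average their `1 - distance` scores, and accept the result only when it beats both the configured threshold and any score already recorded for that object. A standalone sketch of that decision, with hypothetical inputs:

```python
REQUIRED_FACES = 2

def decide_face(
    best_faces: list[tuple[str, float]],  # (id, distance) rows ordered by distance
    threshold: float,
    previous_score: float | None = None,
) -> tuple[str, float] | None:
    if not best_faces or len(best_faces) < REQUIRED_FACES:
        return None

    # ids are stored as "<name>-<random suffix>", so the name is the prefix before the dash
    name = str(best_faces[0][0]).split("-")[0]
    total = 0.0

    for face_id, distance in best_faces:
        if str(face_id).split("-")[0] != name:
            # the nearest matches disagree on who this is; don't trust the result
            return None
        total += 1.0 - distance

    avg_score = round(total / REQUIRED_FACES, 2)

    if avg_score < threshold:
        return None
    if previous_score is not None and avg_score <= previous_score:
        return None

    return name, avg_score

# decide_face([("jane-a1b2c3", 0.05), ("jane-d4e5f6", 0.08)], threshold=0.9)
# -> ("jane", 0.94)
```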
diff --git a/docker/main/rootfs/usr/local/nginx/conf/nginx.conf b/docker/main/rootfs/usr/local/nginx/conf/nginx.conf index 75527bf53..fa487a083 100644 --- a/docker/main/rootfs/usr/local/nginx/conf/nginx.conf +++ b/docker/main/rootfs/usr/local/nginx/conf/nginx.conf @@ -246,6 +246,8 @@ http { proxy_no_cache $should_not_cache; add_header X-Cache-Status $upstream_cache_status; + client_max_body_size 10M; + location /api/vod/ { include auth_request.conf; proxy_pass http://frigate_api/vod/; diff --git a/frigate/api/defs/request/events_body.py b/frigate/api/defs/request/events_body.py index 1c8576f02..0fefbe43f 100644 --- a/frigate/api/defs/request/events_body.py +++ b/frigate/api/defs/request/events_body.py @@ -8,6 +8,9 @@ class EventsSubLabelBody(BaseModel): subLabelScore: Optional[float] = Field( title="Score for sub label", default=None, gt=0.0, le=1.0 ) + camera: Optional[str] = Field( + title="Camera this object is detected on.", default=None + ) class EventsDescriptionBody(BaseModel): diff --git a/frigate/api/event.py b/frigate/api/event.py index 3ba4ae426..fc51a06c2 100644 --- a/frigate/api/event.py +++ b/frigate/api/event.py @@ -909,38 +909,59 @@ def set_sub_label( try: event: Event = Event.get(Event.id == event_id) except DoesNotExist: + if not body.camera: + return JSONResponse( + content=( + { + "success": False, + "message": "Event " + + event_id + + " not found and camera is not provided.", + } + ), + status_code=404, + ) + + event = None + + if request.app.detected_frames_processor: + tracked_obj: TrackedObject = ( + request.app.detected_frames_processor.camera_states[ + event.camera if event else body.camera + ].tracked_objects.get(event_id) + ) + else: + tracked_obj = None + + if not event and not tracked_obj: return JSONResponse( - content=({"success": False, "message": "Event " + event_id + " not found"}), + content=( + {"success": False, "message": "Event " + event_id + " not found."} + ), status_code=404, ) new_sub_label = body.subLabel new_score = body.subLabelScore - if not event.end_time: - # update tracked object - tracked_obj: TrackedObject = ( - request.app.detected_frames_processor.camera_states[ - event.camera - ].tracked_objects.get(event.id) - ) - - if tracked_obj: - tracked_obj.obj_data["sub_label"] = (new_sub_label, new_score) + if tracked_obj: + tracked_obj.obj_data["sub_label"] = (new_sub_label, new_score) # update timeline items Timeline.update( data=Timeline.data.update({"sub_label": (new_sub_label, new_score)}) ).where(Timeline.source_id == event_id).execute() - event.sub_label = new_sub_label + if event: + event.sub_label = new_sub_label - if new_score: - data = event.data - data["sub_label_score"] = new_score - event.data = data + if new_score: + data = event.data + data["sub_label_score"] = new_score + event.data = data + + event.save() - event.save() return JSONResponse( content=( { diff --git a/frigate/embeddings/embeddings.py b/frigate/embeddings/embeddings.py index 4bb1afcd6..cc54ba548 100644 --- a/frigate/embeddings/embeddings.py +++ b/frigate/embeddings/embeddings.py @@ -129,7 +129,8 @@ class Embeddings: model_name="facenet", model_file="facenet.onnx", download_urls={ - "facenet.onnx": "https://github.com/NicolasSM-001/faceNet.onnx-/raw/refs/heads/main/faceNet.onnx" + "facenet.onnx": "https://github.com/NicolasSM-001/faceNet.onnx-/raw/refs/heads/main/faceNet.onnx", + "facedet.onnx": "https://github.com/opencv/opencv_zoo/raw/refs/heads/main/models/face_detection_yunet/face_detection_yunet_2023mar_int8.onnx", }, model_size="large", 
model_type=ModelTypeEnum.face, diff --git a/frigate/embeddings/maintainer.py b/frigate/embeddings/maintainer.py index f7947932a..76b174862 100644 --- a/frigate/embeddings/maintainer.py +++ b/frigate/embeddings/maintainer.py @@ -73,6 +73,19 @@ class EmbeddingMaintainer(threading.Thread): self.tracked_events: dict[str, list[any]] = {} self.genai_client = get_genai_client(config) + @property + def face_detector(self) -> cv2.FaceDetectorYN: + # Lazily create the classifier. + if "face_detector" not in self.__dict__: + self.__dict__["face_detector"] = cv2.FaceDetectorYN.create( + "/config/model_cache/facenet/facedet.onnx", + config="", + input_size=(320, 320), + score_threshold=0.8, + nms_threshold=0.3, + ) + return self.__dict__["face_detector"] + def run(self) -> None: """Maintain a SQLite-vec database for semantic search.""" while not self.stop_event.is_set(): @@ -91,7 +104,7 @@ class EmbeddingMaintainer(threading.Thread): def _process_requests(self) -> None: """Process embeddings requests""" - def _handle_request(topic: str, data: str) -> str: + def _handle_request(topic: str, data: dict[str, any]) -> str: try: if topic == EmbeddingsRequestEnum.embed_description.value: return serialize( @@ -111,12 +124,34 @@ class EmbeddingMaintainer(threading.Thread): self.embeddings.text_embedding([data])[0], pack=False ) elif topic == EmbeddingsRequestEnum.register_face.value: - self.embeddings.embed_face( - data["face_name"], - base64.b64decode(data["image"]), - upsert=True, - ) - return None + if data.get("cropped"): + self.embeddings.embed_face( + data["face_name"], + base64.b64decode(data["image"]), + upsert=True, + ) + return True + else: + img = cv2.imdecode( + np.frombuffer( + base64.b64decode(data["image"]), dtype=np.uint8 + ), + cv2.IMREAD_COLOR, + ) + face_box = self._detect_face(img) + + if not face_box: + return False + + face = img[face_box[1] : face_box[3], face_box[0] : face_box[2]] + ret, webp = cv2.imencode( + ".webp", face, [int(cv2.IMWRITE_WEBP_QUALITY), 100] + ) + self.embeddings.embed_face( + data["face_name"], webp.tobytes(), upsert=True + ) + + return False except Exception as e: logger.error(f"Unable to handle embeddings request {e}") @@ -277,7 +312,7 @@ class EmbeddingMaintainer(threading.Thread): if event_id: self.handle_regenerate_description(event_id, source) - def _search_face(self, query_embedding: bytes) -> list: + def _search_face(self, query_embedding: bytes) -> list[tuple[str, float]]: """Search for the face most closely matching the embedding.""" sql_query = f""" SELECT @@ -289,6 +324,29 @@ class EmbeddingMaintainer(threading.Thread): """ return self.embeddings.db.execute_sql(sql_query, [query_embedding]).fetchall() + def _detect_face(self, input: np.ndarray) -> tuple[int, int, int, int]: + """Detect faces in input image.""" + self.face_detector.setInputSize((input.shape[1], input.shape[0])) + faces = self.face_detector.detect(input) + + if faces[1] is None: + return None + + face = None + + for _, potential_face in enumerate(faces[1]): + raw_bbox = potential_face[0:4].astype(np.uint16) + x: int = max(raw_bbox[0], 0) + y: int = max(raw_bbox[1], 0) + w: int = raw_bbox[2] + h: int = raw_bbox[3] + bbox = (x, y, x + w, y + h) + + if face is None or area(bbox) > area(face): + face = bbox + + return face + def _process_face(self, obj_data: dict[str, any], frame: np.ndarray) -> None: """Look for faces in image.""" id = obj_data["id"] @@ -309,8 +367,23 @@ class EmbeddingMaintainer(threading.Thread): face: Optional[dict[str, any]] = None if self.requires_face_detection: - # TODO 
run cv2 face detection - pass + logger.debug("Running manual face detection.") + person_box = obj_data.get("box") + + if not person_box: + return None + + rgb = cv2.cvtColor(frame, cv2.COLOR_YUV2RGB_I420) + left, top, right, bottom = person_box + person = rgb[top:bottom, left:right] + face = self._detect_face(person) + + if not face: + logger.debug("Detected no faces for person object.") + return + + face_frame = person[face[1] : face[3], face[0] : face[2]] + face_frame = cv2.cvtColor(face_frame, cv2.COLOR_RGB2BGR) else: # don't run for object without attributes if not obj_data.get("current_attributes"): @@ -325,23 +398,23 @@ class EmbeddingMaintainer(threading.Thread): if face is None or attr.get("score", 0.0) > face.get("score", 0.0): face = attr - # no faces detected in this frame - if not face: - return + # no faces detected in this frame + if not face: + return - face_box = face.get("box") + face_box = face.get("box") - # check that face is valid - if ( - not face_box - or area(face_box) < self.config.semantic_search.face_recognition.min_area - ): - logger.debug(f"Invalid face box {face}") - return + # check that face is valid + if not face_box or area(face_box) < self.config.face_recognition.min_area: + logger.debug(f"Invalid face box {face}") + return - face_frame = cv2.cvtColor(frame, cv2.COLOR_YUV2BGR_I420) - face_frame = face_frame[face_box[1] : face_box[3], face_box[0] : face_box[2]] - ret, jpg = cv2.imencode( + face_frame = cv2.cvtColor(frame, cv2.COLOR_YUV2BGR_I420) + face_frame = face_frame[ + face_box[1] : face_box[3], face_box[0] : face_box[2] + ] + + ret, webp = cv2.imencode( ".webp", face_frame, [int(cv2.IMWRITE_WEBP_QUALITY), 100] ) @@ -349,12 +422,13 @@ class EmbeddingMaintainer(threading.Thread): logger.debug("Not processing face due to error creating cropped image.") return - embedding = self.embeddings.embed_face("unknown", jpg.tobytes(), upsert=False) + embedding = self.embeddings.embed_face("unknown", webp.tobytes(), upsert=False) query_embedding = serialize(embedding) best_faces = self._search_face(query_embedding) logger.debug(f"Detected best faces for person as: {best_faces}") if not best_faces or len(best_faces) < REQUIRED_FACES: + logger.debug(f"{len(best_faces)} < {REQUIRED_FACES} min required faces.") return sub_label = str(best_faces[0][0]).split("-")[0] @@ -363,28 +437,34 @@ class EmbeddingMaintainer(threading.Thread): for face in best_faces: score = 1.0 - face[1] - if face[0] != sub_label: + if face[0].split("-")[0] != sub_label: logger.debug("Detected multiple faces, result is not valid.") - return None + return avg_score += score - avg_score = avg_score / REQUIRED_FACES + avg_score = round(avg_score / REQUIRED_FACES, 2) - if avg_score < self.config.semantic_search.face_recognition.threshold or ( + if avg_score < self.config.face_recognition.threshold or ( id in self.detected_faces and avg_score <= self.detected_faces[id] ): logger.debug( - "Detected face does not score higher than threshold / previous face." + f"Recognized face score {avg_score} is less than threshold ({self.config.face_recognition.threshold}) / previous face score ({self.detected_faces.get(id)})." 
) - return None + return - self.detected_faces[id] = avg_score - requests.post( + resp = requests.post( f"{FRIGATE_LOCALHOST}/api/events/{id}/sub_label", - json={"subLabel": sub_label, "subLabelScore": avg_score}, + json={ + "camera": obj_data.get("camera"), + "subLabel": sub_label, + "subLabelScore": avg_score, + }, ) + if resp.status_code == 200: + self.detected_faces[id] = avg_score + def _create_thumbnail(self, yuv_frame, box, height=500) -> Optional[bytes]: """Return jpg thumbnail of a region of the frame.""" frame = cv2.cvtColor(yuv_frame, cv2.COLOR_YUV2BGR_I420) From b84b2313527199f8739c03aef8fe926a6ca649ff Mon Sep 17 00:00:00 2001 From: Josh Hawkins <32435876+hawkeye217@users.noreply.github.com> Date: Sat, 26 Oct 2024 12:07:45 -0500 Subject: [PATCH 06/18] License plate recognition (ALPR) backend (#14564) * Update version * Face recognition backend (#14495) * Add basic config and face recognition table * Reconfigure updates processing to handle face * Crop frame to face box * Implement face embedding calculation * Get matching face embeddings * Add support face recognition based on existing faces * Use arcface face embeddings instead of generic embeddings model * Add apis for managing faces * Implement face uploading API * Build out more APIs * Add min area config * Handle larger images * Add more debug logs * fix calculation * Reduce timeout * Small tweaks * Use webp images * Use facenet model * Improve face recognition (#14537) * Increase requirements for face to be set * Manage faces properly * Add basic docs * Simplify * Separate out face recognition frome semantic search * Update docs * Formatting * Fix access (#14540) * Face detection (#14544) * Add support for face detection * Add support for detecting faces during registration * Set body size to be larger * Undo * Update version * Face recognition backend (#14495) * Add basic config and face recognition table * Reconfigure updates processing to handle face * Crop frame to face box * Implement face embedding calculation * Get matching face embeddings * Add support face recognition based on existing faces * Use arcface face embeddings instead of generic embeddings model * Add apis for managing faces * Implement face uploading API * Build out more APIs * Add min area config * Handle larger images * Add more debug logs * fix calculation * Reduce timeout * Small tweaks * Use webp images * Use facenet model * Improve face recognition (#14537) * Increase requirements for face to be set * Manage faces properly * Add basic docs * Simplify * Separate out face recognition frome semantic search * Update docs * Formatting * Fix access (#14540) * Face detection (#14544) * Add support for face detection * Add support for detecting faces during registration * Set body size to be larger * Undo * initial foundation for alpr with paddleocr * initial foundation for alpr with paddleocr * initial foundation for alpr with paddleocr * config * config * lpr maintainer * clean up * clean up * fix processing * don't process for stationary cars * fix order * fixes * check for known plates * improved length and character by character confidence * model fixes and small tweaks * docs * placeholder for non frigate+ model lp detection --------- Co-authored-by: Nicolas Mowen --- .cspell/frigate-dictionary.txt | 7 +- docker/main/requirements-wheels.txt | 3 + .../license_plate_recognition.md | 48 ++ docs/sidebars.ts | 1 + frigate/config/config.py | 10 +- frigate/config/semantic_search.py | 23 +- frigate/embeddings/alpr/alpr.py | 791 ++++++++++++++++++ 
frigate/embeddings/embeddings.py | 45 + frigate/embeddings/functions/onnx.py | 30 +- frigate/embeddings/maintainer.py | 202 ++++- 10 files changed, 1151 insertions(+), 9 deletions(-) create mode 100644 docs/docs/configuration/license_plate_recognition.md create mode 100644 frigate/embeddings/alpr/alpr.py diff --git a/.cspell/frigate-dictionary.txt b/.cspell/frigate-dictionary.txt index 64fd7ca72..cc6adcc02 100644 --- a/.cspell/frigate-dictionary.txt +++ b/.cspell/frigate-dictionary.txt @@ -2,6 +2,7 @@ aarch absdiff airockchip Alloc +alpr Amcrest amdgpu analyzeduration @@ -61,6 +62,7 @@ dsize dtype ECONNRESET edgetpu +facenet fastapi faststart fflags @@ -114,6 +116,8 @@ itemsize Jellyfin jetson jetsons +jina +jinaai joserfc jsmpeg jsonify @@ -187,6 +191,7 @@ openai opencv openvino OWASP +paddleocr paho passwordless popleft @@ -308,4 +313,4 @@ yolo yolonas yolox zeep -zerolatency +zerolatency \ No newline at end of file diff --git a/docker/main/requirements-wheels.txt b/docker/main/requirements-wheels.txt index 804f6135e..30928d679 100644 --- a/docker/main/requirements-wheels.txt +++ b/docker/main/requirements-wheels.txt @@ -47,3 +47,6 @@ openai == 1.51.* # push notifications py-vapid == 1.9.* pywebpush == 2.0.* +# alpr +pyclipper == 1.3.* +shapely == 2.0.* diff --git a/docs/docs/configuration/license_plate_recognition.md b/docs/docs/configuration/license_plate_recognition.md new file mode 100644 index 000000000..b2fbb6214 --- /dev/null +++ b/docs/docs/configuration/license_plate_recognition.md @@ -0,0 +1,48 @@ +--- +id: license_plate_recognition +title: License Plate Recognition (LPR) +--- + +Frigate can recognize license plates on vehicles and automatically add the detected characters as a `sub_label` to objects that are of type `car`. A common use case may be to read the license plates of cars pulling into a driveway or cars passing by on a street with a dedicated LPR camera. + +Users running a Frigate+ model should ensure that `license_plate` is added to the [list of objects to track](https://docs.frigate.video/plus/#available-label-types) either globally or for a specific camera. This will improve the accuracy and performance of the LPR model. + +LPR is most effective when the vehicle’s license plate is fully visible to the camera. For moving vehicles, Frigate will attempt to read the plate continuously, refining its detection and keeping the most confident result. LPR will not run on stationary vehicles. + +## Minimum System Requirements + +License plate recognition works by running AI models locally on your system. The models are relatively lightweight and run on your CPU. At least 4GB of RAM is required. + +## Configuration + +License plate recognition is disabled by default. Enable it in your config file: + +```yaml +lpr: + enabled: true +``` + +## Advanced Configuration + +Several options are available to fine-tune the LPR feature. For example, you can adjust the `min_area` setting, which defines the minimum size in pixels a license plate must be before LPR runs. The default is 500 pixels. + +Additionally, you can define `known_plates`, allowing Frigate to label tracked vehicles with custom sub_labels when a recognized plate is detected. This information is then accessible in the UI, filters, and notifications. 
+ +```yaml +lpr: + enabled: true + min_area: 500 + known_plates: + Wife's Car: + - "ABC-1234" + - "ABC-I234" + Johnny: + - "JHN-1234" + - "JMN-1234" + - "JHN-I234" + Sally: + - "SLL-1234" + - "5LL-1234" +``` + +In this example, "Wife's Car" will appear as the label for any vehicle matching the plate "ABC-1234." The model might occasionally interpret the digit 1 as a capital I (e.g., "ABC-I234"), so both variations are listed. Similarly, multiple possible variations are specified for Johnny and Sally. diff --git a/docs/sidebars.ts b/docs/sidebars.ts index e214dde88..b0b8cdf48 100644 --- a/docs/sidebars.ts +++ b/docs/sidebars.ts @@ -37,6 +37,7 @@ const sidebars: SidebarsConfig = { 'configuration/semantic_search', 'configuration/genai', 'configuration/face_recognition', + 'configuration/license_plate_recognition', ], Cameras: [ 'configuration/cameras', diff --git a/frigate/config/config.py b/frigate/config/config.py index 88299767e..1b5ced152 100644 --- a/frigate/config/config.py +++ b/frigate/config/config.py @@ -57,7 +57,11 @@ from .logger import LoggerConfig from .mqtt import MqttConfig from .notification import NotificationConfig from .proxy import ProxyConfig -from .semantic_search import FaceRecognitionConfig, SemanticSearchConfig +from .semantic_search import ( + FaceRecognitionConfig, + LicensePlateRecognitionConfig, + SemanticSearchConfig, +) from .telemetry import TelemetryConfig from .tls import TlsConfig from .ui import UIConfig @@ -333,6 +337,10 @@ class FrigateConfig(FrigateBaseModel): face_recognition: FaceRecognitionConfig = Field( default_factory=FaceRecognitionConfig, title="Face recognition config." ) + lpr: LicensePlateRecognitionConfig = Field( + default_factory=LicensePlateRecognitionConfig, + title="License Plate recognition config.", + ) ui: UIConfig = Field(default_factory=UIConfig, title="UI configuration.") # Detector config diff --git a/frigate/config/semantic_search.py b/frigate/config/semantic_search.py index 7ffdbd137..f5e881e4e 100644 --- a/frigate/config/semantic_search.py +++ b/frigate/config/semantic_search.py @@ -1,10 +1,14 @@ -from typing import Optional +from typing import Dict, List, Optional from pydantic import Field from .base import FrigateBaseModel -__all__ = ["FaceRecognitionConfig", "SemanticSearchConfig"] +__all__ = [ + "FaceRecognitionConfig", + "SemanticSearchConfig", + "LicensePlateRecognitionConfig", +] class SemanticSearchConfig(FrigateBaseModel): @@ -25,3 +29,18 @@ class FaceRecognitionConfig(FrigateBaseModel): min_area: int = Field( default=500, title="Min area of face box to consider running face recognition." ) + + +class LicensePlateRecognitionConfig(FrigateBaseModel): + enabled: bool = Field(default=False, title="Enable license plate recognition.") + threshold: float = Field( + default=0.9, + title="License plate confidence score required to be added to the object as a sub label.", + ) + min_area: int = Field( + default=500, + title="Min area of license plate to consider running license plate recognition.", + ) + known_plates: Optional[Dict[str, List[str]]] = Field( + default={}, title="Known plates to track." 
+ ) diff --git a/frigate/embeddings/alpr/alpr.py b/frigate/embeddings/alpr/alpr.py new file mode 100644 index 000000000..b91a50e3a --- /dev/null +++ b/frigate/embeddings/alpr/alpr.py @@ -0,0 +1,791 @@ +import logging +import math +from typing import List, Tuple + +import cv2 +import numpy as np +from pyclipper import ET_CLOSEDPOLYGON, JT_ROUND, PyclipperOffset +from shapely.geometry import Polygon + +from frigate.comms.inter_process import InterProcessRequestor +from frigate.config.semantic_search import LicensePlateRecognitionConfig +from frigate.embeddings.embeddings import Embeddings + +logger = logging.getLogger(__name__) + + +class LicensePlateRecognition: + def __init__( + self, + config: LicensePlateRecognitionConfig, + requestor: InterProcessRequestor, + embeddings: Embeddings, + ): + self.lpr_config = config + self.requestor = requestor + self.embeddings = embeddings + self.detection_model = self.embeddings.lpr_detection_model + self.classification_model = self.embeddings.lpr_classification_model + self.recognition_model = self.embeddings.lpr_recognition_model + self.ctc_decoder = CTCDecoder() + + self.batch_size = 6 + + # Detection specific parameters + self.min_size = 3 + self.max_size = 960 + self.box_thresh = 0.8 + self.mask_thresh = 0.8 + + if self.lpr_config.enabled: + # all models need to be loaded to run LPR + self.detection_model._load_model_and_utils() + self.classification_model._load_model_and_utils() + self.recognition_model._load_model_and_utils() + + def detect(self, image: np.ndarray) -> List[np.ndarray]: + """ + Detect possible license plates in the input image by first resizing and normalizing it, + running a detection model, and filtering out low-probability regions. + + Args: + image (np.ndarray): The input image in which license plates will be detected. + + Returns: + List[np.ndarray]: A list of bounding box coordinates representing detected license plates. + """ + h, w = image.shape[:2] + + if sum([h, w]) < 64: + image = self.zero_pad(image) + + resized_image = self.resize_image(image) + normalized_image = self.normalize_image(resized_image) + + outputs = self.detection_model([normalized_image])[0] + outputs = outputs[0, :, :] + + boxes, _ = self.boxes_from_bitmap(outputs, outputs > self.mask_thresh, w, h) + return self.filter_polygon(boxes, (h, w)) + + def classify( + self, images: List[np.ndarray] + ) -> Tuple[List[np.ndarray], List[Tuple[str, float]]]: + """ + Classify the orientation or category of each detected license plate. + + Args: + images (List[np.ndarray]): A list of images of detected license plates. + + Returns: + Tuple[List[np.ndarray], List[Tuple[str, float]]]: A tuple of rotated/normalized plate images + and classification results with confidence scores. + """ + num_images = len(images) + indices = np.argsort([x.shape[1] / x.shape[0] for x in images]) + + for i in range(0, num_images, self.batch_size): + norm_images = [] + for j in range(i, min(num_images, i + self.batch_size)): + norm_img = self._preprocess_classification_image(images[indices[j]]) + norm_img = norm_img[np.newaxis, :] + norm_images.append(norm_img) + + outputs = self.classification_model(norm_images) + + return self._process_classification_output(images, outputs) + + def recognize( + self, images: List[np.ndarray] + ) -> Tuple[List[str], List[List[float]]]: + """ + Recognize the characters on the detected license plates using the recognition model. + + Args: + images (List[np.ndarray]): A list of images of license plates to recognize. 
+ + Returns: + Tuple[List[str], List[List[float]]]: A tuple of recognized license plate texts and confidence scores. + """ + input_shape = [3, 48, 320] + num_images = len(images) + + # sort images by aspect ratio for processing + indices = np.argsort(np.array([x.shape[1] / x.shape[0] for x in images])) + + for index in range(0, num_images, self.batch_size): + input_h, input_w = input_shape[1], input_shape[2] + max_wh_ratio = input_w / input_h + norm_images = [] + + # calculate the maximum aspect ratio in the current batch + for i in range(index, min(num_images, index + self.batch_size)): + h, w = images[indices[i]].shape[0:2] + max_wh_ratio = max(max_wh_ratio, w * 1.0 / h) + + # preprocess the images based on the max aspect ratio + for i in range(index, min(num_images, index + self.batch_size)): + norm_image = self._preprocess_recognition_image( + images[indices[i]], max_wh_ratio + ) + norm_image = norm_image[np.newaxis, :] + norm_images.append(norm_image) + + outputs = self.recognition_model(norm_images) + return self.ctc_decoder(outputs) + + def process_license_plate( + self, image: np.ndarray + ) -> Tuple[List[str], List[float], List[int]]: + """ + Complete pipeline for detecting, classifying, and recognizing license plates in the input image. + + Args: + image (np.ndarray): The input image in which to detect, classify, and recognize license plates. + + Returns: + Tuple[List[str], List[float], List[int]]: Detected license plate texts, confidence scores, and areas of the plates. + """ + if ( + self.detection_model.runner is None + or self.classification_model.runner is None + or self.recognition_model.runner is None + ): + # we might still be downloading the models + logger.debug("Model runners not loaded") + return [], [], [] + + plate_points = self.detect(image) + if len(plate_points) == 0: + return [], [], [] + + plate_points = self.sort_polygon(list(plate_points)) + plate_images = [self._crop_license_plate(image, x) for x in plate_points] + rotated_images, _ = self.classify(plate_images) + + # keep track of the index of each image for correct area calc later + sorted_indices = np.argsort([x.shape[1] / x.shape[0] for x in rotated_images]) + reverse_mapping = { + idx: original_idx for original_idx, idx in enumerate(sorted_indices) + } + + results, confidences = self.recognize(rotated_images) + + if results: + license_plates = [""] * len(rotated_images) + average_confidences = [[0.0]] * len(rotated_images) + areas = [0] * len(rotated_images) + + # map results back to original image order + for i, (plate, conf) in enumerate(zip(results, confidences)): + original_idx = reverse_mapping[i] + + height, width = rotated_images[original_idx].shape[:2] + area = height * width + + average_confidence = conf + + # set to True to write each cropped image for debugging + if False: + save_image = cv2.cvtColor( + rotated_images[original_idx], cv2.COLOR_RGB2BGR + ) + filename = f"/config/plate_{original_idx}_{plate}_{area}.jpg" + cv2.imwrite(filename, save_image) + + license_plates[original_idx] = plate + average_confidences[original_idx] = average_confidence + areas[original_idx] = area + + return license_plates, average_confidences, areas + + return [], [], [] + + def resize_image(self, image: np.ndarray) -> np.ndarray: + """ + Resize the input image while maintaining the aspect ratio, ensuring dimensions are multiples of 32. + + Args: + image (np.ndarray): The input image to resize. + + Returns: + np.ndarray: The resized image. 
+ """ + h, w = image.shape[:2] + ratio = min(self.max_size / max(h, w), 1.0) + resize_h = max(int(round(int(h * ratio) / 32) * 32), 32) + resize_w = max(int(round(int(w * ratio) / 32) * 32), 32) + return cv2.resize(image, (resize_w, resize_h)) + + def normalize_image(self, image: np.ndarray) -> np.ndarray: + """ + Normalize the input image by subtracting the mean and multiplying by the standard deviation. + + Args: + image (np.ndarray): The input image to normalize. + + Returns: + np.ndarray: The normalized image, transposed to match the model's expected input format. + """ + mean = np.array([123.675, 116.28, 103.53]).reshape(1, -1).astype("float64") + std = 1 / np.array([58.395, 57.12, 57.375]).reshape(1, -1).astype("float64") + + image = image.astype("float32") + cv2.subtract(image, mean, image) + cv2.multiply(image, std, image) + return image.transpose((2, 0, 1))[np.newaxis, ...] + + def boxes_from_bitmap( + self, output: np.ndarray, mask: np.ndarray, dest_width: int, dest_height: int + ) -> Tuple[np.ndarray, List[float]]: + """ + Process the binary mask to extract bounding boxes and associated confidence scores. + + Args: + output (np.ndarray): Output confidence map from the model. + mask (np.ndarray): Binary mask of detected regions. + dest_width (int): Target width for scaling the box coordinates. + dest_height (int): Target height for scaling the box coordinates. + + Returns: + Tuple[np.ndarray, List[float]]: Array of bounding boxes and list of corresponding scores. + """ + + mask = (mask * 255).astype(np.uint8) + height, width = mask.shape + outs = cv2.findContours(mask, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE) + + # handle different return values of findContours between OpenCV versions + contours = outs[0] if len(outs) == 2 else outs[1] + + boxes = [] + scores = [] + + for index in range(len(contours)): + contour = contours[index] + + # get minimum bounding box (rotated rectangle) around the contour and the smallest side length. + points, min_side = self.get_min_boxes(contour) + + if min_side < self.min_size: + continue + + points = np.array(points) + + score = self.box_score(output, contour) + if self.box_thresh > score: + continue + + polygon = Polygon(points) + distance = polygon.area / polygon.length + + # Use pyclipper to shrink the polygon slightly based on the computed distance. + offset = PyclipperOffset() + offset.AddPath(points, JT_ROUND, ET_CLOSEDPOLYGON) + points = np.array(offset.Execute(distance * 1.5)).reshape((-1, 1, 2)) + + # get the minimum bounding box around the shrunken polygon. + box, min_side = self.get_min_boxes(points) + + if min_side < self.min_size + 2: + continue + + box = np.array(box) + + # normalize and clip box coordinates to fit within the destination image size. + box[:, 0] = np.clip(np.round(box[:, 0] / width * dest_width), 0, dest_width) + box[:, 1] = np.clip( + np.round(box[:, 1] / height * dest_height), 0, dest_height + ) + + boxes.append(box.astype("int32")) + scores.append(score) + + return np.array(boxes, dtype="int32"), scores + + @staticmethod + def get_min_boxes(contour: np.ndarray) -> Tuple[List[Tuple[float, float]], float]: + """ + Calculate the minimum bounding box (rotated rectangle) for a given contour. + + Args: + contour (np.ndarray): The contour points of the detected shape. + + Returns: + Tuple[List[Tuple[float, float]], float]: A list of four points representing the + corners of the bounding box, and the length of the shortest side. 
+ """ + bounding_box = cv2.minAreaRect(contour) + points = sorted(cv2.boxPoints(bounding_box), key=lambda x: x[0]) + index_1, index_4 = (0, 1) if points[1][1] > points[0][1] else (1, 0) + index_2, index_3 = (2, 3) if points[3][1] > points[2][1] else (3, 2) + box = [points[index_1], points[index_2], points[index_3], points[index_4]] + return box, min(bounding_box[1]) + + @staticmethod + def box_score(bitmap: np.ndarray, contour: np.ndarray) -> float: + """ + Calculate the average score within the bounding box of a contour. + + Args: + bitmap (np.ndarray): The output confidence map from the model. + contour (np.ndarray): The contour of the detected shape. + + Returns: + float: The average score of the pixels inside the contour region. + """ + h, w = bitmap.shape[:2] + contour = contour.reshape(-1, 2) + x1, y1 = np.clip(contour.min(axis=0), 0, [w - 1, h - 1]) + x2, y2 = np.clip(contour.max(axis=0), 0, [w - 1, h - 1]) + mask = np.zeros((y2 - y1 + 1, x2 - x1 + 1), dtype=np.uint8) + cv2.fillPoly(mask, [contour - [x1, y1]], 1) + return cv2.mean(bitmap[y1 : y2 + 1, x1 : x2 + 1], mask)[0] + + @staticmethod + def expand_box(points: List[Tuple[float, float]]) -> np.ndarray: + """ + Expand a polygonal shape slightly by a factor determined by the area-to-perimeter ratio. + + Args: + points (List[Tuple[float, float]]): Points of the polygon to expand. + + Returns: + np.ndarray: Expanded polygon points. + """ + polygon = Polygon(points) + distance = polygon.area / polygon.length + offset = PyclipperOffset() + offset.AddPath(points, JT_ROUND, ET_CLOSEDPOLYGON) + expanded = np.array(offset.Execute(distance * 1.5)).reshape((-1, 2)) + return expanded + + def filter_polygon( + self, points: List[np.ndarray], shape: Tuple[int, int] + ) -> np.ndarray: + """ + Filter a set of polygons to include only valid ones that fit within an image shape + and meet size constraints. + + Args: + points (List[np.ndarray]): List of polygons to filter. + shape (Tuple[int, int]): Shape of the image (height, width). + + Returns: + np.ndarray: List of filtered polygons. + """ + height, width = shape + return np.array( + [ + self.clockwise_order(point) + for point in points + if self.is_valid_polygon(point, width, height) + ] + ) + + @staticmethod + def is_valid_polygon(point: np.ndarray, width: int, height: int) -> bool: + """ + Check if a polygon is valid, meaning it fits within the image bounds + and has sides of a minimum length. + + Args: + point (np.ndarray): The polygon to validate. + width (int): Image width. + height (int): Image height. + + Returns: + bool: Whether the polygon is valid or not. + """ + return ( + point[:, 0].min() >= 0 + and point[:, 0].max() < width + and point[:, 1].min() >= 0 + and point[:, 1].max() < height + and np.linalg.norm(point[0] - point[1]) > 3 + and np.linalg.norm(point[0] - point[3]) > 3 + ) + + @staticmethod + def clockwise_order(point: np.ndarray) -> np.ndarray: + """ + Arrange the points of a polygon in clockwise order based on their angular positions + around the polygon's center. + + Args: + point (np.ndarray): Array of points of the polygon. + + Returns: + np.ndarray: Points ordered in clockwise direction. + """ + center = point.mean(axis=0) + return point[ + np.argsort(np.arctan2(point[:, 1] - center[1], point[:, 0] - center[0])) + ] + + @staticmethod + def sort_polygon(points): + """ + Sort polygons based on their position in the image. If polygons are close in vertical + position (within 10 pixels), sort them by horizontal position. + + Args: + points: List of polygons to sort. 
+ + Returns: + List: Sorted list of polygons. + """ + points.sort(key=lambda x: (x[0][1], x[0][0])) + for i in range(len(points) - 1): + for j in range(i, -1, -1): + if abs(points[j + 1][0][1] - points[j][0][1]) < 10 and ( + points[j + 1][0][0] < points[j][0][0] + ): + temp = points[j] + points[j] = points[j + 1] + points[j + 1] = temp + else: + break + return points + + @staticmethod + def zero_pad(image: np.ndarray) -> np.ndarray: + """ + Apply zero-padding to an image, ensuring its dimensions are at least 32x32. + The padding is added only if needed. + + Args: + image (np.ndarray): Input image. + + Returns: + np.ndarray: Zero-padded image. + """ + h, w, c = image.shape + pad = np.zeros((max(32, h), max(32, w), c), np.uint8) + pad[:h, :w, :] = image + return pad + + @staticmethod + def _preprocess_classification_image(image: np.ndarray) -> np.ndarray: + """ + Preprocess a single image for classification by resizing, normalizing, and padding. + + This method resizes the input image to a fixed height of 48 pixels while adjusting + the width dynamically up to a maximum of 192 pixels. The image is then normalized and + padded to fit the required input dimensions for classification. + + Args: + image (np.ndarray): Input image to preprocess. + + Returns: + np.ndarray: Preprocessed and padded image. + """ + # fixed height of 48, dynamic width up to 192 + input_shape = (3, 48, 192) + input_c, input_h, input_w = input_shape + + h, w = image.shape[:2] + ratio = w / h + resized_w = min(input_w, math.ceil(input_h * ratio)) + + resized_image = cv2.resize(image, (resized_w, input_h)) + + # handle single-channel images (grayscale) if needed + if input_c == 1 and resized_image.ndim == 2: + resized_image = resized_image[np.newaxis, :, :] + else: + resized_image = resized_image.transpose((2, 0, 1)) + + # normalize + resized_image = (resized_image.astype("float32") / 255.0 - 0.5) / 0.5 + + padded_image = np.zeros((input_c, input_h, input_w), dtype=np.float32) + padded_image[:, :, :resized_w] = resized_image + + return padded_image + + def _process_classification_output( + self, images: List[np.ndarray], outputs: List[np.ndarray] + ) -> Tuple[List[np.ndarray], List[Tuple[str, float]]]: + """ + Process the classification model output by matching labels with confidence scores. + + This method processes the outputs from the classification model and rotates images + with high confidence of being labeled "180". It ensures that results are mapped to + the original image order. + + Args: + images (List[np.ndarray]): List of input images. + outputs (List[np.ndarray]): Corresponding model outputs. + + Returns: + Tuple[List[np.ndarray], List[Tuple[str, float]]]: A tuple of processed images and + classification results (label and confidence score). + """ + labels = ["0", "180"] + results = [["", 0.0]] * len(images) + indices = np.argsort(np.array([x.shape[1] / x.shape[0] for x in images])) + + outputs = np.stack(outputs) + + outputs = [ + (labels[idx], outputs[i, idx]) + for i, idx in enumerate(outputs.argmax(axis=1)) + ] + + for i in range(0, len(images), self.batch_size): + for j in range(len(outputs)): + label, score = outputs[j] + results[indices[i + j]] = [label, score] + if "180" in label and score >= self.lpr_config.threshold: + images[indices[i + j]] = cv2.rotate(images[indices[i + j]], 1) + + return images, results + + def _preprocess_recognition_image( + self, image: np.ndarray, max_wh_ratio: float + ) -> np.ndarray: + """ + Preprocess an image for recognition by dynamically adjusting its width. 
+ + This method adjusts the width of the image based on the maximum width-to-height ratio + while keeping the height fixed at 48 pixels. The image is then normalized and padded + to fit the required input dimensions for recognition. + + Args: + image (np.ndarray): Input image to preprocess. + max_wh_ratio (float): Maximum width-to-height ratio for resizing. + + Returns: + np.ndarray: Preprocessed and padded image. + """ + # fixed height of 48, dynamic width based on ratio + input_shape = [3, 48, 320] + input_h, input_w = input_shape[1], input_shape[2] + + assert image.shape[2] == input_shape[0], "Unexpected number of image channels." + + # dynamically adjust input width based on max_wh_ratio + input_w = int(input_h * max_wh_ratio) + + # check for model-specific input width + model_input_w = self.recognition_model.runner.ort.get_inputs()[0].shape[3] + if isinstance(model_input_w, int) and model_input_w > 0: + input_w = model_input_w + + h, w = image.shape[:2] + aspect_ratio = w / h + resized_w = min(input_w, math.ceil(input_h * aspect_ratio)) + + resized_image = cv2.resize(image, (resized_w, input_h)) + resized_image = resized_image.transpose((2, 0, 1)) + resized_image = (resized_image.astype("float32") / 255.0 - 0.5) / 0.5 + + padded_image = np.zeros((input_shape[0], input_h, input_w), dtype=np.float32) + padded_image[:, :, :resized_w] = resized_image + + return padded_image + + @staticmethod + def _crop_license_plate(image: np.ndarray, points: np.ndarray) -> np.ndarray: + """ + Crop the license plate from the image using four corner points. + + This method crops the region containing the license plate by using the perspective + transformation based on four corner points. If the resulting image is significantly + taller than wide, the image is rotated to the correct orientation. + + Args: + image (np.ndarray): Input image containing the license plate. + points (np.ndarray): Four corner points defining the plate's position. + + Returns: + np.ndarray: Cropped and potentially rotated license plate image. + """ + assert len(points) == 4, "shape of points must be 4*2" + points = points.astype(np.float32) + crop_width = int( + max( + np.linalg.norm(points[0] - points[1]), + np.linalg.norm(points[2] - points[3]), + ) + ) + crop_height = int( + max( + np.linalg.norm(points[0] - points[3]), + np.linalg.norm(points[1] - points[2]), + ) + ) + pts_std = np.float32( + [[0, 0], [crop_width, 0], [crop_width, crop_height], [0, crop_height]] + ) + matrix = cv2.getPerspectiveTransform(points, pts_std) + image = cv2.warpPerspective( + image, + matrix, + (crop_width, crop_height), + borderMode=cv2.BORDER_REPLICATE, + flags=cv2.INTER_CUBIC, + ) + height, width = image.shape[0:2] + if height * 1.0 / width >= 1.5: + image = np.rot90(image, k=3) + return image + + +class CTCDecoder: + """ + A decoder for interpreting the output of a CTC (Connectionist Temporal Classification) model. + + This decoder converts the model's output probabilities into readable sequences of characters + while removing duplicates and handling blank tokens. It also calculates the confidence scores + for each decoded character sequence. + """ + + def __init__(self): + """ + Initialize the CTCDecoder with a list of characters and a character map. + + The character set includes digits, letters, special characters, and a "blank" token + (used by the CTC model for decoding purposes). A character map is created to map + indices to characters. 
+ """ + self.characters = [ + "blank", + "0", + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + ":", + ";", + "<", + "=", + ">", + "?", + "@", + "A", + "B", + "C", + "D", + "E", + "F", + "G", + "H", + "I", + "J", + "K", + "L", + "M", + "N", + "O", + "P", + "Q", + "R", + "S", + "T", + "U", + "V", + "W", + "X", + "Y", + "Z", + "[", + "\\", + "]", + "^", + "_", + "`", + "a", + "b", + "c", + "d", + "e", + "f", + "g", + "h", + "i", + "j", + "k", + "l", + "m", + "n", + "o", + "p", + "q", + "r", + "s", + "t", + "u", + "v", + "w", + "x", + "y", + "z", + "{", + "|", + "}", + "~", + "!", + '"', + "#", + "$", + "%", + "&", + "'", + "(", + ")", + "*", + "+", + ",", + "-", + ".", + "/", + " ", + " ", + ] + self.char_map = {i: char for i, char in enumerate(self.characters)} + + def __call__( + self, outputs: List[np.ndarray] + ) -> Tuple[List[str], List[List[float]]]: + """ + Decode a batch of model outputs into character sequences and their confidence scores. + + The method takes the output probability distributions for each time step and uses + the best path decoding strategy. It then merges repeating characters and ignores + blank tokens. Confidence scores for each decoded character are also calculated. + + Args: + outputs (List[np.ndarray]): A list of model outputs, where each element is + a probability distribution for each time step. + + Returns: + Tuple[List[str], List[List[float]]]: A tuple of decoded character sequences + and confidence scores for each sequence. + """ + results = [] + confidences = [] + for output in outputs: + seq_log_probs = np.log(output + 1e-8) + best_path = np.argmax(seq_log_probs, axis=1) + + merged_path = [] + merged_probs = [] + for t, char_index in enumerate(best_path): + if char_index != 0 and (t == 0 or char_index != best_path[t - 1]): + merged_path.append(char_index) + merged_probs.append(seq_log_probs[t, char_index]) + + result = "".join(self.char_map[idx] for idx in merged_path) + results.append(result) + + confidence = np.exp(merged_probs).tolist() + confidences.append(confidence) + + return results, confidences diff --git a/frigate/embeddings/embeddings.py b/frigate/embeddings/embeddings.py index cc54ba548..a2de88394 100644 --- a/frigate/embeddings/embeddings.py +++ b/frigate/embeddings/embeddings.py @@ -77,6 +77,10 @@ class Embeddings: if config.semantic_search.model_size == "large" else "jinaai/jina-clip-v1-vision_model_quantized.onnx", "jinaai/jina-clip-v1-preprocessor_config.json", + "facenet-facenet.onnx", + "paddleocr-onnx-detection.onnx", + "paddleocr-onnx-classification.onnx", + "paddleocr-onnx-recognition.onnx", ] for model in models: @@ -138,6 +142,47 @@ class Embeddings: device="GPU", ) + self.lpr_detection_model = None + self.lpr_classification_model = None + self.lpr_recognition_model = None + + if self.config.lpr.enabled: + self.lpr_detection_model = GenericONNXEmbedding( + model_name="paddleocr-onnx", + model_file="detection.onnx", + download_urls={ + "detection.onnx": "https://github.com/hawkeye217/paddleocr-onnx/raw/refs/heads/master/models/detection.onnx" + }, + model_size="large", + model_type=ModelTypeEnum.alpr_detect, + requestor=self.requestor, + device="CPU", + ) + + self.lpr_classification_model = GenericONNXEmbedding( + model_name="paddleocr-onnx", + model_file="classification.onnx", + download_urls={ + "classification.onnx": "https://github.com/hawkeye217/paddleocr-onnx/raw/refs/heads/master/models/classification.onnx" + }, + model_size="large", + model_type=ModelTypeEnum.alpr_classify, + requestor=self.requestor, + 
device="CPU", + ) + + self.lpr_recognition_model = GenericONNXEmbedding( + model_name="paddleocr-onnx", + model_file="recognition.onnx", + download_urls={ + "recognition.onnx": "https://github.com/hawkeye217/paddleocr-onnx/raw/refs/heads/master/models/recognition.onnx" + }, + model_size="large", + model_type=ModelTypeEnum.alpr_recognize, + requestor=self.requestor, + device="CPU", + ) + def embed_thumbnail( self, event_id: str, thumbnail: bytes, upsert: bool = True ) -> ndarray: diff --git a/frigate/embeddings/functions/onnx.py b/frigate/embeddings/functions/onnx.py index 9fc71d502..200f728d3 100644 --- a/frigate/embeddings/functions/onnx.py +++ b/frigate/embeddings/functions/onnx.py @@ -38,6 +38,9 @@ class ModelTypeEnum(str, Enum): face = "face" vision = "vision" text = "text" + alpr_detect = "alpr_detect" + alpr_classify = "alpr_classify" + alpr_recognize = "alpr_recognize" class GenericONNXEmbedding: @@ -89,7 +92,7 @@ class GenericONNXEmbedding: files_names, ModelStatusTypesEnum.downloaded, ) - self._load_model_and_tokenizer() + self._load_model_and_utils() logger.debug(f"models are already downloaded for {self.model_name}") def _download_model(self, path: str): @@ -129,7 +132,7 @@ class GenericONNXEmbedding: }, ) - def _load_model_and_tokenizer(self): + def _load_model_and_utils(self): if self.runner is None: if self.downloader: self.downloader.wait_for_download() @@ -139,6 +142,12 @@ class GenericONNXEmbedding: self.feature_extractor = self._load_feature_extractor() elif self.model_type == ModelTypeEnum.face: self.feature_extractor = [] + elif self.model_type == ModelTypeEnum.alpr_detect: + self.feature_extractor = [] + elif self.model_type == ModelTypeEnum.alpr_classify: + self.feature_extractor = [] + elif self.model_type == ModelTypeEnum.alpr_recognize: + self.feature_extractor = [] self.runner = ONNXModelRunner( os.path.join(self.download_path, self.model_file), @@ -214,6 +223,21 @@ class GenericONNXEmbedding: frame = np.expand_dims(frame, axis=0) return [{"image_input": frame}] + elif self.model_type == ModelTypeEnum.alpr_detect: + preprocessed = [] + for x in raw_inputs: + preprocessed.append(x) + return [{"x": preprocessed[0]}] + elif self.model_type == ModelTypeEnum.alpr_classify: + processed = [] + for img in raw_inputs: + processed.append({"x": img}) + return processed + elif self.model_type == ModelTypeEnum.alpr_recognize: + processed = [] + for img in raw_inputs: + processed.append({"x": img}) + return processed else: raise ValueError(f"Unable to preprocess inputs for {self.model_type}") @@ -230,7 +254,7 @@ class GenericONNXEmbedding: def __call__( self, inputs: Union[List[str], List[Image.Image], List[str]] ) -> List[np.ndarray]: - self._load_model_and_tokenizer() + self._load_model_and_utils() if self.runner is None or ( self.tokenizer is None and self.feature_extractor is None ): diff --git a/frigate/embeddings/maintainer.py b/frigate/embeddings/maintainer.py index 76b174862..4aa523501 100644 --- a/frigate/embeddings/maintainer.py +++ b/frigate/embeddings/maintainer.py @@ -22,6 +22,7 @@ from frigate.comms.events_updater import EventEndSubscriber, EventUpdateSubscrib from frigate.comms.inter_process import InterProcessRequestor from frigate.config import FrigateConfig from frigate.const import CLIPS_DIR, FRIGATE_LOCALHOST, UPDATE_EVENT_DESCRIPTION +from frigate.embeddings.alpr.alpr import LicensePlateRecognition from frigate.events.types import EventTypeEnum from frigate.genai import get_genai_client from frigate.models import Event @@ -73,6 +74,18 @@ class 
EmbeddingMaintainer(threading.Thread): self.tracked_events: dict[str, list[any]] = {} self.genai_client = get_genai_client(config) + # set license plate recognition conditions + self.lpr_config = self.config.lpr + self.requires_license_plate_detection = ( + "license_plate" not in self.config.model.all_attributes + ) + self.detected_license_plates: dict[str, dict[str, any]] = {} + + if self.lpr_config.enabled: + self.license_plate_recognition = LicensePlateRecognition( + self.lpr_config, self.requestor, self.embeddings + ) + @property def face_detector(self) -> cv2.FaceDetectorYN: # Lazily create the classifier. @@ -171,8 +184,12 @@ class EmbeddingMaintainer(threading.Thread): camera_config = self.config.cameras[camera] - # no need to process updated objects if face recognition and genai are disabled - if not camera_config.genai.enabled and not self.face_recognition_enabled: + # no need to process updated objects if face recognition, lpr, genai are disabled + if ( + not camera_config.genai.enabled + and not self.face_recognition_enabled + and not self.lpr_config.enabled + ): return # Create our own thumbnail based on the bounding box and the frame time @@ -190,6 +207,9 @@ class EmbeddingMaintainer(threading.Thread): if self.face_recognition_enabled: self._process_face(data, yuv_frame) + if self.lpr_config.enabled: + self._process_license_plate(data, yuv_frame) + # no need to save our own thumbnails if genai is not enabled # or if the object has become stationary if self.genai_client is not None and not data["stationary"]: @@ -221,6 +241,9 @@ class EmbeddingMaintainer(threading.Thread): if event_id in self.detected_faces: self.detected_faces.pop(event_id) + if event_id in self.detected_license_plates: + self.detected_license_plates.pop(event_id) + if updated_db: try: event: Event = Event.get(Event.id == event_id) @@ -465,6 +488,181 @@ class EmbeddingMaintainer(threading.Thread): if resp.status_code == 200: self.detected_faces[id] = avg_score + def _detect_license_plate(self, input: np.ndarray) -> tuple[int, int, int, int]: + """Return the dimensions of the input image as [x, y, width, height].""" + height, width = input.shape[:2] + return (0, 0, width, height) + + def _process_license_plate( + self, obj_data: dict[str, any], frame: np.ndarray + ) -> None: + """Look for license plates in image.""" + id = obj_data["id"] + + # don't run for non car objects + if obj_data.get("label") != "car": + logger.debug("Not a processing license plate for non car object.") + return + + # don't run for stationary car objects + if obj_data.get("stationary") == True: + logger.debug("Not a processing license plate for a stationary car object.") + return + + # don't overwrite sub label for objects that have a sub label + # that is not a license plate + if obj_data.get("sub_label") and id not in self.detected_license_plates: + logger.debug( + f"Not processing license plate due to existing sub label: {obj_data.get('sub_label')}." 
+ ) + return + + license_plate: Optional[dict[str, any]] = None + + if self.requires_license_plate_detection: + logger.debug("Running manual license_plate detection.") + car_box = obj_data.get("box") + + if not car_box: + return None + + rgb = cv2.cvtColor(frame, cv2.COLOR_YUV2RGB_I420) + left, top, right, bottom = car_box + car = rgb[top:bottom, left:right] + license_plate = self._detect_license_plate(car) + + if not license_plate: + logger.debug("Detected no license plates for car object.") + return + + license_plate_frame = car[ + license_plate[1] : license_plate[3], license_plate[0] : license_plate[2] + ] + license_plate_frame = cv2.cvtColor(license_plate_frame, cv2.COLOR_RGB2BGR) + else: + # don't run for object without attributes + if not obj_data.get("current_attributes"): + logger.debug("No attributes to parse.") + return + + attributes: list[dict[str, any]] = obj_data.get("current_attributes", []) + for attr in attributes: + if attr.get("label") != "license_plate": + continue + + if license_plate is None or attr.get("score", 0.0) > license_plate.get( + "score", 0.0 + ): + license_plate = attr + + # no license plates detected in this frame + if not license_plate: + return + + license_plate_box = license_plate.get("box") + + # check that license plate is valid + if ( + not license_plate_box + or area(license_plate_box) < self.config.lpr.min_area + ): + logger.debug(f"Invalid license plate box {license_plate}") + return + + license_plate_frame = cv2.cvtColor(frame, cv2.COLOR_YUV2BGR_I420) + license_plate_frame = license_plate_frame[ + license_plate_box[1] : license_plate_box[3], + license_plate_box[0] : license_plate_box[2], + ] + + # run detection, returns results sorted by confidence, best first + license_plates, confidences, areas = ( + self.license_plate_recognition.process_license_plate(license_plate_frame) + ) + + logger.debug(f"Text boxes: {license_plates}") + logger.debug(f"Confidences: {confidences}") + logger.debug(f"Areas: {areas}") + + if license_plates: + for plate, confidence, text_area in zip(license_plates, confidences, areas): + avg_confidence = ( + (sum(confidence) / len(confidence)) if confidence else 0 + ) + + logger.debug( + f"Detected text: {plate} (average confidence: {avg_confidence:.2f}, area: {text_area} pixels)" + ) + else: + # no plates found + logger.debug("No text detected") + return + + top_plate, top_char_confidences = license_plates[0], confidences[0] + avg_confidence = sum(top_char_confidences) / len(top_char_confidences) + + # Check if we have a previously detected plate for this ID + if id in self.detected_license_plates: + prev_plate = self.detected_license_plates[id]["plate"] + prev_char_confidences = self.detected_license_plates[id]["char_confidences"] + prev_avg_confidence = sum(prev_char_confidences) / len( + prev_char_confidences + ) + + # Define conditions for keeping the previous plate + shorter_than_previous = len(top_plate) < len(prev_plate) + lower_avg_confidence = avg_confidence <= prev_avg_confidence + + # Compare character-by-character confidence where possible + min_length = min(len(top_plate), len(prev_plate)) + char_confidence_comparison = sum( + 1 + for i in range(min_length) + if top_char_confidences[i] <= prev_char_confidences[i] + ) + worse_char_confidences = char_confidence_comparison >= min_length / 2 + + if shorter_than_previous or ( + lower_avg_confidence and worse_char_confidences + ): + logger.debug( + f"Keeping previous plate. 
New plate stats: " + f"length={len(top_plate)}, avg_conf={avg_confidence:.2f} " + f"vs Previous: length={len(prev_plate)}, avg_conf={prev_avg_confidence:.2f}" + ) + return + + # Check against minimum confidence threshold + if avg_confidence < self.lpr_config.threshold: + logger.debug( + f"Average confidence {avg_confidence} is less than threshold ({self.lpr_config.threshold})" + ) + return + + # Determine subLabel based on known plates + # Default to the detected plate, use label name if there's a match + sub_label = top_plate + for label, plates in self.lpr_config.known_plates.items(): + if top_plate in plates: + sub_label = label + break + + # Send the result to the API + resp = requests.post( + f"{FRIGATE_LOCALHOST}/api/events/{id}/sub_label", + json={ + "camera": obj_data.get("camera"), + "subLabel": sub_label, + "subLabelScore": avg_confidence, + }, + ) + + if resp.status_code == 200: + self.detected_license_plates[id] = { + "plate": top_plate, + "char_confidences": top_char_confidences, + } + def _create_thumbnail(self, yuv_frame, box, height=500) -> Optional[bytes]: """Return jpg thumbnail of a region of the frame.""" frame = cv2.cvtColor(yuv_frame, cv2.COLOR_YUV2BGR_I420) From cc358f6a8123b453207eb9773a410a7deaf9b842 Mon Sep 17 00:00:00 2001 From: Nicolas Mowen Date: Sat, 26 Oct 2024 12:14:21 -0600 Subject: [PATCH 07/18] Fix label check (#14610) * Create config for parsing object * Use in maintainer --- frigate/config/camera/objects.py | 19 ++++++++++++++++++- frigate/config/config.py | 9 ++------- frigate/embeddings/maintainer.py | 4 ++-- 3 files changed, 22 insertions(+), 10 deletions(-) diff --git a/frigate/config/camera/objects.py b/frigate/config/camera/objects.py index 22cd92f1c..578f8e677 100644 --- a/frigate/config/camera/objects.py +++ b/frigate/config/camera/objects.py @@ -1,6 +1,6 @@ from typing import Any, Optional, Union -from pydantic import Field, field_serializer +from pydantic import Field, PrivateAttr, field_serializer from ..base import FrigateBaseModel @@ -53,3 +53,20 @@ class ObjectConfig(FrigateBaseModel): default_factory=dict, title="Object filters." 
) mask: Union[str, list[str]] = Field(default="", title="Object mask.") + _all_objects: list[str] = PrivateAttr() + + @property + def all_objects(self) -> list[str]: + return self._all_objects + + def parse_all_objects(self, cameras): + if "_all_objects" in self: + return + + # get list of unique enabled labels for tracking + enabled_labels = set(self.track) + + for camera in cameras.values(): + enabled_labels.update(camera.objects.track) + + self._all_objects = list(enabled_labels) diff --git a/frigate/config/config.py b/frigate/config/config.py index 1b5ced152..e9f231662 100644 --- a/frigate/config/config.py +++ b/frigate/config/config.py @@ -599,13 +599,8 @@ class FrigateConfig(FrigateBaseModel): verify_autotrack_zones(camera_config) verify_motion_and_detect(camera_config) - # get list of unique enabled labels for tracking - enabled_labels = set(self.objects.track) - - for camera in self.cameras.values(): - enabled_labels.update(camera.objects.track) - - self.model.create_colormap(sorted(enabled_labels)) + self.objects.parse_all_objects(self.cameras) + self.model.create_colormap(sorted(self.objects.all_objects)) self.model.check_and_load_plus_model(self.plus_api) for key, detector in self.detectors.items(): diff --git a/frigate/embeddings/maintainer.py b/frigate/embeddings/maintainer.py index 4aa523501..00804c63f 100644 --- a/frigate/embeddings/maintainer.py +++ b/frigate/embeddings/maintainer.py @@ -65,7 +65,7 @@ class EmbeddingMaintainer(threading.Thread): # set face recognition conditions self.face_recognition_enabled = self.config.face_recognition.enabled - self.requires_face_detection = "face" not in self.config.model.all_attributes + self.requires_face_detection = "face" not in self.config.objects.all_objects self.detected_faces: dict[str, float] = {} # create communication for updating event descriptions @@ -77,7 +77,7 @@ class EmbeddingMaintainer(threading.Thread): # set license plate recognition conditions self.lpr_config = self.config.lpr self.requires_license_plate_detection = ( - "license_plate" not in self.config.model.all_attributes + "license_plate" not in self.config.objects.all_objects ) self.detected_license_plates: dict[str, dict[str, any]] = {} From 0d768461c82e02cae50f96f9a51cc0735dc7ddb7 Mon Sep 17 00:00:00 2001 From: Josh Hawkins <32435876+hawkeye217@users.noreply.github.com> Date: Sat, 26 Oct 2024 17:27:02 -0500 Subject: [PATCH 08/18] Prevent division by zero in lpr confidence checks (#14615) --- frigate/embeddings/maintainer.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/frigate/embeddings/maintainer.py b/frigate/embeddings/maintainer.py index 00804c63f..2084e6b7a 100644 --- a/frigate/embeddings/maintainer.py +++ b/frigate/embeddings/maintainer.py @@ -599,14 +599,20 @@ class EmbeddingMaintainer(threading.Thread): return top_plate, top_char_confidences = license_plates[0], confidences[0] - avg_confidence = sum(top_char_confidences) / len(top_char_confidences) + avg_confidence = ( + (sum(top_char_confidences) / len(top_char_confidences)) + if top_char_confidences + else 0 + ) # Check if we have a previously detected plate for this ID if id in self.detected_license_plates: prev_plate = self.detected_license_plates[id]["plate"] prev_char_confidences = self.detected_license_plates[id]["char_confidences"] - prev_avg_confidence = sum(prev_char_confidences) / len( - prev_char_confidences + prev_avg_confidence = ( + (sum(prev_char_confidences) / len(prev_char_confidences)) + if prev_char_confidences + else 0 ) # Define conditions for 
keeping the previous plate From debc77b6c4c51b97e1333d660eb3d1dc6d0987fc Mon Sep 17 00:00:00 2001 From: Josh Hawkins <32435876+hawkeye217@users.noreply.github.com> Date: Mon, 28 Oct 2024 11:49:12 -0500 Subject: [PATCH 09/18] LPR improvements (#14641) --- frigate/embeddings/alpr/alpr.py | 19 ++++++++++++++++++- frigate/embeddings/maintainer.py | 15 +++++++++++---- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/frigate/embeddings/alpr/alpr.py b/frigate/embeddings/alpr/alpr.py index b91a50e3a..16eba9989 100644 --- a/frigate/embeddings/alpr/alpr.py +++ b/frigate/embeddings/alpr/alpr.py @@ -13,6 +13,8 @@ from frigate.embeddings.embeddings import Embeddings logger = logging.getLogger(__name__) +MIN_PLATE_LENGTH = 3 + class LicensePlateRecognition: def __init__( @@ -197,7 +199,22 @@ class LicensePlateRecognition: average_confidences[original_idx] = average_confidence areas[original_idx] = area - return license_plates, average_confidences, areas + # Filter out plates that have a length of less than 3 characters + # Sort by area, then by plate length, then by confidence all desc + sorted_data = sorted( + [ + (plate, conf, area) + for plate, conf, area in zip( + license_plates, average_confidences, areas + ) + if len(plate) >= MIN_PLATE_LENGTH + ], + key=lambda x: (x[2], len(x[0]), x[1]), + reverse=True, + ) + + if sorted_data: + return map(list, zip(*sorted_data)) return [], [], [] diff --git a/frigate/embeddings/maintainer.py b/frigate/embeddings/maintainer.py index 2084e6b7a..325a2e8dd 100644 --- a/frigate/embeddings/maintainer.py +++ b/frigate/embeddings/maintainer.py @@ -598,7 +598,11 @@ class EmbeddingMaintainer(threading.Thread): logger.debug("No text detected") return - top_plate, top_char_confidences = license_plates[0], confidences[0] + top_plate, top_char_confidences, top_area = ( + license_plates[0], + confidences[0], + areas[0], + ) avg_confidence = ( (sum(top_char_confidences) / len(top_char_confidences)) if top_char_confidences @@ -609,6 +613,7 @@ class EmbeddingMaintainer(threading.Thread): if id in self.detected_license_plates: prev_plate = self.detected_license_plates[id]["plate"] prev_char_confidences = self.detected_license_plates[id]["char_confidences"] + prev_area = self.detected_license_plates[id]["area"] prev_avg_confidence = ( (sum(prev_char_confidences) / len(prev_char_confidences)) if prev_char_confidences @@ -618,6 +623,7 @@ class EmbeddingMaintainer(threading.Thread): # Define conditions for keeping the previous plate shorter_than_previous = len(top_plate) < len(prev_plate) lower_avg_confidence = avg_confidence <= prev_avg_confidence + smaller_area = top_area < prev_area # Compare character-by-character confidence where possible min_length = min(len(top_plate), len(prev_plate)) @@ -628,13 +634,13 @@ class EmbeddingMaintainer(threading.Thread): ) worse_char_confidences = char_confidence_comparison >= min_length / 2 - if shorter_than_previous or ( + if (shorter_than_previous or smaller_area) and ( lower_avg_confidence and worse_char_confidences ): logger.debug( f"Keeping previous plate. 
New plate stats: " - f"length={len(top_plate)}, avg_conf={avg_confidence:.2f} " - f"vs Previous: length={len(prev_plate)}, avg_conf={prev_avg_confidence:.2f}" + f"length={len(top_plate)}, avg_conf={avg_confidence:.2f}, area={top_area} " + f"vs Previous: length={len(prev_plate)}, avg_conf={prev_avg_confidence:.2f}, area={prev_area}" ) return @@ -667,6 +673,7 @@ class EmbeddingMaintainer(threading.Thread): self.detected_license_plates[id] = { "plate": top_plate, "char_confidences": top_char_confidences, + "area": top_area, } def _create_thumbnail(self, yuv_frame, box, height=500) -> Optional[bytes]: From a89bc7b4a963ab9621362fdb359a659981a2d214 Mon Sep 17 00:00:00 2001 From: Nicolas Mowen Date: Mon, 28 Oct 2024 17:35:20 -0600 Subject: [PATCH 10/18] Update facenet model (#14647) --- frigate/db/sqlitevecq.py | 2 +- frigate/embeddings/embeddings.py | 2 +- frigate/embeddings/functions/onnx.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/frigate/db/sqlitevecq.py b/frigate/db/sqlitevecq.py index 1447fd48f..d630e1ddf 100644 --- a/frigate/db/sqlitevecq.py +++ b/frigate/db/sqlitevecq.py @@ -63,6 +63,6 @@ class SqliteVecQueueDatabase(SqliteQueueDatabase): self.execute_sql(""" CREATE VIRTUAL TABLE IF NOT EXISTS vec_faces USING vec0( id TEXT PRIMARY KEY, - face_embedding FLOAT[128] distance_metric=cosine + face_embedding FLOAT[512] distance_metric=cosine ); """) diff --git a/frigate/embeddings/embeddings.py b/frigate/embeddings/embeddings.py index a2de88394..23b8aa7ee 100644 --- a/frigate/embeddings/embeddings.py +++ b/frigate/embeddings/embeddings.py @@ -133,7 +133,7 @@ class Embeddings: model_name="facenet", model_file="facenet.onnx", download_urls={ - "facenet.onnx": "https://github.com/NicolasSM-001/faceNet.onnx-/raw/refs/heads/main/faceNet.onnx", + "facenet.onnx": "https://github.com/NickM-27/facenet-onnx/releases/download/v1.0/facenet.onnx", "facedet.onnx": "https://github.com/opencv/opencv_zoo/raw/refs/heads/main/models/face_detection_yunet/face_detection_yunet_2023mar_int8.onnx", }, model_size="large", diff --git a/frigate/embeddings/functions/onnx.py b/frigate/embeddings/functions/onnx.py index 200f728d3..035dc1cc2 100644 --- a/frigate/embeddings/functions/onnx.py +++ b/frigate/embeddings/functions/onnx.py @@ -222,7 +222,7 @@ class GenericONNXEmbedding: frame[y_center : y_center + og_h, x_center : x_center + og_w] = og frame = np.expand_dims(frame, axis=0) - return [{"image_input": frame}] + return [{"input_2": frame}] elif self.model_type == ModelTypeEnum.alpr_detect: preprocessed = [] for x in raw_inputs: From 0fcdc909c2cde1f16b5e5c356f4f981ffcb870b7 Mon Sep 17 00:00:00 2001 From: Josh Hawkins <32435876+hawkeye217@users.noreply.github.com> Date: Fri, 1 Nov 2024 18:43:21 -0500 Subject: [PATCH 11/18] Use regular expressions for plate matching (#14727) --- .../configuration/license_plate_recognition.md | 9 +++------ frigate/embeddings/embeddings.py | 6 +++--- frigate/embeddings/functions/onnx.py | 18 +++++++++--------- .../embeddings/{alpr/alpr.py => lpr/lpr.py} | 0 frigate/embeddings/maintainer.py | 18 +++++++++++------- 5 files changed, 26 insertions(+), 25 deletions(-) rename frigate/embeddings/{alpr/alpr.py => lpr/lpr.py} (100%) diff --git a/docs/docs/configuration/license_plate_recognition.md b/docs/docs/configuration/license_plate_recognition.md index b2fbb6214..a2b976726 100644 --- a/docs/docs/configuration/license_plate_recognition.md +++ b/docs/docs/configuration/license_plate_recognition.md @@ -26,7 +26,7 @@ lpr: Several options are available to fine-tune the 
LPR feature. For example, you can adjust the `min_area` setting, which defines the minimum size in pixels a license plate must be before LPR runs. The default is 500 pixels. -Additionally, you can define `known_plates`, allowing Frigate to label tracked vehicles with custom sub_labels when a recognized plate is detected. This information is then accessible in the UI, filters, and notifications. +Additionally, you can define `known_plates` as strings or regular expressions, allowing Frigate to label tracked vehicles with custom sub_labels when a recognized plate is detected. This information is then accessible in the UI, filters, and notifications. ```yaml lpr: @@ -37,12 +37,9 @@ lpr: - "ABC-1234" - "ABC-I234" Johnny: - - "JHN-1234" - - "JMN-1234" - - "JHN-I234" + - "J*N-*234" # Using wildcards for H/M and 1/I Sally: - - "SLL-1234" - - "5LL-1234" + - "[S5]LL-1234" # Matches SLL-1234 and 5LL-1234 ``` In this example, "Wife's Car" will appear as the label for any vehicle matching the plate "ABC-1234." The model might occasionally interpret the digit 1 as a capital I (e.g., "ABC-I234"), so both variations are listed. Similarly, multiple possible variations are specified for Johnny and Sally. diff --git a/frigate/embeddings/embeddings.py b/frigate/embeddings/embeddings.py index 23b8aa7ee..255b72cb2 100644 --- a/frigate/embeddings/embeddings.py +++ b/frigate/embeddings/embeddings.py @@ -154,7 +154,7 @@ class Embeddings: "detection.onnx": "https://github.com/hawkeye217/paddleocr-onnx/raw/refs/heads/master/models/detection.onnx" }, model_size="large", - model_type=ModelTypeEnum.alpr_detect, + model_type=ModelTypeEnum.lpr_detect, requestor=self.requestor, device="CPU", ) @@ -166,7 +166,7 @@ class Embeddings: "classification.onnx": "https://github.com/hawkeye217/paddleocr-onnx/raw/refs/heads/master/models/classification.onnx" }, model_size="large", - model_type=ModelTypeEnum.alpr_classify, + model_type=ModelTypeEnum.lpr_classify, requestor=self.requestor, device="CPU", ) @@ -178,7 +178,7 @@ class Embeddings: "recognition.onnx": "https://github.com/hawkeye217/paddleocr-onnx/raw/refs/heads/master/models/recognition.onnx" }, model_size="large", - model_type=ModelTypeEnum.alpr_recognize, + model_type=ModelTypeEnum.lpr_recognize, requestor=self.requestor, device="CPU", ) diff --git a/frigate/embeddings/functions/onnx.py b/frigate/embeddings/functions/onnx.py index 035dc1cc2..7375cf40e 100644 --- a/frigate/embeddings/functions/onnx.py +++ b/frigate/embeddings/functions/onnx.py @@ -38,9 +38,9 @@ class ModelTypeEnum(str, Enum): face = "face" vision = "vision" text = "text" - alpr_detect = "alpr_detect" - alpr_classify = "alpr_classify" - alpr_recognize = "alpr_recognize" + lpr_detect = "lpr_detect" + lpr_classify = "lpr_classify" + lpr_recognize = "lpr_recognize" class GenericONNXEmbedding: @@ -142,11 +142,11 @@ class GenericONNXEmbedding: self.feature_extractor = self._load_feature_extractor() elif self.model_type == ModelTypeEnum.face: self.feature_extractor = [] - elif self.model_type == ModelTypeEnum.alpr_detect: + elif self.model_type == ModelTypeEnum.lpr_detect: self.feature_extractor = [] - elif self.model_type == ModelTypeEnum.alpr_classify: + elif self.model_type == ModelTypeEnum.lpr_classify: self.feature_extractor = [] - elif self.model_type == ModelTypeEnum.alpr_recognize: + elif self.model_type == ModelTypeEnum.lpr_recognize: self.feature_extractor = [] self.runner = ONNXModelRunner( @@ -223,17 +223,17 @@ class GenericONNXEmbedding: frame = np.expand_dims(frame, axis=0) return [{"input_2": 
frame}] - elif self.model_type == ModelTypeEnum.alpr_detect: + elif self.model_type == ModelTypeEnum.lpr_detect: preprocessed = [] for x in raw_inputs: preprocessed.append(x) return [{"x": preprocessed[0]}] - elif self.model_type == ModelTypeEnum.alpr_classify: + elif self.model_type == ModelTypeEnum.lpr_classify: processed = [] for img in raw_inputs: processed.append({"x": img}) return processed - elif self.model_type == ModelTypeEnum.alpr_recognize: + elif self.model_type == ModelTypeEnum.lpr_recognize: processed = [] for img in raw_inputs: processed.append({"x": img}) diff --git a/frigate/embeddings/alpr/alpr.py b/frigate/embeddings/lpr/lpr.py similarity index 100% rename from frigate/embeddings/alpr/alpr.py rename to frigate/embeddings/lpr/lpr.py diff --git a/frigate/embeddings/maintainer.py b/frigate/embeddings/maintainer.py index 325a2e8dd..f5fb7cc7c 100644 --- a/frigate/embeddings/maintainer.py +++ b/frigate/embeddings/maintainer.py @@ -3,6 +3,7 @@ import base64 import logging import os +import re import threading from multiprocessing.synchronize import Event as MpEvent from typing import Optional @@ -22,7 +23,7 @@ from frigate.comms.events_updater import EventEndSubscriber, EventUpdateSubscrib from frigate.comms.inter_process import InterProcessRequestor from frigate.config import FrigateConfig from frigate.const import CLIPS_DIR, FRIGATE_LOCALHOST, UPDATE_EVENT_DESCRIPTION -from frigate.embeddings.alpr.alpr import LicensePlateRecognition +from frigate.embeddings.lpr.lpr import LicensePlateRecognition from frigate.events.types import EventTypeEnum from frigate.genai import get_genai_client from frigate.models import Event @@ -651,13 +652,16 @@ class EmbeddingMaintainer(threading.Thread): ) return - # Determine subLabel based on known plates + # Determine subLabel based on known plates, use regex matching # Default to the detected plate, use label name if there's a match - sub_label = top_plate - for label, plates in self.lpr_config.known_plates.items(): - if top_plate in plates: - sub_label = label - break + sub_label = next( + ( + label + for label, plates in self.lpr_config.known_plates.items() + if any(re.match(f"^{plate}$", top_plate) for plate in plates) + ), + top_plate, + ) # Send the result to the API resp = requests.post( From b4f86ee3ad66d9ef5826ba8a5d4903d1cc535ef1 Mon Sep 17 00:00:00 2001 From: Nicolas Mowen Date: Wed, 6 Nov 2024 09:07:29 -0700 Subject: [PATCH 12/18] Use SVC to normalize and classify faces for recognition (#14835) * Add margin to detected faces for embeddings * Standardize pixel values for face input * Use SVC to classify faces * Clear classifier when new face is added * Formatting * Add dependency --- docker/main/requirements-wheels.txt | 8 ++- frigate/embeddings/functions/onnx.py | 3 + frigate/embeddings/maintainer.py | 82 ++++++++++++++-------------- frigate/util/model.py | 46 +++++++++++++++- 4 files changed, 93 insertions(+), 46 deletions(-) diff --git a/docker/main/requirements-wheels.txt b/docker/main/requirements-wheels.txt index 30928d679..ce20b28de 100644 --- a/docker/main/requirements-wheels.txt +++ b/docker/main/requirements-wheels.txt @@ -13,9 +13,7 @@ markupsafe == 2.1.* python-multipart == 0.0.12 # General mypy == 1.6.1 -numpy == 1.26.* onvif_zeep == 0.2.12 -opencv-python-headless == 4.9.0.* paho-mqtt == 2.1.* pandas == 2.2.* peewee == 3.17.* @@ -29,11 +27,15 @@ ruamel.yaml == 0.18.* tzlocal == 5.2 requests == 2.32.* types-requests == 2.32.* -scipy == 1.13.* norfair == 2.2.* setproctitle == 1.3.* ws4py == 0.5.* unidecode == 1.3.* +# Image 
Manipulation +numpy == 1.26.* +opencv-python-headless == 4.9.0.* +scipy == 1.13.* +scikit-learn == 1.5.* # OpenVino & ONNX openvino == 2024.3.* onnxruntime-openvino == 1.19.* ; platform_machine == 'x86_64' diff --git a/frigate/embeddings/functions/onnx.py b/frigate/embeddings/functions/onnx.py index 7375cf40e..8fd58c168 100644 --- a/frigate/embeddings/functions/onnx.py +++ b/frigate/embeddings/functions/onnx.py @@ -221,6 +221,9 @@ class GenericONNXEmbedding: # copy img image into center of result image frame[y_center : y_center + og_h, x_center : x_center + og_w] = og + # standardize pixel values across channels + mean, std = frame.mean(), frame.std() + frame = (frame - mean) / std frame = np.expand_dims(frame, axis=0) return [{"input_2": frame}] elif self.model_type == ModelTypeEnum.lpr_detect: diff --git a/frigate/embeddings/maintainer.py b/frigate/embeddings/maintainer.py index f5fb7cc7c..352998f2c 100644 --- a/frigate/embeddings/maintainer.py +++ b/frigate/embeddings/maintainer.py @@ -30,12 +30,12 @@ from frigate.models import Event from frigate.types import TrackedObjectUpdateTypesEnum from frigate.util.builtin import serialize from frigate.util.image import SharedMemoryFrameManager, area, calculate_region +from frigate.util.model import FaceClassificationModel from .embeddings import Embeddings logger = logging.getLogger(__name__) -REQUIRED_FACES = 2 MAX_THUMBNAILS = 10 @@ -68,6 +68,9 @@ class EmbeddingMaintainer(threading.Thread): self.face_recognition_enabled = self.config.face_recognition.enabled self.requires_face_detection = "face" not in self.config.objects.all_objects self.detected_faces: dict[str, float] = {} + self.face_classifier = ( + FaceClassificationModel(db) if self.face_recognition_enabled else None + ) # create communication for updating event descriptions self.requestor = InterProcessRequestor() @@ -138,13 +141,15 @@ class EmbeddingMaintainer(threading.Thread): self.embeddings.text_embedding([data])[0], pack=False ) elif topic == EmbeddingsRequestEnum.register_face.value: + if not self.face_recognition_enabled: + return False + if data.get("cropped"): self.embeddings.embed_face( data["face_name"], base64.b64decode(data["image"]), upsert=True, ) - return True else: img = cv2.imdecode( np.frombuffer( @@ -165,7 +170,8 @@ class EmbeddingMaintainer(threading.Thread): data["face_name"], webp.tobytes(), upsert=True ) - return False + self.face_classifier.clear_classifier() + return True except Exception as e: logger.error(f"Unable to handle embeddings request {e}") @@ -336,18 +342,6 @@ class EmbeddingMaintainer(threading.Thread): if event_id: self.handle_regenerate_description(event_id, source) - def _search_face(self, query_embedding: bytes) -> list[tuple[str, float]]: - """Search for the face most closely matching the embedding.""" - sql_query = f""" - SELECT - id, - distance - FROM vec_faces - WHERE face_embedding MATCH ? 
- AND k = {REQUIRED_FACES} ORDER BY distance - """ - return self.embeddings.db.execute_sql(sql_query, [query_embedding]).fetchall() - def _detect_face(self, input: np.ndarray) -> tuple[int, int, int, int]: """Detect faces in input image.""" self.face_detector.setInputSize((input.shape[1], input.shape[0])) @@ -400,13 +394,21 @@ class EmbeddingMaintainer(threading.Thread): rgb = cv2.cvtColor(frame, cv2.COLOR_YUV2RGB_I420) left, top, right, bottom = person_box person = rgb[top:bottom, left:right] - face = self._detect_face(person) + face_box = self._detect_face(person) - if not face: + if not face_box: logger.debug("Detected no faces for person object.") return - face_frame = person[face[1] : face[3], face[0] : face[2]] + margin = int((face_box[2] - face_box[0]) * 0.25) + face_frame = person[ + max(0, face_box[1] - margin) : min( + frame.shape[0], face_box[3] + margin + ), + max(0, face_box[0] - margin) : min( + frame.shape[1], face_box[2] + margin + ), + ] face_frame = cv2.cvtColor(face_frame, cv2.COLOR_RGB2BGR) else: # don't run for object without attributes @@ -434,8 +436,15 @@ class EmbeddingMaintainer(threading.Thread): return face_frame = cv2.cvtColor(frame, cv2.COLOR_YUV2BGR_I420) + margin = int((face_box[2] - face_box[0]) * 0.25) + face_frame = face_frame[ - face_box[1] : face_box[3], face_box[0] : face_box[2] + max(0, face_box[1] - margin) : min( + frame.shape[0], face_box[3] + margin + ), + max(0, face_box[0] - margin) : min( + frame.shape[1], face_box[2] + margin + ), ] ret, webp = cv2.imencode( @@ -446,34 +455,23 @@ class EmbeddingMaintainer(threading.Thread): logger.debug("Not processing face due to error creating cropped image.") return - embedding = self.embeddings.embed_face("unknown", webp.tobytes(), upsert=False) - query_embedding = serialize(embedding) - best_faces = self._search_face(query_embedding) - logger.debug(f"Detected best faces for person as: {best_faces}") + embedding = self.embeddings.embed_face("nick", webp.tobytes(), upsert=True) + res = self.face_classifier.classify_face(embedding) - if not best_faces or len(best_faces) < REQUIRED_FACES: - logger.debug(f"{len(best_faces)} < {REQUIRED_FACES} min required faces.") + if not res: return - sub_label = str(best_faces[0][0]).split("-")[0] - avg_score = 0 + sub_label, score = res - for face in best_faces: - score = 1.0 - face[1] + logger.debug( + f"Detected best face for person as: {sub_label} with score {score}" + ) - if face[0].split("-")[0] != sub_label: - logger.debug("Detected multiple faces, result is not valid.") - return - - avg_score += score - - avg_score = round(avg_score / REQUIRED_FACES, 2) - - if avg_score < self.config.face_recognition.threshold or ( - id in self.detected_faces and avg_score <= self.detected_faces[id] + if score < self.config.face_recognition.threshold or ( + id in self.detected_faces and score <= self.detected_faces[id] ): logger.debug( - f"Recognized face score {avg_score} is less than threshold ({self.config.face_recognition.threshold}) / previous face score ({self.detected_faces.get(id)})." + f"Recognized face score {score} is less than threshold ({self.config.face_recognition.threshold}) / previous face score ({self.detected_faces.get(id)})." 
) return @@ -482,12 +480,12 @@ class EmbeddingMaintainer(threading.Thread): json={ "camera": obj_data.get("camera"), "subLabel": sub_label, - "subLabelScore": avg_score, + "subLabelScore": score, }, ) if resp.status_code == 200: - self.detected_faces[id] = avg_score + self.detected_faces[id] = score def _detect_license_plate(self, input: np.ndarray) -> tuple[int, int, int, int]: """Return the dimensions of the input image as [x, y, width, height].""" diff --git a/frigate/util/model.py b/frigate/util/model.py index ce2c9538c..03d3c2733 100644 --- a/frigate/util/model.py +++ b/frigate/util/model.py @@ -2,9 +2,15 @@ import logging import os -from typing import Any +from typing import Any, Optional +import numpy as np import onnxruntime as ort +from playhouse.sqliteq import SqliteQueueDatabase +from sklearn.preprocessing import LabelEncoder, Normalizer +from sklearn.svm import SVC + +from frigate.util.builtin import deserialize try: import openvino as ov @@ -148,3 +154,41 @@ class ONNXModelRunner: return [infer_request.get_output_tensor().data] elif self.type == "ort": return self.ort.run(None, input) + + +class FaceClassificationModel: + def __init__(self, db: SqliteQueueDatabase): + self.db = db + self.labeler: Optional[LabelEncoder] = None + self.classifier: Optional[SVC] = None + + def __build_classifier(self) -> None: + faces: list[tuple[str, bytes]] = self.db.execute_sql( + "SELECT id, face_embedding FROM vec_faces" + ).fetchall() + embeddings = np.array([deserialize(f[1]) for f in faces]) + self.labeler = LabelEncoder() + norms = Normalizer(norm="l2").transform(embeddings) + labels = self.labeler.fit_transform([f[0].split("-")[0] for f in faces]) + self.classifier = SVC(kernel="linear", probability=True) + self.classifier.fit(norms, labels) + + def clear_classifier(self) -> None: + self.classifier = None + self.labeler = None + + def classify_face(self, embedding: np.ndarray) -> Optional[tuple[str, float]]: + if not self.classifier: + self.__build_classifier() + + res = self.classifier.predict([embedding]) + + if not res: + return None + + label = res[0] + probabilities = self.classifier.predict_proba([embedding])[0] + return ( + self.labeler.inverse_transform([label])[0], + round(probabilities[label], 2), + ) From 2e8b7cbb9cc5cdc712af7e0329f883691bb03e01 Mon Sep 17 00:00:00 2001 From: Nicolas Mowen Date: Wed, 6 Nov 2024 11:40:40 -0700 Subject: [PATCH 13/18] Remove hardcoded face name --- frigate/embeddings/maintainer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frigate/embeddings/maintainer.py b/frigate/embeddings/maintainer.py index 352998f2c..303690b5c 100644 --- a/frigate/embeddings/maintainer.py +++ b/frigate/embeddings/maintainer.py @@ -455,7 +455,7 @@ class EmbeddingMaintainer(threading.Thread): logger.debug("Not processing face due to error creating cropped image.") return - embedding = self.embeddings.embed_face("nick", webp.tobytes(), upsert=True) + embedding = self.embeddings.embed_face("unknown", webp.tobytes(), upsert=False) res = self.face_classifier.classify_face(embedding) if not res: From 0a48a1e1d936106318a2656b801e484c907f838e Mon Sep 17 00:00:00 2001 From: Nicolas Mowen Date: Thu, 7 Nov 2024 16:26:49 -0700 Subject: [PATCH 14/18] Fix check --- frigate/util/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frigate/util/model.py b/frigate/util/model.py index 03d3c2733..d43320006 100644 --- a/frigate/util/model.py +++ b/frigate/util/model.py @@ -183,7 +183,7 @@ class FaceClassificationModel: res = 
self.classifier.predict([embedding]) - if not res: + if res is None: return None label = res[0] From 68a5bd9c24f33dd1eeee9903bdf91fa8aabc028f Mon Sep 17 00:00:00 2001 From: Nicolas Mowen Date: Thu, 7 Nov 2024 16:32:41 -0700 Subject: [PATCH 15/18] Remove standardization --- frigate/embeddings/functions/onnx.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/frigate/embeddings/functions/onnx.py b/frigate/embeddings/functions/onnx.py index 8fd58c168..67b2c44a2 100644 --- a/frigate/embeddings/functions/onnx.py +++ b/frigate/embeddings/functions/onnx.py @@ -220,10 +220,6 @@ class GenericONNXEmbedding: # copy img image into center of result image frame[y_center : y_center + og_h, x_center : x_center + og_w] = og - - # standardize pixel values across channels - mean, std = frame.mean(), frame.std() - frame = (frame - mean) / std frame = np.expand_dims(frame, axis=0) return [{"input_2": frame}] elif self.model_type == ModelTypeEnum.lpr_detect: From c6ac331d4d9877d20123dea005d919e003e80f3e Mon Sep 17 00:00:00 2001 From: Nicolas Mowen Date: Tue, 26 Nov 2024 13:41:49 -0700 Subject: [PATCH 16/18] Improve face recognition (#15205) * Validate faces using cosine distance and SVC * Formatting * Use opencv instead of face embedding * Update docs for training data * Adjust to score system * Set bounds * remove face embeddings * Update writing images * Add face library page * Add ability to select file * Install opencv deps * Cleanup * Use different deps * Move deps * Cleanup * Only show face library for desktop * Implement deleting * Add ability to upload image * Add support for uploading images --- docker/main/install_deps.sh | 4 +- docker/main/requirements-wheels.txt | 2 +- docs/docs/configuration/face_recognition.md | 16 +- frigate/api/classification.py | 29 +-- frigate/config/semantic_search.py | 5 +- frigate/db/sqlitevecq.py | 17 +- frigate/embeddings/__init__.py | 11 +- frigate/embeddings/embeddings.py | 51 +----- frigate/embeddings/maintainer.py | 60 ++++--- frigate/util/model.py | 56 +++--- web/src/App.tsx | 2 + .../overlay/dialog/UploadImageDialog.tsx | 88 +++++++++ web/src/hooks/use-navigation.ts | 19 +- web/src/pages/FaceLibrary.tsx | 170 ++++++++++++++++++ web/src/types/frigateConfig.ts | 4 + 15 files changed, 397 insertions(+), 137 deletions(-) create mode 100644 web/src/components/overlay/dialog/UploadImageDialog.tsx create mode 100644 web/src/pages/FaceLibrary.tsx diff --git a/docker/main/install_deps.sh b/docker/main/install_deps.sh index 6c32ae168..af349fd84 100755 --- a/docker/main/install_deps.sh +++ b/docker/main/install_deps.sh @@ -16,7 +16,9 @@ apt-get -qq install --no-install-recommends -y \ curl \ lsof \ jq \ - nethogs + nethogs \ + libgl1 \ + libglib2.0-0 # ensure python3 defaults to python3.9 update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.9 1 diff --git a/docker/main/requirements-wheels.txt b/docker/main/requirements-wheels.txt index ce20b28de..b163e8627 100644 --- a/docker/main/requirements-wheels.txt +++ b/docker/main/requirements-wheels.txt @@ -34,8 +34,8 @@ unidecode == 1.3.* # Image Manipulation numpy == 1.26.* opencv-python-headless == 4.9.0.* +opencv-contrib-python == 4.9.0.* scipy == 1.13.* -scikit-learn == 1.5.* # OpenVino & ONNX openvino == 2024.3.* onnxruntime-openvino == 1.19.* ; platform_machine == 'x86_64' diff --git a/docs/docs/configuration/face_recognition.md b/docs/docs/configuration/face_recognition.md index af079a226..3e0cfd30c 100644 --- a/docs/docs/configuration/face_recognition.md +++ b/docs/docs/configuration/face_recognition.md 
@@ -18,4 +18,18 @@ Face recognition is disabled by default and requires semantic search to be enabl ```yaml face_recognition: enabled: true -``` \ No newline at end of file +``` + +## Dataset + +The number of images needed for a sufficient training set for face recognition varies depending on several factors: + +- Complexity of the task: A simple task like recognizing faces of known individuals may require fewer images than a complex task like identifying unknown individuals in a large crowd. +- Diversity of the dataset: A dataset with diverse images, including variations in lighting, pose, and facial expressions, will require fewer images per person than a less diverse dataset. +- Desired accuracy: The higher the desired accuracy, the more images are typically needed. + +However, here are some general guidelines: + +- Minimum: For basic face recognition tasks, a minimum of 10-20 images per person is often recommended. +- Recommended: For more robust and accurate systems, 30-50 images per person is a good starting point. +- Ideal: For optimal performance, especially in challenging conditions, 100 or more images per person can be beneficial. \ No newline at end of file diff --git a/frigate/api/classification.py b/frigate/api/classification.py index d862008c8..fe54bebe9 100644 --- a/frigate/api/classification.py +++ b/frigate/api/classification.py @@ -1,11 +1,14 @@ """Object classification APIs.""" import logging +import os from fastapi import APIRouter, Request, UploadFile from fastapi.responses import JSONResponse +from pathvalidate import sanitize_filename from frigate.api.defs.tags import Tags +from frigate.const import FACE_DIR from frigate.embeddings import EmbeddingsContext logger = logging.getLogger(__name__) @@ -15,20 +18,18 @@ router = APIRouter(tags=[Tags.events]) @router.get("/faces") def get_faces(): - return JSONResponse(content={"message": "there are faces"}) + face_dict: dict[str, list[str]] = {} + + for name in os.listdir(FACE_DIR): + face_dict[name] = [] + for file in os.listdir(os.path.join(FACE_DIR, name)): + face_dict[name].append(file) + + return JSONResponse(status_code=200, content=face_dict) @router.post("/faces/{name}") async def register_face(request: Request, name: str, file: UploadFile): - # if not file.content_type.startswith("image"): - # return JSONResponse( - # status_code=400, - # content={ - # "success": False, - # "message": "Only an image can be used to register a face.", - # }, - # ) - context: EmbeddingsContext = request.app.embeddings context.register_face(name, await file.read()) return JSONResponse( @@ -37,8 +38,8 @@ async def register_face(request: Request, name: str, file: UploadFile): ) -@router.delete("/faces") -def deregister_faces(request: Request, body: dict = None): +@router.post("/faces/{name}/delete") +def deregister_faces(request: Request, name: str, body: dict = None): json: dict[str, any] = body or {} list_of_ids = json.get("ids", "") @@ -49,7 +50,9 @@ def deregister_faces(request: Request, body: dict = None): ) context: EmbeddingsContext = request.app.embeddings - context.delete_face_ids(list_of_ids) + context.delete_face_ids( + name, map(lambda file: sanitize_filename(file), list_of_ids) + ) return JSONResponse( content=({"success": True, "message": "Successfully deleted faces."}), status_code=200, diff --git a/frigate/config/semantic_search.py b/frigate/config/semantic_search.py index f5e881e4e..f0eb5d98c 100644 --- a/frigate/config/semantic_search.py +++ b/frigate/config/semantic_search.py @@ -24,7 +24,10 @@ class 
SemanticSearchConfig(FrigateBaseModel): class FaceRecognitionConfig(FrigateBaseModel): enabled: bool = Field(default=False, title="Enable face recognition.") threshold: float = Field( - default=0.9, title="Face similarity score required to be considered a match." + default=170, + title="minimum face distance score required to be considered a match.", + gt=0.0, + le=1.0, ) min_area: int = Field( default=500, title="Min area of face box to consider running face recognition." diff --git a/frigate/db/sqlitevecq.py b/frigate/db/sqlitevecq.py index d630e1ddf..ccb75ae54 100644 --- a/frigate/db/sqlitevecq.py +++ b/frigate/db/sqlitevecq.py @@ -29,10 +29,6 @@ class SqliteVecQueueDatabase(SqliteQueueDatabase): ids = ",".join(["?" for _ in event_ids]) self.execute_sql(f"DELETE FROM vec_descriptions WHERE id IN ({ids})", event_ids) - def delete_embeddings_face(self, face_ids: list[str]) -> None: - ids = ",".join(["?" for _ in face_ids]) - self.execute_sql(f"DELETE FROM vec_faces WHERE id IN ({ids})", face_ids) - def drop_embeddings_tables(self) -> None: self.execute_sql(""" DROP TABLE vec_descriptions; @@ -40,11 +36,8 @@ class SqliteVecQueueDatabase(SqliteQueueDatabase): self.execute_sql(""" DROP TABLE vec_thumbnails; """) - self.execute_sql(""" - DROP TABLE vec_faces; - """) - def create_embeddings_tables(self, face_recognition: bool) -> None: + def create_embeddings_tables(self) -> None: """Create vec0 virtual table for embeddings""" self.execute_sql(""" CREATE VIRTUAL TABLE IF NOT EXISTS vec_thumbnails USING vec0( @@ -58,11 +51,3 @@ class SqliteVecQueueDatabase(SqliteQueueDatabase): description_embedding FLOAT[768] distance_metric=cosine ); """) - - if face_recognition: - self.execute_sql(""" - CREATE VIRTUAL TABLE IF NOT EXISTS vec_faces USING vec0( - id TEXT PRIMARY KEY, - face_embedding FLOAT[512] distance_metric=cosine - ); - """) diff --git a/frigate/embeddings/__init__.py b/frigate/embeddings/__init__.py index 235b15df3..9836ae28e 100644 --- a/frigate/embeddings/__init__.py +++ b/frigate/embeddings/__init__.py @@ -14,7 +14,7 @@ from setproctitle import setproctitle from frigate.comms.embeddings_updater import EmbeddingsRequestEnum, EmbeddingsRequestor from frigate.config import FrigateConfig -from frigate.const import CONFIG_DIR +from frigate.const import CONFIG_DIR, FACE_DIR from frigate.db.sqlitevecq import SqliteVecQueueDatabase from frigate.models import Event from frigate.util.builtin import serialize @@ -209,8 +209,13 @@ class EmbeddingsContext: return self.db.execute_sql(sql_query).fetchall() - def delete_face_ids(self, ids: list[str]) -> None: - self.db.delete_embeddings_face(ids) + def delete_face_ids(self, face: str, ids: list[str]) -> None: + folder = os.path.join(FACE_DIR, face) + for id in ids: + file_path = os.path.join(folder, id) + + if os.path.isfile(file_path): + os.unlink(file_path) def update_description(self, event_id: str, description: str) -> None: self.requestor.send_data( diff --git a/frigate/embeddings/embeddings.py b/frigate/embeddings/embeddings.py index 255b72cb2..63597e49e 100644 --- a/frigate/embeddings/embeddings.py +++ b/frigate/embeddings/embeddings.py @@ -3,8 +3,6 @@ import base64 import logging import os -import random -import string import time from numpy import ndarray @@ -14,7 +12,6 @@ from frigate.comms.inter_process import InterProcessRequestor from frigate.config import FrigateConfig from frigate.const import ( CONFIG_DIR, - FACE_DIR, UPDATE_EMBEDDINGS_REINDEX_PROGRESS, UPDATE_MODEL_STATE, ) @@ -68,7 +65,7 @@ class Embeddings: self.requestor = 
InterProcessRequestor() # Create tables if they don't exist - self.db.create_embeddings_tables(self.config.face_recognition.enabled) + self.db.create_embeddings_tables() models = [ "jinaai/jina-clip-v1-text_model_fp16.onnx", @@ -126,22 +123,6 @@ class Embeddings: device="GPU" if config.semantic_search.model_size == "large" else "CPU", ) - self.face_embedding = None - - if self.config.face_recognition.enabled: - self.face_embedding = GenericONNXEmbedding( - model_name="facenet", - model_file="facenet.onnx", - download_urls={ - "facenet.onnx": "https://github.com/NickM-27/facenet-onnx/releases/download/v1.0/facenet.onnx", - "facedet.onnx": "https://github.com/opencv/opencv_zoo/raw/refs/heads/main/models/face_detection_yunet/face_detection_yunet_2023mar_int8.onnx", - }, - model_size="large", - model_type=ModelTypeEnum.face, - requestor=self.requestor, - device="GPU", - ) - self.lpr_detection_model = None self.lpr_classification_model = None self.lpr_recognition_model = None @@ -277,40 +258,12 @@ class Embeddings: return embeddings - def embed_face(self, label: str, thumbnail: bytes, upsert: bool = False) -> ndarray: - embedding = self.face_embedding(thumbnail)[0] - - if upsert: - rand_id = "".join( - random.choices(string.ascii_lowercase + string.digits, k=6) - ) - id = f"{label}-{rand_id}" - - # write face to library - folder = os.path.join(FACE_DIR, label) - file = os.path.join(folder, f"{id}.webp") - os.makedirs(folder, exist_ok=True) - - # save face image - with open(file, "wb") as output: - output.write(thumbnail) - - self.db.execute_sql( - """ - INSERT OR REPLACE INTO vec_faces(id, face_embedding) - VALUES(?, ?) - """, - (id, serialize(embedding)), - ) - - return embedding - def reindex(self) -> None: logger.info("Indexing tracked object embeddings...") self.db.drop_embeddings_tables() logger.debug("Dropped embeddings tables.") - self.db.create_embeddings_tables(self.config.face_recognition.enabled) + self.db.create_embeddings_tables() logger.debug("Created embeddings tables.") # Delete the saved stats file diff --git a/frigate/embeddings/maintainer.py b/frigate/embeddings/maintainer.py index 303690b5c..3e0855b9b 100644 --- a/frigate/embeddings/maintainer.py +++ b/frigate/embeddings/maintainer.py @@ -3,7 +3,9 @@ import base64 import logging import os +import random import re +import string import threading from multiprocessing.synchronize import Event as MpEvent from typing import Optional @@ -22,7 +24,12 @@ from frigate.comms.event_metadata_updater import ( from frigate.comms.events_updater import EventEndSubscriber, EventUpdateSubscriber from frigate.comms.inter_process import InterProcessRequestor from frigate.config import FrigateConfig -from frigate.const import CLIPS_DIR, FRIGATE_LOCALHOST, UPDATE_EVENT_DESCRIPTION +from frigate.const import ( + CLIPS_DIR, + FACE_DIR, + FRIGATE_LOCALHOST, + UPDATE_EVENT_DESCRIPTION, +) from frigate.embeddings.lpr.lpr import LicensePlateRecognition from frigate.events.types import EventTypeEnum from frigate.genai import get_genai_client @@ -69,7 +76,9 @@ class EmbeddingMaintainer(threading.Thread): self.requires_face_detection = "face" not in self.config.objects.all_objects self.detected_faces: dict[str, float] = {} self.face_classifier = ( - FaceClassificationModel(db) if self.face_recognition_enabled else None + FaceClassificationModel(self.config.face_recognition, db) + if self.face_recognition_enabled + else None ) # create communication for updating event descriptions @@ -144,12 +153,14 @@ class EmbeddingMaintainer(threading.Thread): if not 
self.face_recognition_enabled: return False + rand_id = "".join( + random.choices(string.ascii_lowercase + string.digits, k=6) + ) + label = data["face_name"] + id = f"{label}-{rand_id}" + if data.get("cropped"): - self.embeddings.embed_face( - data["face_name"], - base64.b64decode(data["image"]), - upsert=True, - ) + pass else: img = cv2.imdecode( np.frombuffer( @@ -163,12 +174,18 @@ class EmbeddingMaintainer(threading.Thread): return False face = img[face_box[1] : face_box[3], face_box[0] : face_box[2]] - ret, webp = cv2.imencode( + ret, thumbnail = cv2.imencode( ".webp", face, [int(cv2.IMWRITE_WEBP_QUALITY), 100] ) - self.embeddings.embed_face( - data["face_name"], webp.tobytes(), upsert=True - ) + + # write face to library + folder = os.path.join(FACE_DIR, label) + file = os.path.join(folder, f"{id}.webp") + os.makedirs(folder, exist_ok=True) + + # save face image + with open(file, "wb") as output: + output.write(thumbnail.tobytes()) self.face_classifier.clear_classifier() return True @@ -201,7 +218,9 @@ class EmbeddingMaintainer(threading.Thread): # Create our own thumbnail based on the bounding box and the frame time try: - yuv_frame = self.frame_manager.get(frame_name, camera_config.frame_shape_yuv) + yuv_frame = self.frame_manager.get( + frame_name, camera_config.frame_shape_yuv + ) except FileNotFoundError: pass @@ -447,16 +466,7 @@ class EmbeddingMaintainer(threading.Thread): ), ] - ret, webp = cv2.imencode( - ".webp", face_frame, [int(cv2.IMWRITE_WEBP_QUALITY), 100] - ) - - if not ret: - logger.debug("Not processing face due to error creating cropped image.") - return - - embedding = self.embeddings.embed_face("unknown", webp.tobytes(), upsert=False) - res = self.face_classifier.classify_face(embedding) + res = self.face_classifier.classify_face(face_frame) if not res: return @@ -467,11 +477,9 @@ class EmbeddingMaintainer(threading.Thread): f"Detected best face for person as: {sub_label} with score {score}" ) - if score < self.config.face_recognition.threshold or ( - id in self.detected_faces and score <= self.detected_faces[id] - ): + if id in self.detected_faces and score <= self.detected_faces[id]: logger.debug( - f"Recognized face score {score} is less than threshold ({self.config.face_recognition.threshold}) / previous face score ({self.detected_faces.get(id)})." + f"Recognized face distance {score} is less than previous face distance ({self.detected_faces.get(id)})." 
) return diff --git a/frigate/util/model.py b/frigate/util/model.py index d43320006..2b7cf1848 100644 --- a/frigate/util/model.py +++ b/frigate/util/model.py @@ -4,13 +4,12 @@ import logging import os from typing import Any, Optional +import cv2 import numpy as np import onnxruntime as ort from playhouse.sqliteq import SqliteQueueDatabase -from sklearn.preprocessing import LabelEncoder, Normalizer -from sklearn.svm import SVC -from frigate.util.builtin import deserialize +from frigate.config.semantic_search import FaceRecognitionConfig try: import openvino as ov @@ -21,6 +20,9 @@ except ImportError: logger = logging.getLogger(__name__) +MIN_MATCHING_FACES = 2 + + def get_ort_providers( force_cpu: bool = False, device: str = "AUTO", requires_fp16: bool = False ) -> tuple[list[str], list[dict[str, any]]]: @@ -157,38 +159,42 @@ class ONNXModelRunner: class FaceClassificationModel: - def __init__(self, db: SqliteQueueDatabase): + def __init__(self, config: FaceRecognitionConfig, db: SqliteQueueDatabase): + self.config = config self.db = db - self.labeler: Optional[LabelEncoder] = None - self.classifier: Optional[SVC] = None + self.recognizer = cv2.face.LBPHFaceRecognizer_create(radius=4, threshold=(1 - config.threshold) * 1000) + self.label_map: dict[int, str] = {} def __build_classifier(self) -> None: - faces: list[tuple[str, bytes]] = self.db.execute_sql( - "SELECT id, face_embedding FROM vec_faces" - ).fetchall() - embeddings = np.array([deserialize(f[1]) for f in faces]) - self.labeler = LabelEncoder() - norms = Normalizer(norm="l2").transform(embeddings) - labels = self.labeler.fit_transform([f[0].split("-")[0] for f in faces]) - self.classifier = SVC(kernel="linear", probability=True) - self.classifier.fit(norms, labels) + labels = [] + faces = [] + + dir = "/media/frigate/clips/faces" + for idx, name in enumerate(os.listdir(dir)): + self.label_map[idx] = name + face_folder = os.path.join(dir, name) + for image in os.listdir(face_folder): + img = cv2.imread(os.path.join(face_folder, image)) + gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + equ = cv2.equalizeHist(gray) + faces.append(equ) + labels.append(idx) + + self.recognizer.train(faces, np.array(labels)) def clear_classifier(self) -> None: self.classifier = None self.labeler = None - def classify_face(self, embedding: np.ndarray) -> Optional[tuple[str, float]]: - if not self.classifier: + def classify_face(self, face_image: np.ndarray) -> Optional[tuple[str, float]]: + if not self.label_map: self.__build_classifier() - res = self.classifier.predict([embedding]) + index, distance = self.recognizer.predict(cv2.equalizeHist(cv2.cvtColor(face_image, cv2.COLOR_BGR2GRAY))) - if res is None: + if index == -1: return None - label = res[0] - probabilities = self.classifier.predict_proba([embedding])[0] - return ( - self.labeler.inverse_transform([label])[0], - round(probabilities[label], 2), - ) + score = 1.0 - (distance / 1000) + return self.label_map[index], round(score, 2) + diff --git a/web/src/App.tsx b/web/src/App.tsx index 3bc2e7836..ef0a9497e 100644 --- a/web/src/App.tsx +++ b/web/src/App.tsx @@ -19,6 +19,7 @@ const ConfigEditor = lazy(() => import("@/pages/ConfigEditor")); const System = lazy(() => import("@/pages/System")); const Settings = lazy(() => import("@/pages/Settings")); const UIPlayground = lazy(() => import("@/pages/UIPlayground")); +const FaceLibrary = lazy(() => import("@/pages/FaceLibrary")); const Logs = lazy(() => import("@/pages/Logs")); function App() { @@ -51,6 +52,7 @@ function App() { } /> } /> } /> + } /> } /> 
diff --git a/web/src/components/overlay/dialog/UploadImageDialog.tsx b/web/src/components/overlay/dialog/UploadImageDialog.tsx new file mode 100644 index 000000000..b4fbd5065 --- /dev/null +++ b/web/src/components/overlay/dialog/UploadImageDialog.tsx @@ -0,0 +1,88 @@ +import { Button } from "@/components/ui/button"; +import { + Dialog, + DialogContent, + DialogDescription, + DialogFooter, + DialogHeader, + DialogTitle, +} from "@/components/ui/dialog"; +import { Form, FormControl, FormField, FormItem } from "@/components/ui/form"; +import { Input } from "@/components/ui/input"; +import { zodResolver } from "@hookform/resolvers/zod"; +import { useCallback } from "react"; +import { useForm } from "react-hook-form"; +import { z } from "zod"; + +type UploadImageDialogProps = { + open: boolean; + title: string; + description?: string; + setOpen: (open: boolean) => void; + onSave: (file: File) => void; +}; +export default function UploadImageDialog({ + open, + title, + description, + setOpen, + onSave, +}: UploadImageDialogProps) { + const formSchema = z.object({ + file: z.instanceof(FileList, { message: "Please select an image file." }), + }); + + const form = useForm>({ + resolver: zodResolver(formSchema), + }); + const fileRef = form.register("file"); + + // upload handler + + const onSubmit = useCallback( + (data: z.infer) => { + if (!data["file"]) { + return; + } + + onSave(data["file"]["0"]); + }, + [onSave], + ); + + return ( + + + + {title} + {description && {description}} + +
+ + ( + + + + + + )} + /> + + + + + + +
+
+ ); +} diff --git a/web/src/hooks/use-navigation.ts b/web/src/hooks/use-navigation.ts index 06ebd6c1d..daed383d3 100644 --- a/web/src/hooks/use-navigation.ts +++ b/web/src/hooks/use-navigation.ts @@ -1,20 +1,29 @@ import { ENV } from "@/env"; +import { FrigateConfig } from "@/types/frigateConfig"; import { NavData } from "@/types/navigation"; import { useMemo } from "react"; +import { isDesktop } from "react-device-detect"; import { FaCompactDisc, FaVideo } from "react-icons/fa"; import { IoSearch } from "react-icons/io5"; import { LuConstruction } from "react-icons/lu"; import { MdVideoLibrary } from "react-icons/md"; +import { TbFaceId } from "react-icons/tb"; +import useSWR from "swr"; export const ID_LIVE = 1; export const ID_REVIEW = 2; export const ID_EXPLORE = 3; export const ID_EXPORT = 4; export const ID_PLAYGROUND = 5; +export const ID_FACE_LIBRARY = 6; export default function useNavigation( variant: "primary" | "secondary" = "primary", ) { + const { data: config } = useSWR("config", { + revalidateOnFocus: false, + }); + return useMemo( () => [ @@ -54,7 +63,15 @@ export default function useNavigation( url: "/playground", enabled: ENV !== "production", }, + { + id: ID_FACE_LIBRARY, + variant, + icon: TbFaceId, + title: "Face Library", + url: "/faces", + enabled: isDesktop && config?.face_recognition.enabled, + }, ] as NavData[], - [variant], + [config?.face_recognition.enabled, variant], ); } diff --git a/web/src/pages/FaceLibrary.tsx b/web/src/pages/FaceLibrary.tsx new file mode 100644 index 000000000..e955a17de --- /dev/null +++ b/web/src/pages/FaceLibrary.tsx @@ -0,0 +1,170 @@ +import { baseUrl } from "@/api/baseUrl"; +import Chip from "@/components/indicators/Chip"; +import UploadImageDialog from "@/components/overlay/dialog/UploadImageDialog"; +import { Button } from "@/components/ui/button"; +import { ScrollArea, ScrollBar } from "@/components/ui/scroll-area"; +import { Toaster } from "@/components/ui/sonner"; +import { ToggleGroup, ToggleGroupItem } from "@/components/ui/toggle-group"; +import useOptimisticState from "@/hooks/use-optimistic-state"; +import axios from "axios"; +import { useCallback, useEffect, useMemo, useRef, useState } from "react"; +import { isDesktop } from "react-device-detect"; +import { LuImagePlus, LuTrash } from "react-icons/lu"; +import { toast } from "sonner"; +import useSWR from "swr"; + +export default function FaceLibrary() { + const [page, setPage] = useState(); + const [pageToggle, setPageToggle] = useOptimisticState(page, setPage, 100); + const tabsRef = useRef(null); + + // face data + + const { data: faceData } = useSWR("faces"); + + const faces = useMemo( + () => (faceData ? Object.keys(faceData) : []), + [faceData], + ); + const faceImages = useMemo( + () => (pageToggle && faceData ? faceData[pageToggle] : []), + [pageToggle, faceData], + ); + + useEffect(() => { + if (!pageToggle && faces) { + setPageToggle(faces[0]); + } + // we need to listen on the value of the faces list + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [faces]); + + // upload + + const [upload, setUpload] = useState(false); + + const onUploadImage = useCallback( + (file: File) => { + const formData = new FormData(); + formData.append("file", file); + axios.post(`faces/${pageToggle}`, formData, { + headers: { + "Content-Type": "multipart/form-data", + }, + }); + }, + [pageToggle], + ); + + return ( +
+ + + + +
+ +
+ { + if (value) { + setPageToggle(value); + } + }} + > + {Object.values(faces).map((item) => ( + +
{item}
+
+ ))} +
+ +
+
+
+ {pageToggle && ( +
+ {faceImages.map((image: string) => ( + + ))} + +
+ )} +
+ ); +} + +type FaceImageProps = { + name: string; + image: string; +}; +function FaceImage({ name, image }: FaceImageProps) { + const [hovered, setHovered] = useState(false); + + const onDelete = useCallback(() => { + axios + .post(`/faces/${name}/delete`, { ids: [image] }) + .then((resp) => { + if (resp.status == 200) { + toast.error(`Successfully deleted face.`, { position: "top-center" }); + } + }) + .catch((error) => { + if (error.response?.data?.message) { + toast.error(`Failed to delete: ${error.response.data.message}`, { + position: "top-center", + }); + } else { + toast.error(`Failed to delete: ${error.message}`, { + position: "top-center", + }); + } + }); + }, [name, image]); + + return ( +
setHovered(true) : undefined} + onMouseLeave={isDesktop ? () => setHovered(false) : undefined} + onClick={isDesktop ? undefined : () => setHovered(!hovered)} + > + {hovered && ( +
+ onDelete()} + > + + +
+ )} + +
+ ); +} diff --git a/web/src/types/frigateConfig.ts b/web/src/types/frigateConfig.ts index 5c5971fc0..1413efbad 100644 --- a/web/src/types/frigateConfig.ts +++ b/web/src/types/frigateConfig.ts @@ -287,6 +287,10 @@ export interface FrigateConfig { environment_vars: Record; + face_recognition: { + enabled: boolean; + }; + ffmpeg: { global_args: string[]; hwaccel_args: string; From 5db16c3272a5e906a53f1c5d2ce9438744d3bab1 Mon Sep 17 00:00:00 2001 From: Nicolas Mowen Date: Wed, 27 Nov 2024 12:48:39 -0700 Subject: [PATCH 17/18] Face recognition fixes (#15222) * Fix nginx max upload size * Close upload dialog when done and add toasts * Formatting * fix ruff --- .../rootfs/usr/local/nginx/conf/nginx.conf | 5 ++- frigate/util/model.py | 9 +++-- web/src/pages/FaceLibrary.tsx | 37 +++++++++++++++---- 3 files changed, 39 insertions(+), 12 deletions(-) diff --git a/docker/main/rootfs/usr/local/nginx/conf/nginx.conf b/docker/main/rootfs/usr/local/nginx/conf/nginx.conf index fa487a083..61b598859 100644 --- a/docker/main/rootfs/usr/local/nginx/conf/nginx.conf +++ b/docker/main/rootfs/usr/local/nginx/conf/nginx.conf @@ -81,6 +81,9 @@ http { open_file_cache_errors on; aio on; + # file upload size + client_max_body_size 10M; + # https://github.com/kaltura/nginx-vod-module#vod_open_file_thread_pool vod_open_file_thread_pool default; @@ -246,8 +249,6 @@ http { proxy_no_cache $should_not_cache; add_header X-Cache-Status $upstream_cache_status; - client_max_body_size 10M; - location /api/vod/ { include auth_request.conf; proxy_pass http://frigate_api/vod/; diff --git a/frigate/util/model.py b/frigate/util/model.py index 2b7cf1848..6a0ec4331 100644 --- a/frigate/util/model.py +++ b/frigate/util/model.py @@ -162,7 +162,9 @@ class FaceClassificationModel: def __init__(self, config: FaceRecognitionConfig, db: SqliteQueueDatabase): self.config = config self.db = db - self.recognizer = cv2.face.LBPHFaceRecognizer_create(radius=4, threshold=(1 - config.threshold) * 1000) + self.recognizer = cv2.face.LBPHFaceRecognizer_create( + radius=4, threshold=(1 - config.threshold) * 1000 + ) self.label_map: dict[int, str] = {} def __build_classifier(self) -> None: @@ -190,11 +192,12 @@ class FaceClassificationModel: if not self.label_map: self.__build_classifier() - index, distance = self.recognizer.predict(cv2.equalizeHist(cv2.cvtColor(face_image, cv2.COLOR_BGR2GRAY))) + index, distance = self.recognizer.predict( + cv2.equalizeHist(cv2.cvtColor(face_image, cv2.COLOR_BGR2GRAY)) + ) if index == -1: return None score = 1.0 - (distance / 1000) return self.label_map[index], round(score, 2) - diff --git a/web/src/pages/FaceLibrary.tsx b/web/src/pages/FaceLibrary.tsx index e955a17de..5ea215dfd 100644 --- a/web/src/pages/FaceLibrary.tsx +++ b/web/src/pages/FaceLibrary.tsx @@ -20,7 +20,7 @@ export default function FaceLibrary() { // face data - const { data: faceData } = useSWR("faces"); + const { data: faceData, mutate: refreshFaces } = useSWR("faces"); const faces = useMemo( () => (faceData ? Object.keys(faceData) : []), @@ -47,13 +47,36 @@ export default function FaceLibrary() { (file: File) => { const formData = new FormData(); formData.append("file", file); - axios.post(`faces/${pageToggle}`, formData, { - headers: { - "Content-Type": "multipart/form-data", - }, - }); + axios + .post(`faces/${pageToggle}`, formData, { + headers: { + "Content-Type": "multipart/form-data", + }, + }) + .then((resp) => { + if (resp.status == 200) { + setUpload(false); + refreshFaces(); + toast.success( + "Successfully uploaded iamge. 
View the file in the /exports folder.", + { position: "top-center" }, + ); + } + }) + .catch((error) => { + if (error.response?.data?.message) { + toast.error( + `Failed to upload image: ${error.response.data.message}`, + { position: "top-center" }, + ); + } else { + toast.error(`Failed to upload image: ${error.message}`, { + position: "top-center", + }); + } + }); }, - [pageToggle], + [pageToggle, refreshFaces], ); return ( From 2e51ae2eb7a4b859543602a11e137b3692fc3f3e Mon Sep 17 00:00:00 2001 From: Nicolas Mowen Date: Thu, 12 Dec 2024 12:53:48 -0600 Subject: [PATCH 18/18] Reimplement downloading of face detection model (#15472) --- frigate/embeddings/embeddings.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/frigate/embeddings/embeddings.py b/frigate/embeddings/embeddings.py index 63597e49e..9e8b73ac9 100644 --- a/frigate/embeddings/embeddings.py +++ b/frigate/embeddings/embeddings.py @@ -123,6 +123,17 @@ class Embeddings: device="GPU" if config.semantic_search.model_size == "large" else "CPU", ) + if self.config.face_recognition.enabled: + self.face_embedding = GenericONNXEmbedding( + model_name="facedet", + model_file="facedet.onnx", + download_urls={ + "facedet.onnx": "https://github.com/opencv/opencv_zoo/raw/refs/heads/main/models/face_detection_yunet/face_detection_yunet_2023mar_int8.onnx", + }, + model_type=ModelTypeEnum.face, + requestor=self.requestor, + ) + self.lpr_detection_model = None self.lpr_classification_model = None self.lpr_recognition_model = None
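Patch 18 only re-adds the download of the YuNet face detection model (`facedet.onnx`), which is presumably consumed by the `face_detector` driven in the maintainer's `_detect_face`. As a reference, here is a minimal, hedged sketch of running that model directly through OpenCV's `FaceDetectorYN` API, including the 25% crop margin applied around detected face boxes in the maintainer; the model path and thresholds below are assumptions, not values taken from the patch:

```python
# Standalone sketch (not Frigate code) of detecting and cropping a face with YuNet.
import cv2

MODEL_PATH = "/config/model_cache/facedet/facedet.onnx"  # hypothetical cache location


def crop_largest_face(bgr_image, margin_ratio=0.25):
    h, w = bgr_image.shape[:2]
    # model, config, input size, score threshold, NMS threshold, top_k (illustrative values)
    detector = cv2.FaceDetectorYN.create(MODEL_PATH, "", (w, h), 0.5, 0.3, 5000)
    detector.setInputSize((w, h))
    _, faces = detector.detect(bgr_image)

    if faces is None or len(faces) == 0:
        return None

    # each row is [x, y, w, h, landmarks..., score]; take the first detection
    x, y, fw, fh = faces[0][:4].astype(int)
    margin = int(fw * margin_ratio)
    top = max(0, y - margin)
    left = max(0, x - margin)
    bottom = min(h, y + fh + margin)
    right = min(w, x + fw + margin)
    return bgr_image[top:bottom, left:right]
```

YuNet expects the input size to match the frame being searched, which is why the maintainer calls `setInputSize((input.shape[1], input.shape[0]))` before every detection.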