From 4ff0c8a8d1e63ef2860523ee7407b1e17826ba20 Mon Sep 17 00:00:00 2001
From: Nicolas Mowen
Date: Thu, 24 Oct 2024 16:00:39 -0600
Subject: [PATCH 1/8] Better review sub-labels (#14563)

* Better review sub-labels

* Handle init
---
 frigate/review/maintainer.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/frigate/review/maintainer.py b/frigate/review/maintainer.py
index bdb749dc8..d87e1d33c 100644
--- a/frigate/review/maintainer.py
+++ b/frigate/review/maintainer.py
@@ -51,7 +51,7 @@ class PendingReviewSegment:
         frame_time: float,
         severity: SeverityEnum,
         detections: dict[str, str],
-        sub_labels: set[str],
+        sub_labels: dict[str, str],
         zones: list[str],
         audio: set[str],
     ):
@@ -135,7 +135,7 @@ class PendingReviewSegment:
             ReviewSegment.data.name: {
                 "detections": list(set(self.detections.keys())),
                 "objects": list(set(self.detections.values())),
-                "sub_labels": list(self.sub_labels),
+                "sub_labels": list(self.sub_labels.values()),
                 "zones": self.zones,
                 "audio": list(self.audio),
             },
@@ -261,7 +261,7 @@ class ReviewSegmentMaintainer(threading.Thread):
                     segment.detections[object["id"]] = object["sub_label"][0]
                 else:
                     segment.detections[object["id"]] = f'{object["label"]}-verified'
-                    segment.sub_labels.add(object["sub_label"][0])
+                    segment.sub_labels[object["id"]] = object["sub_label"][0]

             # if object is alert label
             # and has entered required zones or required zones is not set
@@ -347,7 +347,7 @@ class ReviewSegmentMaintainer(threading.Thread):

         if len(active_objects) > 0:
             detections: dict[str, str] = {}
-            sub_labels = set()
+            sub_labels: dict[str, str] = {}
             zones: list[str] = []
             severity = None
@@ -358,7 +358,7 @@ class ReviewSegmentMaintainer(threading.Thread):
                     detections[object["id"]] = object["sub_label"][0]
                 else:
                     detections[object["id"]] = f'{object["label"]}-verified'
-                    sub_labels.add(object["sub_label"][0])
+                    sub_labels[object["id"]] = object["sub_label"][0]

             # if object is alert label
             # and has entered required zones or required zones is not set
@@ -566,7 +566,7 @@ class ReviewSegmentMaintainer(threading.Thread):
                     frame_time,
                     severity,
                     {},
-                    set(),
+                    {},
                     [],
                     detections,
                 )
@@ -576,7 +576,7 @@ class ReviewSegmentMaintainer(threading.Thread):
                 frame_time,
                 SeverityEnum.alert,
                 {manual_info["event_id"]: manual_info["label"]},
-                set(),
+                {},
                 [],
                 set(),
            )
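A quick sketch of what the new structure buys (illustrative, not part of the patch): keying sub labels by tracked object id means repeated updates from the same object overwrite a single entry instead of accumulating in a set, while the serialized review segment still gets a flat list of names via `.values()`.

```python
# Hypothetical object ids and names, mirroring PendingReviewSegment.sub_labels.
sub_labels: dict[str, str] = {}

sub_labels["1729801000.123-abc"] = "bob"    # first recognition for object 1
sub_labels["1729801000.123-abc"] = "bob"    # later update overwrites in place
sub_labels["1729801002.456-def"] = "alice"  # a second tracked object

# what get_data() now stores under "sub_labels"
print(list(sub_labels.values()))  # ['bob', 'alice']
```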
From 2d27e72ed9907ccac352557d93c46e591909eeac Mon Sep 17 00:00:00 2001
From: Corwin
Date: Fri, 25 Oct 2024 14:07:01 +0200
Subject: [PATCH 2/8] fix: hailo driver wrong version name (#14575)

---
 docker/hailo8l/user_installation.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker/hailo8l/user_installation.sh b/docker/hailo8l/user_installation.sh
index 734c640f9..853652ffa 100644
--- a/docker/hailo8l/user_installation.sh
+++ b/docker/hailo8l/user_installation.sh
@@ -38,7 +38,7 @@ cd ../../
 if [ ! -d /lib/firmware/hailo ]; then
   sudo mkdir /lib/firmware/hailo
 fi
-sudo mv hailo8_fw.4.17.0.bin /lib/firmware/hailo/hailo8_fw.bin
+sudo mv hailo8_fw.4.18.0.bin /lib/firmware/hailo/hailo8_fw.bin

 # Install udev rules
 sudo cp ./linux/pcie/51-hailo-udev.rules /etc/udev/rules.d/
From 4dadf6d35337f2b402784abd99de6085ba198f45 Mon Sep 17 00:00:00 2001
From: Josh Hawkins <32435876+hawkeye217@users.noreply.github.com>
Date: Fri, 25 Oct 2024 07:24:04 -0500
Subject: [PATCH 3/8] Bugfixes (#14587)

* Ensure review and search item mobile pages reopen correctly

* disable pan/pinch/zoom when native browser video controls are displayed

* report 0 for storage usage when api returns null
---
 .../components/graph/CombinedStorageGraph.tsx |  2 +-
 web/src/components/mobile/MobilePage.tsx      |  8 ++++-
 .../overlay/detail/ReviewDetailDialog.tsx     | 31 ++++++++++++-------
 .../overlay/detail/SearchDetailDialog.tsx     | 25 +++++++++------
 web/src/components/player/HlsVideoPlayer.tsx  |  1 +
 5 files changed, 44 insertions(+), 23 deletions(-)

diff --git a/web/src/components/graph/CombinedStorageGraph.tsx b/web/src/components/graph/CombinedStorageGraph.tsx
index ff605ac02..2a52d82b6 100644
--- a/web/src/components/graph/CombinedStorageGraph.tsx
+++ b/web/src/components/graph/CombinedStorageGraph.tsx
@@ -216,7 +216,7 @@ export function CombinedStorageGraph({
               )}
-              {getUnitSize(item.usage)}
+              {getUnitSize(item.usage ?? 0)}
               {item.data[0].toFixed(2)}%
               {item.name === "Unused"

diff --git a/web/src/components/mobile/MobilePage.tsx b/web/src/components/mobile/MobilePage.tsx
index 52bc4d9fe..169b5e524 100644
--- a/web/src/components/mobile/MobilePage.tsx
+++ b/web/src/components/mobile/MobilePage.tsx
@@ -25,7 +25,13 @@ export function MobilePage({
   const [uncontrolledOpen, setUncontrolledOpen] = useState(false);

   const open = controlledOpen ?? uncontrolledOpen;
-  const setOpen = onOpenChange ?? setUncontrolledOpen;
+  const setOpen = (value: boolean) => {
+    if (onOpenChange) {
+      onOpenChange(value);
+    } else {
+      setUncontrolledOpen(value);
+    }
+  };

   return (

diff --git a/web/src/components/overlay/detail/ReviewDetailDialog.tsx b/web/src/components/overlay/detail/ReviewDetailDialog.tsx
index 0b20ff9bc..2230046f3 100644
--- a/web/src/components/overlay/detail/ReviewDetailDialog.tsx
+++ b/web/src/components/overlay/detail/ReviewDetailDialog.tsx
@@ -13,7 +13,7 @@ import { getIconForLabel } from "@/utils/iconUtil";
 import { useApiHost } from "@/api";
 import { ReviewDetailPaneType, ReviewSegment } from "@/types/review";
 import { Event } from "@/types/event";
-import { useEffect, useMemo, useRef, useState } from "react";
+import { useCallback, useEffect, useMemo, useRef, useState } from "react";
 import { cn } from "@/lib/utils";
 import { FrigatePlusDialog } from "../dialog/FrigatePlusDialog";
 import ObjectLifecycle from "./ObjectLifecycle";
@@ -91,6 +91,22 @@ export default function ReviewDetailDialog({
     review != undefined,
   );

+  const handleOpenChange = useCallback(
+    (open: boolean) => {
+      setIsOpen(open);
+      if (!open) {
+        // short timeout to allow the mobile page animation
+        // to complete before updating the state
+        setTimeout(() => {
+          setReview(undefined);
+          setSelectedEvent(undefined);
+          setPane("overview");
+        }, 300);
+      }
+    },
+    [setReview, setIsOpen],
+  );
+
   useEffect(() => {
     setIsOpen(review != undefined);
     // we know that these deps are correct
@@ -109,16 +125,7 @@ export default function ReviewDetailDialog({

   return (
     <>
-      {
-          if (!open) {
-            setReview(undefined);
-            setSelectedEvent(undefined);
-            setPane("overview");
-          }
-        }}
-      >
+      
         setUpload(undefined)}
@@ -140,7 +147,7 @@
       >
         {pane == "overview" && (
-          setIsOpen(false)}>
+          
             Review Item Details
             Review item details

diff --git a/web/src/components/overlay/detail/SearchDetailDialog.tsx b/web/src/components/overlay/detail/SearchDetailDialog.tsx
--- a/web/src/components/overlay/detail/SearchDetailDialog.tsx
+++ b/web/src/components/overlay/detail/SearchDetailDialog.tsx
+  const handleOpenChange = useCallback(
+    (open: boolean) => {
+      setIsOpen(open);
+      if (!open) {
+        // short timeout to allow the mobile page animation
+        // to complete before updating the state
+        setTimeout(() => {
+          setSearch(undefined);
+        }, 300);
+      }
+    },
+    [setSearch],
+  );
+
   useEffect(() => {
     if (search) {
       setIsOpen(search != undefined);

   const Description = isDesktop ? DialogDescription : MobilePageDescription;

   return (
-    {
-        if (search) {
-          setSearch(undefined);
-        }
-      }}
-    >
+    
-          setIsOpen(false)}>
+          
             Tracked Object Details
             Tracked object details

diff --git a/web/src/components/player/HlsVideoPlayer.tsx b/web/src/components/player/HlsVideoPlayer.tsx
index bb0c89802..0661fb0c9 100644
--- a/web/src/components/player/HlsVideoPlayer.tsx
+++ b/web/src/components/player/HlsVideoPlayer.tsx
@@ -190,6 +190,7 @@ export default function HlsVideoPlayer({
       minScale={1.0}
       wheel={{ smoothStep: 0.005 }}
       onZoom={(zoom) => setZoomScale(zoom.state.scale)}
+      disabled={!frigateControls}
     >
       {frigateControls && (
Date: Tue, 22 Oct 2024 15:59:33 -0600
Subject: [PATCH 4/8] Update version

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index b7c6ab821..5500174af 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@ default_target: local

 COMMIT_HASH := $(shell git log -1 --pretty=format:"%h"|tail -1)
-VERSION = 0.15.0
+VERSION = 0.16.0
 IMAGE_REPO ?= ghcr.io/blakeblackshear/frigate
 GITHUB_REF_NAME ?= $(shell git rev-parse --abbrev-ref HEAD)
 BOARDS= #Initialized empty
From 7852fd691b3632eef183080cdfcc43727879b525 Mon Sep 17 00:00:00 2001
From: Nicolas Mowen
Date: Tue, 22 Oct 2024 16:05:48 -0600
Subject: [PATCH 5/8] Face recognition backend (#14495)

* Add basic config and face recognition table

* Reconfigure updates processing to handle face

* Crop frame to face box

* Implement face embedding calculation

* Get matching face embeddings

* Add support face recognition based on existing faces

* Use arcface face embeddings instead of generic embeddings model

* Add apis for managing faces

* Implement face uploading API

* Build out more APIs

* Add min area config

* Handle larger images

* Add more debug logs

* fix calculation

* Reduce timeout

* Small tweaks

* Use webp images

* Use facenet model
---
 docker/main/requirements-wheels.txt  |   2 +
 frigate/api/classification.py        |  56 +++++++++
 frigate/api/defs/tags.py             |   1 +
 frigate/api/fastapi_app.py           |  12 +-
 frigate/comms/embeddings_updater.py  |   3 +-
 frigate/config/semantic_search.py    |  15 ++-
 frigate/const.py                     |   3 +-
 frigate/db/sqlitevecq.py             |  17 ++-
 frigate/embeddings/__init__.py       |  23 ++++
 frigate/embeddings/embeddings.py     |  50 +++++++-
 frigate/embeddings/functions/onnx.py |  54 ++++++++-
 frigate/embeddings/maintainer.py     | 166 ++++++++++++++++++++++-----
 frigate/util/downloader.py           |   4 +-
 13 files changed, 364 insertions(+), 42 deletions(-)
 create mode 100644 frigate/api/classification.py

diff --git a/docker/main/requirements-wheels.txt b/docker/main/requirements-wheels.txt
index 795456588..abccd1592 100644
--- a/docker/main/requirements-wheels.txt
+++ b/docker/main/requirements-wheels.txt
@@ -8,6 +8,8 @@ imutils == 0.5.*
 joserfc == 1.0.*
 pathvalidate == 3.2.*
 markupsafe == 2.1.*
+python-multipart == 0.0.12
+# General
 mypy == 1.6.1
 numpy == 1.26.*
 onvif_zeep == 0.2.12

diff --git a/frigate/api/classification.py b/frigate/api/classification.py
new file mode 100644
index 000000000..d862008c8
--- /dev/null
+++ b/frigate/api/classification.py
@@ -0,0 +1,56 @@
+"""Object classification APIs."""
+
+import logging
+
+from fastapi import APIRouter, Request, UploadFile
+from fastapi.responses import JSONResponse
+
+from frigate.api.defs.tags import Tags
+from frigate.embeddings import EmbeddingsContext
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter(tags=[Tags.events])
+
+
+@router.get("/faces")
+def get_faces():
+    return JSONResponse(content={"message": "there are faces"})
+
+
+@router.post("/faces/{name}")
+async def register_face(request: Request, name: str, file: UploadFile):
+    # if not file.content_type.startswith("image"):
+    #     return JSONResponse(
+    #         status_code=400,
+    #         content={
+    #             "success": False,
+    #             "message": "Only an image can be used to register a face.",
+    #         },
+    #     )
+
+    context: EmbeddingsContext = request.app.embeddings
+    context.register_face(name, await file.read())
+    return JSONResponse(
+        status_code=200,
+        content={"success": True, "message": "Successfully registered face."},
+    )
+
+
+@router.delete("/faces")
+def deregister_faces(request: Request, body: dict = None):
+    json: dict[str, any] = body or {}
+    list_of_ids = json.get("ids", "")
+
+    if not list_of_ids or len(list_of_ids) == 0:
+        return JSONResponse(
+            content=({"success": False, "message": "Not a valid list of ids"}),
+            status_code=404,
+        )
+
+    context: EmbeddingsContext = request.app.embeddings
+    context.delete_face_ids(list_of_ids)
+    return JSONResponse(
+        content=({"success": True, "message": "Successfully deleted faces."}),
+        status_code=200,
+    )
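The endpoints above can be exercised with any HTTP client; here is a minimal sketch using `requests`, assuming a Frigate instance at the default `FRIGATE_LOCALHOST` address used elsewhere in this series. The multipart field name matches the `file: UploadFile` parameter of `register_face`, and at this point in the series the uploaded image is expected to already be a cropped face (a later patch adds detection for uncropped uploads). File names and ids are hypothetical.

```python
import requests

# register a face image under the name "bob"
with open("bob.webp", "rb") as f:
    resp = requests.post(
        "http://127.0.0.1:5000/api/faces/bob",
        files={"file": ("bob.webp", f, "image/webp")},
    )
print(resp.json())  # {"success": True, "message": "Successfully registered face."}

# delete stored faces by id; ids look like "<name>-<random suffix>"
requests.delete("http://127.0.0.1:5000/api/faces", json={"ids": ["bob-a1b2c3"]})
```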
diff --git a/frigate/api/defs/tags.py b/frigate/api/defs/tags.py
index 80faf255c..9e61da9e9 100644
--- a/frigate/api/defs/tags.py
+++ b/frigate/api/defs/tags.py
@@ -10,4 +10,5 @@ class Tags(Enum):
     review = "Review"
     export = "Export"
     events = "Events"
+    classification = "classification"
     auth = "Auth"

diff --git a/frigate/api/fastapi_app.py b/frigate/api/fastapi_app.py
index e3542458e..942964d58 100644
--- a/frigate/api/fastapi_app.py
+++ b/frigate/api/fastapi_app.py
@@ -11,7 +11,16 @@ from starlette_context import middleware, plugins
 from starlette_context.plugins import Plugin

 from frigate.api import app as main_app
-from frigate.api import auth, event, export, media, notification, preview, review
+from frigate.api import (
+    auth,
+    classification,
+    event,
+    export,
+    media,
+    notification,
+    preview,
+    review,
+)
 from frigate.api.auth import get_jwt_secret, limiter
 from frigate.comms.event_metadata_updater import (
     EventMetadataPublisher,
@@ -95,6 +104,7 @@ def create_fastapi_app(
     # Routes
     # Order of include_router matters: https://fastapi.tiangolo.com/tutorial/path-params/#order-matters
     app.include_router(auth.router)
+    app.include_router(classification.router)
     app.include_router(review.router)
     app.include_router(main_app.router)
     app.include_router(preview.router)

diff --git a/frigate/comms/embeddings_updater.py b/frigate/comms/embeddings_updater.py
index 9a13525f8..095f33fde 100644
--- a/frigate/comms/embeddings_updater.py
+++ b/frigate/comms/embeddings_updater.py
@@ -12,6 +12,7 @@ class EmbeddingsRequestEnum(Enum):
     embed_description = "embed_description"
     embed_thumbnail = "embed_thumbnail"
     generate_search = "generate_search"
+    register_face = "register_face"


 class EmbeddingsResponder:
@@ -22,7 +23,7 @@ class EmbeddingsResponder:
     def check_for_request(self, process: Callable) -> None:
         while True:  # load all messages that are queued
-            has_message, _, _ = zmq.select([self.socket], [], [], 0.1)
+            has_message, _, _ = zmq.select([self.socket], [], [], 0.01)

             if not has_message:
                 break

diff --git a/frigate/config/semantic_search.py b/frigate/config/semantic_search.py
index 2891050a1..32ff8cf3c 100644
--- a/frigate/config/semantic_search.py
+++ b/frigate/config/semantic_search.py
@@ -4,7 +4,17 @@ from pydantic import Field

 from .base import FrigateBaseModel

-__all__ = ["SemanticSearchConfig"]
+__all__ = ["FaceRecognitionConfig", "SemanticSearchConfig"]
+
+
+class FaceRecognitionConfig(FrigateBaseModel):
+    enabled: bool = Field(default=False, title="Enable face recognition.")
+    threshold: float = Field(
+        default=0.9, title="Face similarity score required to be considered a match."
+    )
+    min_area: int = Field(
+        default=500, title="Min area of face box to consider running face recognition."
+    )


 class SemanticSearchConfig(FrigateBaseModel):
@@ -12,6 +22,9 @@ class SemanticSearchConfig(FrigateBaseModel):
     reindex: Optional[bool] = Field(
         default=False, title="Reindex all detections on startup."
     )
+    face_recognition: FaceRecognitionConfig = Field(
+        default_factory=FaceRecognitionConfig, title="Face recognition config."
+    )
     model_size: str = Field(
         default="small", title="The size of the embeddings model used."
     )

diff --git a/frigate/const.py b/frigate/const.py
index c83b10e73..41a2fbc15 100644
--- a/frigate/const.py
+++ b/frigate/const.py
@@ -5,8 +5,9 @@ DEFAULT_DB_PATH = f"{CONFIG_DIR}/frigate.db"
 MODEL_CACHE_DIR = f"{CONFIG_DIR}/model_cache"
 BASE_DIR = "/media/frigate"
 CLIPS_DIR = f"{BASE_DIR}/clips"
-RECORD_DIR = f"{BASE_DIR}/recordings"
 EXPORT_DIR = f"{BASE_DIR}/exports"
+FACE_DIR = f"{CLIPS_DIR}/faces"
+RECORD_DIR = f"{BASE_DIR}/recordings"
 BIRDSEYE_PIPE = "/tmp/cache/birdseye"
 CACHE_DIR = "/tmp/cache"
 FRIGATE_LOCALHOST = "http://127.0.0.1:5000"

diff --git a/frigate/db/sqlitevecq.py b/frigate/db/sqlitevecq.py
index ccb75ae54..1447fd48f 100644
--- a/frigate/db/sqlitevecq.py
+++ b/frigate/db/sqlitevecq.py
@@ -29,6 +29,10 @@ class SqliteVecQueueDatabase(SqliteQueueDatabase):
         ids = ",".join(["?" for _ in event_ids])
         self.execute_sql(f"DELETE FROM vec_descriptions WHERE id IN ({ids})", event_ids)

+    def delete_embeddings_face(self, face_ids: list[str]) -> None:
+        ids = ",".join(["?" for _ in face_ids])
+        self.execute_sql(f"DELETE FROM vec_faces WHERE id IN ({ids})", face_ids)
+
     def drop_embeddings_tables(self) -> None:
         self.execute_sql("""
             DROP TABLE vec_descriptions;
@@ -36,8 +40,11 @@ class SqliteVecQueueDatabase(SqliteQueueDatabase):
         self.execute_sql("""
             DROP TABLE vec_thumbnails;
         """)
+        self.execute_sql("""
+            DROP TABLE vec_faces;
+        """)

-    def create_embeddings_tables(self) -> None:
+    def create_embeddings_tables(self, face_recognition: bool) -> None:
         """Create vec0 virtual table for embeddings"""
         self.execute_sql("""
             CREATE VIRTUAL TABLE IF NOT EXISTS vec_thumbnails USING vec0(
@@ -51,3 +58,11 @@ class SqliteVecQueueDatabase(SqliteQueueDatabase):
                 description_embedding FLOAT[768] distance_metric=cosine
             );
         """)
+
+        if face_recognition:
+            self.execute_sql("""
+                CREATE VIRTUAL TABLE IF NOT EXISTS vec_faces USING vec0(
+                    id TEXT PRIMARY KEY,
+                    face_embedding FLOAT[128] distance_metric=cosine
+                );
+            """)

diff --git a/frigate/embeddings/__init__.py b/frigate/embeddings/__init__.py
index 7f2e1a10c..235b15df3 100644
--- a/frigate/embeddings/__init__.py
+++ b/frigate/embeddings/__init__.py
@@ -1,5 +1,6 @@
 """SQLite-vec embeddings database."""

+import base64
 import json
 import logging
 import multiprocessing as mp
@@ -189,6 +190,28 @@ class EmbeddingsContext:

         return results

+    def register_face(self, face_name: str, image_data: bytes) -> None:
+        self.requestor.send_data(
+            EmbeddingsRequestEnum.register_face.value,
+            {
+                "face_name": face_name,
+                "image": base64.b64encode(image_data).decode("ASCII"),
+            },
+        )
+
+    def get_face_ids(self, name: str) -> list[str]:
+        sql_query = f"""
+            SELECT
+                id
+            FROM vec_descriptions
+            WHERE id LIKE '%{name}%'
+        """
+
+        return self.db.execute_sql(sql_query).fetchall()
+
+    def delete_face_ids(self, ids: list[str]) -> None:
+        self.db.delete_embeddings_face(ids)
+
     def update_description(self, event_id: str, description: str) -> None:
         self.requestor.send_data(
             EmbeddingsRequestEnum.embed_description.value,

diff --git a/frigate/embeddings/embeddings.py b/frigate/embeddings/embeddings.py
index d77a9eecf..6b0f94ca9 100644
--- a/frigate/embeddings/embeddings.py
+++ b/frigate/embeddings/embeddings.py
@@ -3,6 +3,8 @@
 import base64
 import logging
 import os
+import random
+import string
 import time

 from numpy import ndarray
@@ -12,6 +14,7 @@ from frigate.comms.inter_process import InterProcessRequestor
 from frigate.config.semantic_search import SemanticSearchConfig
 from frigate.const import (
     CONFIG_DIR,
+    FACE_DIR,
     UPDATE_EMBEDDINGS_REINDEX_PROGRESS,
     UPDATE_MODEL_STATE,
 )
@@ -67,7 +70,7 @@ class Embeddings:
         self.requestor = InterProcessRequestor()

         # Create tables if they don't exist
-        self.db.create_embeddings_tables()
+        self.db.create_embeddings_tables(self.config.face_recognition.enabled)

         models = [
             "jinaai/jina-clip-v1-text_model_fp16.onnx",
@@ -121,6 +124,21 @@ class Embeddings:
             device="GPU" if config.model_size == "large" else "CPU",
         )

+        self.face_embedding = None
+
+        if self.config.face_recognition.enabled:
+            self.face_embedding = GenericONNXEmbedding(
+                model_name="facenet",
+                model_file="facenet.onnx",
+                download_urls={
+                    "facenet.onnx": "https://github.com/NicolasSM-001/faceNet.onnx-/raw/refs/heads/main/faceNet.onnx"
+                },
+                model_size="large",
+                model_type=ModelTypeEnum.face,
+                requestor=self.requestor,
+                device="GPU",
+            )
+
     def embed_thumbnail(
         self, event_id: str, thumbnail: bytes, upsert: bool = True
     ) -> ndarray:
@@ -215,12 +233,40 @@ class Embeddings:

         return embeddings

+    def embed_face(self, label: str, thumbnail: bytes, upsert: bool = False) -> ndarray:
+        embedding = self.face_embedding(thumbnail)[0]
+
+        if upsert:
+            rand_id = "".join(
+                random.choices(string.ascii_lowercase + string.digits, k=6)
+            )
+            id = f"{label}-{rand_id}"
+
+            # write face to library
+            folder = os.path.join(FACE_DIR, label)
+            file = os.path.join(folder, f"{id}.webp")
+            os.makedirs(folder, exist_ok=True)
+
+            # save face image
+            with open(file, "wb") as output:
+                output.write(thumbnail)
+
+            self.db.execute_sql(
+                """
+                INSERT OR REPLACE INTO vec_faces(id, face_embedding)
+                VALUES(?, ?)
+                """,
+                (id, serialize(embedding)),
+            )
+
+        return embedding
+
     def reindex(self) -> None:
         logger.info("Indexing tracked object embeddings...")
         self.db.drop_embeddings_tables()
         logger.debug("Dropped embeddings tables.")
-        self.db.create_embeddings_tables()
+        self.db.create_embeddings_tables(self.config.face_recognition.enabled)
         logger.debug("Created embeddings tables.")

         # Delete the saved stats file

diff --git a/frigate/embeddings/functions/onnx.py b/frigate/embeddings/functions/onnx.py
index 6ea495a30..9fc71d502 100644
--- a/frigate/embeddings/functions/onnx.py
+++ b/frigate/embeddings/functions/onnx.py
@@ -31,6 +31,8 @@ warnings.filterwarnings(
 disable_progress_bar()
 logger = logging.getLogger(__name__)

+FACE_EMBEDDING_SIZE = 160
+

 class ModelTypeEnum(str, Enum):
     face = "face"
@@ -47,7 +49,7 @@ class GenericONNXEmbedding:
         model_file: str,
         download_urls: Dict[str, str],
         model_size: str,
-        model_type: str,
+        model_type: ModelTypeEnum,
         requestor: InterProcessRequestor,
         tokenizer_file: Optional[str] = None,
         device: str = "AUTO",
@@ -57,7 +59,7 @@ class GenericONNXEmbedding:
         self.tokenizer_file = tokenizer_file
         self.requestor = requestor
         self.download_urls = download_urls
-        self.model_type = model_type  # 'text' or 'vision'
+        self.model_type = model_type
         self.model_size = model_size
         self.device = device
         self.download_path = os.path.join(MODEL_CACHE_DIR, self.model_name)
@@ -93,6 +95,7 @@ class GenericONNXEmbedding:
     def _download_model(self, path: str):
         try:
             file_name = os.path.basename(path)
+
             if file_name in self.download_urls:
                 ModelDownloader.download_from_url(self.download_urls[file_name], path)
             elif (
@@ -101,6 +104,7 @@ class GenericONNXEmbedding:
             ):
                 if not os.path.exists(path + "/" + self.model_name):
                     logger.info(f"Downloading {self.model_name} tokenizer")
+
                 tokenizer = AutoTokenizer.from_pretrained(
                     self.model_name,
                     trust_remote_code=True,
@@ -131,8 +135,11 @@ class GenericONNXEmbedding:
             self.downloader.wait_for_download()
         if self.model_type == ModelTypeEnum.text:
             self.tokenizer = self._load_tokenizer()
-        else:
+        elif self.model_type == ModelTypeEnum.vision:
             self.feature_extractor = self._load_feature_extractor()
+        elif self.model_type == ModelTypeEnum.face:
+            self.feature_extractor = []
+
         self.runner = ONNXModelRunner(
             os.path.join(self.download_path, self.model_file),
             self.device,
@@ -172,16 +179,51 @@ class GenericONNXEmbedding:
                 self.feature_extractor(images=image, return_tensors="np")
                 for image in processed_images
             ]
+        elif self.model_type == ModelTypeEnum.face:
+            if isinstance(raw_inputs, list):
+                raise ValueError("Face embedding does not support batch inputs.")
+
+            pil = self._process_image(raw_inputs)
+
+            # handle images larger than input size
+            width, height = pil.size
+            if width != FACE_EMBEDDING_SIZE or height != FACE_EMBEDDING_SIZE:
+                if width > height:
+                    new_height = int(((height / width) * FACE_EMBEDDING_SIZE) // 4 * 4)
+                    pil = pil.resize((FACE_EMBEDDING_SIZE, new_height))
+                else:
+                    new_width = int(((width / height) * FACE_EMBEDDING_SIZE) // 4 * 4)
+                    pil = pil.resize((new_width, FACE_EMBEDDING_SIZE))
+
+            og = np.array(pil).astype(np.float32)
+
+            # Image must be FACE_EMBEDDING_SIZExFACE_EMBEDDING_SIZE
+            og_h, og_w, channels = og.shape
+            frame = np.full(
+                (FACE_EMBEDDING_SIZE, FACE_EMBEDDING_SIZE, channels),
+                (0, 0, 0),
+                dtype=np.float32,
+            )
+
+            # compute center offset
+            x_center = (FACE_EMBEDDING_SIZE - og_w) // 2
+            y_center = (FACE_EMBEDDING_SIZE - og_h) // 2
+
+            # copy img image into center of result image
+            frame[y_center : y_center + og_h, x_center : x_center + og_w] = og
+
+            frame = np.expand_dims(frame, axis=0)
+            return [{"image_input": frame}]
         else:
             raise ValueError(f"Unable to preprocess inputs for {self.model_type}")

-    def _process_image(self, image):
+    def _process_image(self, image, output: str = "RGB") -> Image.Image:
         if isinstance(image, str):
             if image.startswith("http"):
                 response = requests.get(image)
-                image = Image.open(BytesIO(response.content)).convert("RGB")
+                image = Image.open(BytesIO(response.content)).convert(output)
         elif isinstance(image, bytes):
-            image = Image.open(BytesIO(image)).convert("RGB")
+            image = Image.open(BytesIO(image)).convert(output)

         return image
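The `ModelTypeEnum.face` branch above letterboxes arbitrary crops into the model's fixed input square. A standalone sketch of the same idea, assuming an RGB PIL image as `_process_image` guarantees (this is an illustration, not the patch's code):

```python
import numpy as np
from PIL import Image

FACE_EMBEDDING_SIZE = 160  # mirrors the constant in functions/onnx.py

def letterbox(pil: Image.Image) -> np.ndarray:
    w, h = pil.size
    # scale the long side to the target, rounding the short side down to a multiple of 4
    if w >= h:
        pil = pil.resize(
            (FACE_EMBEDDING_SIZE, int(((h / w) * FACE_EMBEDDING_SIZE) // 4 * 4))
        )
    else:
        pil = pil.resize(
            (int(((w / h) * FACE_EMBEDDING_SIZE) // 4 * 4), FACE_EMBEDDING_SIZE)
        )

    og = np.array(pil).astype(np.float32)
    og_h, og_w, channels = og.shape

    # paste the resized crop into the center of a black square canvas
    frame = np.zeros((FACE_EMBEDDING_SIZE, FACE_EMBEDDING_SIZE, channels), np.float32)
    x = (FACE_EMBEDDING_SIZE - og_w) // 2
    y = (FACE_EMBEDDING_SIZE - og_h) // 2
    frame[y : y + og_h, x : x + og_w] = og

    return np.expand_dims(frame, axis=0)  # add the batch dimension
```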
diff --git a/frigate/embeddings/maintainer.py b/frigate/embeddings/maintainer.py
index 1578a0fe3..d0f351233 100644
--- a/frigate/embeddings/maintainer.py
+++ b/frigate/embeddings/maintainer.py
@@ -9,6 +9,7 @@ from typing import Optional

 import cv2
 import numpy as np
+import requests
 from peewee import DoesNotExist
 from playhouse.sqliteq import SqliteQueueDatabase

@@ -20,12 +21,12 @@ from frigate.comms.event_metadata_updater import (
 from frigate.comms.events_updater import EventEndSubscriber, EventUpdateSubscriber
 from frigate.comms.inter_process import InterProcessRequestor
 from frigate.config import FrigateConfig
-from frigate.const import CLIPS_DIR, UPDATE_EVENT_DESCRIPTION
+from frigate.const import CLIPS_DIR, FRIGATE_LOCALHOST, UPDATE_EVENT_DESCRIPTION
 from frigate.events.types import EventTypeEnum
 from frigate.genai import get_genai_client
 from frigate.models import Event
 from frigate.util.builtin import serialize
-from frigate.util.image import SharedMemoryFrameManager, calculate_region
+from frigate.util.image import SharedMemoryFrameManager, area, calculate_region

 from .embeddings import Embeddings

@@ -58,10 +59,17 @@ class EmbeddingMaintainer(threading.Thread):
         )
         self.embeddings_responder = EmbeddingsResponder()
         self.frame_manager = SharedMemoryFrameManager()
+
+        # set face recognition conditions
+        self.face_recognition_enabled = (
+            self.config.semantic_search.face_recognition.enabled
+        )
+        self.requires_face_detection = "face" not in self.config.model.all_attributes
+
         # create communication for updating event descriptions
         self.requestor = InterProcessRequestor()
         self.stop_event = stop_event
-        self.tracked_events = {}
+        self.tracked_events: dict[str, list[any]] = {}
         self.genai_client = get_genai_client(config.genai)

     def run(self) -> None:
@@ -101,6 +109,13 @@ class EmbeddingMaintainer(threading.Thread):
                     return serialize(
                         self.embeddings.text_embedding([data])[0], pack=False
                     )
+                elif topic == EmbeddingsRequestEnum.register_face.value:
+                    self.embeddings.embed_face(
+                        data["face_name"],
+                        base64.b64decode(data["image"]),
+                        upsert=True,
+                    )
+                    return None
             except Exception as e:
                 logger.error(f"Unable to handle embeddings request {e}")

@@ -108,7 +123,7 @@ class EmbeddingMaintainer(threading.Thread):

     def _process_updates(self) -> None:
         """Process event updates"""
-        update = self.event_subscriber.check_for_update(timeout=0.1)
+        update = self.event_subscriber.check_for_update(timeout=0.01)

         if update is None:
             return
@@ -119,41 +134,48 @@ class EmbeddingMaintainer(threading.Thread):
             return

         camera_config = self.config.cameras[camera]
-        # no need to save our own thumbnails if genai is not enabled
-        # or if the object has become stationary
-        if (
-            not camera_config.genai.enabled
-            or self.genai_client is None
-            or data["stationary"]
-        ):
-            return

-        if data["id"] not in self.tracked_events:
-            self.tracked_events[data["id"]] = []
+        # no need to process updated objects if face recognition and genai are disabled
+        if not camera_config.genai.enabled and not self.face_recognition_enabled:
+            return

         # Create our own thumbnail based on the bounding box and the frame time
         try:
             frame_id = f"{camera}{data['frame_time']}"
             yuv_frame = self.frame_manager.get(frame_id, camera_config.frame_shape_yuv)
-
-            if yuv_frame is not None:
-                data["thumbnail"] = self._create_thumbnail(yuv_frame, data["box"])
-
-                # Limit the number of thumbnails saved
-                if len(self.tracked_events[data["id"]]) >= MAX_THUMBNAILS:
-                    # Always keep the first thumbnail for the event
-                    self.tracked_events[data["id"]].pop(1)
-
-                self.tracked_events[data["id"]].append(data)
-
-                self.frame_manager.close(frame_id)
         except FileNotFoundError:
             pass

+        if yuv_frame is None:
+            logger.debug(
+                "Unable to process object update because frame is unavailable."
+            )
+            return
+
+        if self.face_recognition_enabled:
+            self._process_face(data, yuv_frame)
+
+        # no need to save our own thumbnails if genai is not enabled
+        # or if the object has become stationary
+        if self.genai_client is not None and not data["stationary"]:
+            if data["id"] not in self.tracked_events:
+                self.tracked_events[data["id"]] = []
+
+            data["thumbnail"] = self._create_thumbnail(yuv_frame, data["box"])
+
+            # Limit the number of thumbnails saved
+            if len(self.tracked_events[data["id"]]) >= MAX_THUMBNAILS:
+                # Always keep the first thumbnail for the event
+                self.tracked_events[data["id"]].pop(1)
+
+            self.tracked_events[data["id"]].append(data)
+
+        self.frame_manager.close(frame_id)
+
     def _process_finalized(self) -> None:
         """Process the end of an event."""
         while True:
-            ended = self.event_end_subscriber.check_for_update(timeout=0.1)
+            ended = self.event_end_subscriber.check_for_update(timeout=0.01)

             if ended == None:
                 break
@@ -243,7 +265,7 @@ class EmbeddingMaintainer(threading.Thread):
     def _process_event_metadata(self):
         # Check for regenerate description requests
         (topic, event_id, source) = self.event_metadata_subscriber.check_for_update(
-            timeout=0.1
+            timeout=0.01
         )

         if topic is None:
@@ -252,6 +274,94 @@ class EmbeddingMaintainer(threading.Thread):
         if event_id:
             self.handle_regenerate_description(event_id, source)

+    def _search_face(self, query_embedding: bytes) -> list:
+        """Search for the face most closely matching the embedding."""
+        sql_query = """
+            SELECT
+                id,
+                distance
+            FROM vec_faces
+            WHERE face_embedding MATCH ?
+            AND k = 10 ORDER BY distance
+        """
+        return self.embeddings.db.execute_sql(sql_query, [query_embedding]).fetchall()
+
+    def _process_face(self, obj_data: dict[str, any], frame: np.ndarray) -> None:
+        """Look for faces in image."""
+        # don't run for non person objects
+        if obj_data.get("label") != "person":
+            logger.debug("Not processing face for non person object.")
+            return
+
+        # don't overwrite sub label for objects that have one
+        if obj_data.get("sub_label"):
+            logger.debug(
+                f"Not processing face due to existing sub label: {obj_data.get('sub_label')}."
+            )
+            return
+
+        face: Optional[dict[str, any]] = None
+
+        if self.requires_face_detection:
+            # TODO run cv2 face detection
+            pass
+        else:
+            # don't run for object without attributes
+            if not obj_data.get("current_attributes"):
+                logger.debug("No attributes to parse.")
+                return
+
+            attributes: list[dict[str, any]] = obj_data.get("current_attributes", [])
+            for attr in attributes:
+                if attr.get("label") != "face":
+                    continue
+
+                if face is None or attr.get("score", 0.0) > face.get("score", 0.0):
+                    face = attr
+
+        # no faces detected in this frame
+        if not face:
+            return
+
+        face_box = face.get("box")
+
+        # check that face is valid
+        if (
+            not face_box
+            or area(face_box) < self.config.semantic_search.face_recognition.min_area
+        ):
+            logger.debug(f"Invalid face box {face}")
+            return
+
+        face_frame = cv2.cvtColor(frame, cv2.COLOR_YUV2BGR_I420)
+        face_frame = face_frame[face_box[1] : face_box[3], face_box[0] : face_box[2]]
+        ret, jpg = cv2.imencode(
+            ".webp", face_frame, [int(cv2.IMWRITE_WEBP_QUALITY), 100]
+        )
+
+        if not ret:
+            logger.debug("Not processing face due to error creating cropped image.")
+            return
+
+        embedding = self.embeddings.embed_face("unknown", jpg.tobytes(), upsert=False)
+        query_embedding = serialize(embedding)
+        best_faces = self._search_face(query_embedding)
+        logger.debug(f"Detected best faces for person as: {best_faces}")
+
+        if not best_faces:
+            return
+
+        sub_label = str(best_faces[0][0]).split("-")[0]
+        score = 1.0 - best_faces[0][1]
+
+        if score < self.config.semantic_search.face_recognition.threshold:
+            return None
+
+        requests.post(
+            f"{FRIGATE_LOCALHOST}/api/events/{obj_data['id']}/sub_label",
+            json={"subLabel": sub_label, "subLabelScore": score},
+        )
+
     def _create_thumbnail(self, yuv_frame, box, height=500) -> Optional[bytes]:
         """Return jpg thumbnail of a region of the frame."""
         frame = cv2.cvtColor(yuv_frame, cv2.COLOR_YUV2BGR_I420)

diff --git a/frigate/util/downloader.py b/frigate/util/downloader.py
index 6685b0bb8..18c577fb0 100644
--- a/frigate/util/downloader.py
+++ b/frigate/util/downloader.py
@@ -101,7 +101,7 @@ class ModelDownloader:
         self.download_complete.set()

     @staticmethod
-    def download_from_url(url: str, save_path: str, silent: bool = False):
+    def download_from_url(url: str, save_path: str, silent: bool = False) -> Path:
         temporary_filename = Path(save_path).with_name(
             os.path.basename(save_path) + ".part"
         )
@@ -125,6 +125,8 @@ class ModelDownloader:
         if not silent:
             logger.info(f"Downloading complete: {url}")

+        return Path(save_path)
+
     @staticmethod
     def mark_files_state(
         requestor: InterProcessRequestor,
From f545efbeb80b42375a7c0ddbf054c70480b928b9 Mon Sep 17 00:00:00 2001
From: Nicolas Mowen
Date: Wed, 23 Oct 2024 09:03:18 -0600
Subject: [PATCH 6/8] Improve face recognition (#14537)

* Increase requirements for face to be set

* Manage faces properly

* Add basic docs

* Simplify

* Separate out face recognition from semantic search

* Update docs

* Formatting
---
 docs/docs/configuration/face_recognition.md | 21 +++++++++
 docs/docs/configuration/reference.md        |  8 ++++
 docs/sidebars.ts                            |  1 +
 frigate/config/config.py                    | 16 ++++++-
 frigate/config/semantic_search.py           | 23 +++++-----
 frigate/embeddings/embeddings.py            | 12 +++--
 frigate/embeddings/maintainer.py            | 49 +++++++++++++++------
 7 files changed, 96 insertions(+), 34 deletions(-)
 create mode 100644 docs/docs/configuration/face_recognition.md

diff --git a/docs/docs/configuration/face_recognition.md b/docs/docs/configuration/face_recognition.md
new file mode 100644
index 000000000..af079a226
--- /dev/null
+++ b/docs/docs/configuration/face_recognition.md
@@ -0,0 +1,21 @@
+---
+id: face_recognition
+title: Face Recognition
+---
+
+Face recognition allows people to be assigned names, and when their face is recognized Frigate will assign the person's name as a sub label. This information is included in the UI, in filters, and in notifications.
+
+Frigate has support for FaceNet to create face embeddings, which runs locally. Embeddings are then saved to Frigate's database.
+
+## Minimum System Requirements
+
+Face recognition works by running a large AI model locally on your system. Systems without a GPU will not run face recognition reliably, or at all.
+
+## Configuration
+
+Face recognition is disabled by default and requires semantic search to be enabled; it must be enabled in your config file before it can be used. Semantic search and face recognition are global configuration settings.
+
+```yaml
+face_recognition:
+  enabled: true
+```
\ No newline at end of file

diff --git a/docs/docs/configuration/reference.md b/docs/docs/configuration/reference.md
index 604791621..97ae70147 100644
--- a/docs/docs/configuration/reference.md
+++ b/docs/docs/configuration/reference.md
@@ -522,6 +522,14 @@ semantic_search:
   # NOTE: small model runs on CPU and large model runs on GPU
   model_size: "small"

+# Optional: Configuration for face recognition capability
+face_recognition:
+  # Optional: Enable face recognition (default: shown below)
+  enabled: False
+  # Optional: Face similarity score required to be considered a match (default: shown below)
+  threshold: 0.9
+  # Optional: Min area of face box to consider running face recognition (default: shown below)
+  min_area: 500

 # Optional: Configuration for AI generated tracked object descriptions
 # NOTE: Semantic Search must be enabled for this to do anything.
 # WARNING: Depending on the provider, this will send thumbnails over the internet

diff --git a/docs/sidebars.ts b/docs/sidebars.ts
index f8e8780b6..1038b1f98 100644
--- a/docs/sidebars.ts
+++ b/docs/sidebars.ts
@@ -36,6 +36,7 @@ const sidebars: SidebarsConfig = {
     'Semantic Search': [
       'configuration/semantic_search',
       'configuration/genai',
+      'configuration/face_recognition',
     ],
     Cameras: [
       'configuration/cameras',

diff --git a/frigate/config/config.py b/frigate/config/config.py
index b2373fdcc..97fe9f8ee 100644
--- a/frigate/config/config.py
+++ b/frigate/config/config.py
@@ -56,7 +56,7 @@ from .logger import LoggerConfig
 from .mqtt import MqttConfig
 from .notification import NotificationConfig
 from .proxy import ProxyConfig
-from .semantic_search import SemanticSearchConfig
+from .semantic_search import FaceRecognitionConfig, SemanticSearchConfig
 from .telemetry import TelemetryConfig
 from .tls import TlsConfig
 from .ui import UIConfig
@@ -159,6 +159,16 @@ class RestreamConfig(BaseModel):
     model_config = ConfigDict(extra="allow")


+def verify_semantic_search_dependent_configs(config: FrigateConfig) -> None:
+    """Verify that semantic search is enabled if required features are enabled."""
+    if not config.semantic_search.enabled:
+        if config.genai.enabled:
+            raise ValueError("Genai requires semantic search to be enabled.")
+
+        if config.face_recognition.enabled:
+            raise ValueError("Face recognition requires semantic search to be enabled.")
+
+
 def verify_config_roles(camera_config: CameraConfig) -> None:
     """Verify that roles are setup in the config correctly."""
     assigned_roles = list(
@@ -316,6 +326,9 @@ class FrigateConfig(FrigateBaseModel):
     semantic_search: SemanticSearchConfig = Field(
         default_factory=SemanticSearchConfig, title="Semantic search configuration."
     )
+    face_recognition: FaceRecognitionConfig = Field(
+        default_factory=FaceRecognitionConfig, title="Face recognition config."
+    )
     ui: UIConfig = Field(default_factory=UIConfig, title="UI configuration.")

     # Detector config
@@ -621,6 +634,7 @@ class FrigateConfig(FrigateBaseModel):
             detector_config.model.compute_model_hash()
             self.detectors[key] = detector_config

+        verify_semantic_search_dependent_configs(self)
         return self

     @field_validator("cameras")

diff --git a/frigate/config/semantic_search.py b/frigate/config/semantic_search.py
index 32ff8cf3c..7ffdbd137 100644
--- a/frigate/config/semantic_search.py
+++ b/frigate/config/semantic_search.py
@@ -7,6 +7,16 @@ from .base import FrigateBaseModel
 __all__ = ["FaceRecognitionConfig", "SemanticSearchConfig"]


+class SemanticSearchConfig(FrigateBaseModel):
+    enabled: bool = Field(default=False, title="Enable semantic search.")
+    reindex: Optional[bool] = Field(
+        default=False, title="Reindex all detections on startup."
+    )
+    model_size: str = Field(
+        default="small", title="The size of the embeddings model used."
+    )
+
+
 class FaceRecognitionConfig(FrigateBaseModel):
     enabled: bool = Field(default=False, title="Enable face recognition.")
     threshold: float = Field(
@@ -15,16 +25,3 @@ class FaceRecognitionConfig(FrigateBaseModel):
     min_area: int = Field(
         default=500, title="Min area of face box to consider running face recognition."
     )
-
-
-class SemanticSearchConfig(FrigateBaseModel):
-    enabled: bool = Field(default=False, title="Enable semantic search.")
-    reindex: Optional[bool] = Field(
-        default=False, title="Reindex all detections on startup."
-    )
-    face_recognition: FaceRecognitionConfig = Field(
-        default_factory=FaceRecognitionConfig, title="Face recognition config."
-    )
-    model_size: str = Field(
-        default="small", title="The size of the embeddings model used."
-    )

diff --git a/frigate/embeddings/embeddings.py b/frigate/embeddings/embeddings.py
index 6b0f94ca9..3fb6d5d26 100644
--- a/frigate/embeddings/embeddings.py
+++ b/frigate/embeddings/embeddings.py
@@ -11,7 +11,7 @@ from numpy import ndarray
 from playhouse.shortcuts import model_to_dict

 from frigate.comms.inter_process import InterProcessRequestor
-from frigate.config.semantic_search import SemanticSearchConfig
+from frigate.config import FrigateConfig
 from frigate.const import (
     CONFIG_DIR,
     FACE_DIR,
     UPDATE_EMBEDDINGS_REINDEX_PROGRESS,
     UPDATE_MODEL_STATE,
 )
@@ -62,9 +62,7 @@ def get_metadata(event: Event) -> dict:
 class Embeddings:
     """SQLite-vec embeddings database."""

-    def __init__(
-        self, config: SemanticSearchConfig, db: SqliteVecQueueDatabase
-    ) -> None:
+    def __init__(self, config: FrigateConfig, db: SqliteVecQueueDatabase) -> None:
         self.config = config
         self.db = db
         self.requestor = InterProcessRequestor()
@@ -76,7 +74,7 @@ class Embeddings:
             "jinaai/jina-clip-v1-text_model_fp16.onnx",
             "jinaai/jina-clip-v1-tokenizer",
             "jinaai/jina-clip-v1-vision_model_fp16.onnx"
-            if config.model_size == "large"
+            if config.semantic_search.model_size == "large"
             else "jinaai/jina-clip-v1-vision_model_quantized.onnx",
             "jinaai/jina-clip-v1-preprocessor_config.json",
         ]
@@ -97,7 +95,7 @@ class Embeddings:
             download_urls={
                 "text_model_fp16.onnx": "https://huggingface.co/jinaai/jina-clip-v1/resolve/main/onnx/text_model_fp16.onnx",
             },
-            model_size=config.model_size,
+            model_size=config.semantic_search.model_size,
             model_type=ModelTypeEnum.text,
             requestor=self.requestor,
             device="CPU",
@@ -105,7 +103,7 @@ class Embeddings:

         model_file = (
             "vision_model_fp16.onnx"
-            if self.config.model_size == "large"
+            if self.config.semantic_search.model_size == "large"
             else "vision_model_quantized.onnx"
         )

diff --git a/frigate/embeddings/maintainer.py b/frigate/embeddings/maintainer.py
index d0f351233..737dc3c22 100644
--- a/frigate/embeddings/maintainer.py
+++ b/frigate/embeddings/maintainer.py
@@ -32,6 +32,7 @@ from .embeddings import Embeddings

 logger = logging.getLogger(__name__)

+REQUIRED_FACES = 2
 MAX_THUMBNAILS = 10
@@ -46,7 +47,7 @@ class EmbeddingMaintainer(threading.Thread):
     ) -> None:
         super().__init__(name="embeddings_maintainer")
         self.config = config
-        self.embeddings = Embeddings(config.semantic_search, db)
+        self.embeddings = Embeddings(config, db)

         # Check if we need to re-index events
         if config.semantic_search.reindex:
@@ -61,10 +62,9 @@ class EmbeddingMaintainer(threading.Thread):
         self.frame_manager = SharedMemoryFrameManager()

         # set face recognition conditions
-        self.face_recognition_enabled = (
-            self.config.semantic_search.face_recognition.enabled
-        )
+        self.face_recognition_enabled = self.config.face_recognition.enabled
         self.requires_face_detection = "face" not in self.config.model.all_attributes
+        self.detected_faces: dict[str, float] = {}

         # create communication for updating event descriptions
         self.requestor = InterProcessRequestor()
@@ -183,6 +183,9 @@ class EmbeddingMaintainer(threading.Thread):
             event_id, camera, updated_db = ended
             camera_config = self.config.cameras[camera]

+            if event_id in self.detected_faces:
+                self.detected_faces.pop(event_id)
+
             if updated_db:
                 try:
                     event: Event = Event.get(Event.id == event_id)
@@ -276,25 +279,28 @@ class EmbeddingMaintainer(threading.Thread):

     def _search_face(self, query_embedding: bytes) -> list:
         """Search for the face most closely matching the embedding."""
-        sql_query = """
+        sql_query = f"""
             SELECT
                 id,
                 distance
             FROM vec_faces
             WHERE face_embedding MATCH ?
-            AND k = 10 ORDER BY distance
+            AND k = {REQUIRED_FACES} ORDER BY distance
         """
         return self.embeddings.db.execute_sql(sql_query, [query_embedding]).fetchall()

     def _process_face(self, obj_data: dict[str, any], frame: np.ndarray) -> None:
         """Look for faces in image."""
+        id = obj_data["id"]
+
         # don't run for non person objects
         if obj_data.get("label") != "person":
             logger.debug("Not processing face for non person object.")
             return

-        # don't overwrite sub label for objects that have one
-        if obj_data.get("sub_label"):
+        # don't overwrite sub label for objects that have a sub label
+        # that is not a face
+        if obj_data.get("sub_label") and id not in self.detected_faces:
             logger.debug(
                 f"Not processing face due to existing sub label: {obj_data.get('sub_label')}."
             )
             return
@@ -348,18 +354,35 @@ class EmbeddingMaintainer(threading.Thread):
         best_faces = self._search_face(query_embedding)
         logger.debug(f"Detected best faces for person as: {best_faces}")

-        if not best_faces:
+        if not best_faces or len(best_faces) < REQUIRED_FACES:
             return

         sub_label = str(best_faces[0][0]).split("-")[0]
-        score = 1.0 - best_faces[0][1]
+        avg_score = 0

-        if score < self.config.semantic_search.face_recognition.threshold:
+        for face in best_faces:
+            score = 1.0 - face[1]
+
+            if face[0] != sub_label:
+                logger.debug("Detected multiple faces, result is not valid.")
+                return None
+
+            avg_score += score
+
+        avg_score = avg_score / REQUIRED_FACES
+
+        if avg_score < self.config.semantic_search.face_recognition.threshold or (
+            id in self.detected_faces and avg_score <= self.detected_faces[id]
+        ):
+            logger.debug(
+                "Detected face does not score higher than threshold / previous face."
+            )
             return None

+        self.detected_faces[id] = avg_score
         requests.post(
-            f"{FRIGATE_LOCALHOST}/api/events/{obj_data['id']}/sub_label",
-            json={"subLabel": sub_label, "subLabelScore": score},
+            f"{FRIGATE_LOCALHOST}/api/events/{id}/sub_label",
+            json={"subLabel": sub_label, "subLabelScore": avg_score},
         )

     def _create_thumbnail(self, yuv_frame, box, height=500) -> Optional[bytes]:
From 616ce871131d178c3671f635c18a7803e9161e02 Mon Sep 17 00:00:00 2001
From: Nicolas Mowen
Date: Wed, 23 Oct 2024 09:26:03 -0600
Subject: [PATCH 7/8] Fix access (#14540)

---
 frigate/embeddings/embeddings.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/frigate/embeddings/embeddings.py b/frigate/embeddings/embeddings.py
index 3fb6d5d26..4bb1afcd6 100644
--- a/frigate/embeddings/embeddings.py
+++ b/frigate/embeddings/embeddings.py
@@ -116,10 +116,10 @@ class Embeddings:
             model_name="jinaai/jina-clip-v1",
             model_file=model_file,
             download_urls=download_urls,
-            model_size=config.model_size,
+            model_size=config.semantic_search.model_size,
             model_type=ModelTypeEnum.vision,
             requestor=self.requestor,
-            device="GPU" if config.model_size == "large" else "CPU",
+            device="GPU" if config.semantic_search.model_size == "large" else "CPU",
         )
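The matching policy built up across the face recognition patches above reduces to a small amount of logic: take the k nearest stored faces, require them all to belong to the same person, and average their similarity scores against the configured threshold. A detached sketch with hypothetical ids (the suffix-stripping comparison follows the corrected form in the final patch below):

```python
REQUIRED_FACES = 2
THRESHOLD = 0.9  # config.face_recognition.threshold

def match(hits: list[tuple[str, float]]) -> tuple[str, float] | None:
    """hits are (id, cosine_distance) rows from vec_faces, sorted by distance."""
    if len(hits) < REQUIRED_FACES:
        return None

    name = hits[0][0].split("-")[0]
    # every nearest neighbor must belong to the same person
    if any(h[0].split("-")[0] != name for h in hits):
        return None

    avg_score = round(sum(1.0 - d for _, d in hits) / REQUIRED_FACES, 2)
    return (name, avg_score) if avg_score >= THRESHOLD else None

print(match([("bob-a1b2c3", 0.04), ("bob-d4e5f6", 0.08)]))    # ('bob', 0.94)
print(match([("bob-a1b2c3", 0.04), ("alice-d4e5f6", 0.08)]))  # None
```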
From 4fa5625898000adf3fccee2918be34aa28f65888 Mon Sep 17 00:00:00 2001
From: Nicolas Mowen
Date: Wed, 23 Oct 2024 13:50:58 -0600
Subject: [PATCH 8/8] Face detection (#14544)

* Add support for face detection

* Add support for detecting faces during registration

* Set body size to be larger

* Undo
---
 .../rootfs/usr/local/nginx/conf/nginx.conf |   2 +
 frigate/api/defs/events_body.py            |   3 +
 frigate/api/event.py                       |  55 +++++--
 frigate/embeddings/embeddings.py           |   3 +-
 frigate/embeddings/maintainer.py           | 148 ++++++++++++++++++++++----
 5 files changed, 159 insertions(+), 52 deletions(-)

diff --git a/docker/main/rootfs/usr/local/nginx/conf/nginx.conf b/docker/main/rootfs/usr/local/nginx/conf/nginx.conf
index 75527bf53..fa487a083 100644
--- a/docker/main/rootfs/usr/local/nginx/conf/nginx.conf
+++ b/docker/main/rootfs/usr/local/nginx/conf/nginx.conf
@@ -246,6 +246,8 @@ http {
         proxy_no_cache $should_not_cache;
         add_header X-Cache-Status $upstream_cache_status;

+        client_max_body_size 10M;
+
         location /api/vod/ {
             include auth_request.conf;
             proxy_pass http://frigate_api/vod/;

diff --git a/frigate/api/defs/events_body.py b/frigate/api/defs/events_body.py
index ca1256598..cb15c18ce 100644
--- a/frigate/api/defs/events_body.py
+++ b/frigate/api/defs/events_body.py
@@ -8,6 +8,9 @@ class EventsSubLabelBody(BaseModel):
     subLabelScore: Optional[float] = Field(
         title="Score for sub label", default=None, gt=0.0, le=1.0
     )
+    camera: Optional[str] = Field(
+        title="Camera this object is detected on.", default=None
+    )


 class EventsDescriptionBody(BaseModel):

diff --git a/frigate/api/event.py b/frigate/api/event.py
index 7f4f14610..a27373844 100644
--- a/frigate/api/event.py
+++ b/frigate/api/event.py
@@ -890,38 +890,59 @@ def set_sub_label(
     try:
         event: Event = Event.get(Event.id == event_id)
     except DoesNotExist:
+        if not body.camera:
+            return JSONResponse(
+                content=(
+                    {
+                        "success": False,
+                        "message": "Event "
+                        + event_id
+                        + " not found and camera is not provided.",
+                    }
+                ),
+                status_code=404,
+            )
+
+        event = None
+
+    if request.app.detected_frames_processor:
+        tracked_obj: TrackedObject = (
+            request.app.detected_frames_processor.camera_states[
+                event.camera if event else body.camera
+            ].tracked_objects.get(event_id)
+        )
+    else:
+        tracked_obj = None
+
+    if not event and not tracked_obj:
         return JSONResponse(
-            content=({"success": False, "message": "Event " + event_id + " not found"}),
+            content=(
+                {"success": False, "message": "Event " + event_id + " not found."}
+            ),
             status_code=404,
         )

     new_sub_label = body.subLabel
     new_score = body.subLabelScore

-    if not event.end_time:
-        # update tracked object
-        tracked_obj: TrackedObject = (
-            request.app.detected_frames_processor.camera_states[
-                event.camera
-            ].tracked_objects.get(event.id)
-        )
-
-        if tracked_obj:
-            tracked_obj.obj_data["sub_label"] = (new_sub_label, new_score)
+    if tracked_obj:
+        tracked_obj.obj_data["sub_label"] = (new_sub_label, new_score)

     # update timeline items
     Timeline.update(
         data=Timeline.data.update({"sub_label": (new_sub_label, new_score)})
     ).where(Timeline.source_id == event_id).execute()

-    event.sub_label = new_sub_label
+    if event:
+        event.sub_label = new_sub_label

-    if new_score:
-        data = event.data
-        data["sub_label_score"] = new_score
-        event.data = data
+        if new_score:
+            data = event.data
+            data["sub_label_score"] = new_score
+            event.data = data
+
+        event.save()

-    event.save()
     return JSONResponse(
         content=(
             {

diff --git a/frigate/embeddings/embeddings.py b/frigate/embeddings/embeddings.py
index 4bb1afcd6..cc54ba548 100644
--- a/frigate/embeddings/embeddings.py
+++ b/frigate/embeddings/embeddings.py
@@ -129,7 +129,8 @@ class Embeddings:
                 model_name="facenet",
                 model_file="facenet.onnx",
                 download_urls={
-                    "facenet.onnx": "https://github.com/NicolasSM-001/faceNet.onnx-/raw/refs/heads/main/faceNet.onnx"
+                    "facenet.onnx": "https://github.com/NicolasSM-001/faceNet.onnx-/raw/refs/heads/main/faceNet.onnx",
+                    "facedet.onnx": "https://github.com/opencv/opencv_zoo/raw/refs/heads/main/models/face_detection_yunet/face_detection_yunet_2023mar_int8.onnx",
                 },
                 model_size="large",
                 model_type=ModelTypeEnum.face,

diff --git a/frigate/embeddings/maintainer.py b/frigate/embeddings/maintainer.py
index 737dc3c22..ca7d09238 100644
--- a/frigate/embeddings/maintainer.py
+++ b/frigate/embeddings/maintainer.py
@@ -72,6 +72,19 @@ class EmbeddingMaintainer(threading.Thread):
         self.tracked_events: dict[str, list[any]] = {}
         self.genai_client = get_genai_client(config.genai)

+    @property
+    def face_detector(self) -> cv2.FaceDetectorYN:
+        # Lazily create the classifier.
+        if "face_detector" not in self.__dict__:
+            self.__dict__["face_detector"] = cv2.FaceDetectorYN.create(
+                "/config/model_cache/facenet/facedet.onnx",
+                config="",
+                input_size=(320, 320),
+                score_threshold=0.8,
+                nms_threshold=0.3,
+            )
+        return self.__dict__["face_detector"]
+
     def run(self) -> None:
         """Maintain a SQLite-vec database for semantic search."""
         while not self.stop_event.is_set():
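The lazily constructed YuNet detector in the property above can be tried standalone; a sketch with the same model path and thresholds (the input image name is hypothetical):

```python
import cv2
import numpy as np

detector = cv2.FaceDetectorYN.create(
    "/config/model_cache/facenet/facedet.onnx",  # downloaded in embeddings.py above
    config="",
    input_size=(320, 320),
    score_threshold=0.8,
    nms_threshold=0.3,
)

img = cv2.imread("person.jpg")  # hypothetical BGR input
detector.setInputSize((img.shape[1], img.shape[0]))
_, faces = detector.detect(img)

if faces is not None:
    for f in faces:
        x, y, w, h = f[0:4].astype(int)
        # _detect_face() below keeps only the largest such box
        print("face box:", (max(x, 0), max(y, 0), x + w, y + h), "score:", f[-1])
```

The remaining hunks wire this detector into registration and into `_process_face`.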
@@ -90,7 +103,7 @@ class EmbeddingMaintainer(threading.Thread):
     def _process_requests(self) -> None:
         """Process embeddings requests"""

-        def _handle_request(topic: str, data: str) -> str:
+        def _handle_request(topic: str, data: dict[str, any]) -> str:
             try:
                 if topic == EmbeddingsRequestEnum.embed_description.value:
                     return serialize(
@@ -110,12 +123,34 @@ class EmbeddingMaintainer(threading.Thread):
                         self.embeddings.text_embedding([data])[0], pack=False
                     )
                 elif topic == EmbeddingsRequestEnum.register_face.value:
-                    self.embeddings.embed_face(
-                        data["face_name"],
-                        base64.b64decode(data["image"]),
-                        upsert=True,
-                    )
-                    return None
+                    if data.get("cropped"):
+                        self.embeddings.embed_face(
+                            data["face_name"],
+                            base64.b64decode(data["image"]),
+                            upsert=True,
+                        )
+                        return True
+                    else:
+                        img = cv2.imdecode(
+                            np.frombuffer(
+                                base64.b64decode(data["image"]), dtype=np.uint8
+                            ),
+                            cv2.IMREAD_COLOR,
+                        )
+                        face_box = self._detect_face(img)
+
+                        if not face_box:
+                            return False
+
+                        face = img[face_box[1] : face_box[3], face_box[0] : face_box[2]]
+                        ret, webp = cv2.imencode(
+                            ".webp", face, [int(cv2.IMWRITE_WEBP_QUALITY), 100]
+                        )
+                        self.embeddings.embed_face(
+                            data["face_name"], webp.tobytes(), upsert=True
+                        )
+
+                    return False
             except Exception as e:
                 logger.error(f"Unable to handle embeddings request {e}")

@@ class EmbeddingMaintainer(threading.Thread):
-    def _search_face(self, query_embedding: bytes) -> list:
+    def _search_face(self, query_embedding: bytes) -> list[tuple[str, float]]:
         """Search for the face most closely matching the embedding."""
         sql_query = f"""
             SELECT
                 id,
                 distance
             FROM vec_faces
             WHERE face_embedding MATCH ?
             AND k = {REQUIRED_FACES} ORDER BY distance
         """
         return self.embeddings.db.execute_sql(sql_query, [query_embedding]).fetchall()

+    def _detect_face(self, input: np.ndarray) -> tuple[int, int, int, int]:
+        """Detect faces in input image."""
+        self.face_detector.setInputSize((input.shape[1], input.shape[0]))
+        faces = self.face_detector.detect(input)
+
+        if faces[1] is None:
+            return None
+
+        face = None
+
+        for _, potential_face in enumerate(faces[1]):
+            raw_bbox = potential_face[0:4].astype(np.uint16)
+            x: int = max(raw_bbox[0], 0)
+            y: int = max(raw_bbox[1], 0)
+            w: int = raw_bbox[2]
+            h: int = raw_bbox[3]
+            bbox = (x, y, x + w, y + h)
+
+            if face is None or area(bbox) > area(face):
+                face = bbox
+
+        return face
+
     def _process_face(self, obj_data: dict[str, any], frame: np.ndarray) -> None:
         """Look for faces in image."""
         id = obj_data["id"]
@@ class EmbeddingMaintainer(threading.Thread):
         face: Optional[dict[str, any]] = None

         if self.requires_face_detection:
-            # TODO run cv2 face detection
-            pass
+            logger.debug("Running manual face detection.")
+            person_box = obj_data.get("box")
+
+            if not person_box:
+                return None
+
+            rgb = cv2.cvtColor(frame, cv2.COLOR_YUV2RGB_I420)
+            left, top, right, bottom = person_box
+            person = rgb[top:bottom, left:right]
+            face = self._detect_face(person)
+
+            if not face:
+                logger.debug("Detected no faces for person object.")
+                return
+
+            face_frame = person[face[1] : face[3], face[0] : face[2]]
+            face_frame = cv2.cvtColor(face_frame, cv2.COLOR_RGB2BGR)
         else:
             # don't run for object without attributes
             if not obj_data.get("current_attributes"):
                 logger.debug("No attributes to parse.")
                 return

             attributes: list[dict[str, any]] = obj_data.get("current_attributes", [])
             for attr in attributes:
                 if attr.get("label") != "face":
                     continue

                 if face is None or attr.get("score", 0.0) > face.get("score", 0.0):
                     face = attr

-        # no faces detected in this frame
-        if not face:
-            return
+            # no faces detected in this frame
+            if not face:
+                return

-        face_box = face.get("box")
+            face_box = face.get("box")

-        # check that face is valid
-        if (
-            not face_box
-            or area(face_box) < self.config.semantic_search.face_recognition.min_area
-        ):
-            logger.debug(f"Invalid face box {face}")
-            return
+            # check that face is valid
+            if not face_box or area(face_box) < self.config.face_recognition.min_area:
+                logger.debug(f"Invalid face box {face}")
+                return

-        face_frame = cv2.cvtColor(frame, cv2.COLOR_YUV2BGR_I420)
-        face_frame = face_frame[face_box[1] : face_box[3], face_box[0] : face_box[2]]
-        ret, jpg = cv2.imencode(
+            face_frame = cv2.cvtColor(frame, cv2.COLOR_YUV2BGR_I420)
+            face_frame = face_frame[
+                face_box[1] : face_box[3], face_box[0] : face_box[2]
+            ]
+
+        ret, webp = cv2.imencode(
             ".webp", face_frame, [int(cv2.IMWRITE_WEBP_QUALITY), 100]
         )

         if not ret:
             logger.debug("Not processing face due to error creating cropped image.")
             return

-        embedding = self.embeddings.embed_face("unknown", jpg.tobytes(), upsert=False)
+        embedding = self.embeddings.embed_face("unknown", webp.tobytes(), upsert=False)
         query_embedding = serialize(embedding)
         best_faces = self._search_face(query_embedding)
         logger.debug(f"Detected best faces for person as: {best_faces}")

         if not best_faces or len(best_faces) < REQUIRED_FACES:
+            logger.debug(f"{len(best_faces)} < {REQUIRED_FACES} min required faces.")
             return

         sub_label = str(best_faces[0][0]).split("-")[0]
         avg_score = 0

         for face in best_faces:
             score = 1.0 - face[1]

-            if face[0] != sub_label:
+            if face[0].split("-")[0] != sub_label:
                 logger.debug("Detected multiple faces, result is not valid.")
-                return None
+                return

             avg_score += score

-        avg_score = avg_score / REQUIRED_FACES
+        avg_score = round(avg_score / REQUIRED_FACES, 2)

-        if avg_score < self.config.semantic_search.face_recognition.threshold or (
+        if avg_score < self.config.face_recognition.threshold or (
             id in self.detected_faces and avg_score <= self.detected_faces[id]
         ):
             logger.debug(
-                "Detected face does not score higher than threshold / previous face."
+                f"Recognized face score {avg_score} is less than threshold ({self.config.face_recognition.threshold}) / previous face score ({self.detected_faces.get(id)})."
             )
-            return None
+            return

-        self.detected_faces[id] = avg_score
-        requests.post(
+        resp = requests.post(
             f"{FRIGATE_LOCALHOST}/api/events/{id}/sub_label",
-            json={"subLabel": sub_label, "subLabelScore": avg_score},
+            json={
+                "camera": obj_data.get("camera"),
+                "subLabel": sub_label,
+                "subLabelScore": avg_score,
+            },
         )

+        if resp.status_code == 200:
+            self.detected_faces[id] = avg_score
+
     def _create_thumbnail(self, yuv_frame, box, height=500) -> Optional[bytes]:
         """Return jpg thumbnail of a region of the frame."""
         frame = cv2.cvtColor(yuv_frame, cv2.COLOR_YUV2BGR_I420)