From 3a5e100fabc626d83c64143f3098c52388496ddb Mon Sep 17 00:00:00 2001
From: Nicolas Mowen <nickmowen213@gmail.com>
Date: Tue, 22 Oct 2024 15:05:06 -0600
Subject: [PATCH] Use facenet model

---
 docker/main/requirements-wheels.txt  |  1 -
 frigate/config/semantic_search.py    |  2 +-
 frigate/db/sqlitevecq.py             |  2 +-
 frigate/embeddings/embeddings.py     |  6 ++--
 frigate/embeddings/functions/onnx.py | 42 ++++++++++++----------------
 frigate/util/model.py                | 19 -------------
 6 files changed, 23 insertions(+), 49 deletions(-)

diff --git a/docker/main/requirements-wheels.txt b/docker/main/requirements-wheels.txt
index d0d53608a..c48b99c75 100644
--- a/docker/main/requirements-wheels.txt
+++ b/docker/main/requirements-wheels.txt
@@ -34,7 +34,6 @@ ws4py == 0.5.*
 unidecode == 1.3.*
 # OpenVino & ONNX
 openvino == 2024.3.*
-onnx == 1.17.*
 onnxruntime-openvino == 1.19.* ; platform_machine == 'x86_64'
 onnxruntime == 1.19.* ; platform_machine == 'aarch64'
 # Embeddings
diff --git a/frigate/config/semantic_search.py b/frigate/config/semantic_search.py
index 8ed291b1e..32ff8cf3c 100644
--- a/frigate/config/semantic_search.py
+++ b/frigate/config/semantic_search.py
@@ -10,7 +10,7 @@ __all__ = ["FaceRecognitionConfig", "SemanticSearchConfig"]
 class FaceRecognitionConfig(FrigateBaseModel):
     enabled: bool = Field(default=False, title="Enable face recognition.")
     threshold: float = Field(
-        default=0.8, title="Face similarity score required to be considered a match."
+        default=0.9, title="Face similarity score required to be considered a match."
     )
     min_area: int = Field(
         default=500, title="Min area of face box to consider running face recognition."
diff --git a/frigate/db/sqlitevecq.py b/frigate/db/sqlitevecq.py
index d630e1ddf..1447fd48f 100644
--- a/frigate/db/sqlitevecq.py
+++ b/frigate/db/sqlitevecq.py
@@ -63,6 +63,6 @@ class SqliteVecQueueDatabase(SqliteQueueDatabase):
             self.execute_sql("""
                 CREATE VIRTUAL TABLE IF NOT EXISTS vec_faces USING vec0(
                     id TEXT PRIMARY KEY,
-                    face_embedding FLOAT[512] distance_metric=cosine
+                    face_embedding FLOAT[128] distance_metric=cosine
                 );
             """)
diff --git a/frigate/embeddings/embeddings.py b/frigate/embeddings/embeddings.py
index f72ca6230..6b0f94ca9 100644
--- a/frigate/embeddings/embeddings.py
+++ b/frigate/embeddings/embeddings.py
@@ -128,10 +128,10 @@ class Embeddings:
 
         if self.config.face_recognition.enabled:
             self.face_embedding = GenericONNXEmbedding(
-                model_name="resnet100/arcface",
-                model_file="arcfaceresnet100-8.onnx",
+                model_name="facenet",
+                model_file="facenet.onnx",
                 download_urls={
-                    "arcfaceresnet100-8.onnx": "https://media.githubusercontent.com/media/onnx/models/bb0d4cf3d4e2a5f7376c13a08d337e86296edbe8/vision/body_analysis/arcface/model/arcfaceresnet100-8.onnx"
+                    "facenet.onnx": "https://github.com/NicolasSM-001/faceNet.onnx-/raw/refs/heads/main/faceNet.onnx"
                 },
                 model_size="large",
                 model_type=ModelTypeEnum.face,
diff --git a/frigate/embeddings/functions/onnx.py b/frigate/embeddings/functions/onnx.py
index 274614a1b..9fc71d502 100644
--- a/frigate/embeddings/functions/onnx.py
+++ b/frigate/embeddings/functions/onnx.py
@@ -19,7 +19,7 @@ from frigate.comms.inter_process import InterProcessRequestor
 from frigate.const import MODEL_CACHE_DIR, UPDATE_MODEL_STATE
 from frigate.types import ModelStatusTypesEnum
 from frigate.util.downloader import ModelDownloader
-from frigate.util.model import ONNXModelRunner, fix_spatial_mode
+from frigate.util.model import ONNXModelRunner
 
 warnings.filterwarnings(
     "ignore",
@@ -31,6 +31,8 @@ warnings.filterwarnings(
 disable_progress_bar()
 logger = logging.getLogger(__name__)
 
+FACE_EMBEDDING_SIZE = 160
+
 
 class ModelTypeEnum(str, Enum):
     face = "face"
@@ -93,12 +95,9 @@ class GenericONNXEmbedding:
     def _download_model(self, path: str):
         try:
             file_name = os.path.basename(path)
-            download_path = None
 
             if file_name in self.download_urls:
-                download_path = ModelDownloader.download_from_url(
-                    self.download_urls[file_name], path
-                )
+                ModelDownloader.download_from_url(self.download_urls[file_name], path)
             elif (
                 file_name == self.tokenizer_file
                 and self.model_type == ModelTypeEnum.text
@@ -114,14 +113,6 @@ class GenericONNXEmbedding:
                 )
                 tokenizer.save_pretrained(path)
 
-            # the onnx model has incorrect spatial mode
-            # set by default, update then save model.
-            print(
-                f"download path is {download_path} and model type is {self.model_type}"
-            )
-            if download_path is not None and self.model_type == ModelTypeEnum.face:
-                fix_spatial_mode(download_path)
-
             self.downloader.requestor.send_data(
                 UPDATE_MODEL_STATE,
                 {
@@ -196,30 +187,33 @@ class GenericONNXEmbedding:
 
             # handle images larger than input size
             width, height = pil.size
-            if width != 112 or height != 112:
+            if width != FACE_EMBEDDING_SIZE or height != FACE_EMBEDDING_SIZE:
                 if width > height:
-                    new_height = int(((height / width) * 112) // 4 * 4)
-                    pil = pil.resize((112, new_height))
+                    new_height = int(((height / width) * FACE_EMBEDDING_SIZE) // 4 * 4)
+                    pil = pil.resize((FACE_EMBEDDING_SIZE, new_height))
                 else:
-                    new_width = int(((width / height) * 112) // 4 * 4)
-                    pil = pil.resize((new_width, 112))
+                    new_width = int(((width / height) * FACE_EMBEDDING_SIZE) // 4 * 4)
+                    pil = pil.resize((new_width, FACE_EMBEDDING_SIZE))
 
             og = np.array(pil).astype(np.float32)
 
-            # Image must be 112x112
+            # Model input must be square: FACE_EMBEDDING_SIZE x FACE_EMBEDDING_SIZE pixels
             og_h, og_w, channels = og.shape
-            frame = np.full((112, 112, channels), (0, 0, 0), dtype=np.float32)
+            frame = np.full(
+                (FACE_EMBEDDING_SIZE, FACE_EMBEDDING_SIZE, channels),
+                (0, 0, 0),
+                dtype=np.float32,
+            )
 
             # compute center offset
-            x_center = (112 - og_w) // 2
-            y_center = (112 - og_h) // 2
+            x_center = (FACE_EMBEDDING_SIZE - og_w) // 2
+            y_center = (FACE_EMBEDDING_SIZE - og_h) // 2
 
             # copy img image into center of result image
             frame[y_center : y_center + og_h, x_center : x_center + og_w] = og
 
             frame = np.expand_dims(frame, axis=0)
-            frame = np.transpose(frame, (0, 3, 1, 2))
-            return [{"data": frame}]
+            return [{"image_input": frame}]
         else:
             raise ValueError(f"Unable to preprocess inputs for {self.model_type}")
 
diff --git a/frigate/util/model.py b/frigate/util/model.py
index 22a3ff099..7aefe8b42 100644
--- a/frigate/util/model.py
+++ b/frigate/util/model.py
@@ -1,10 +1,8 @@
 """Model Utils"""
 
 import os
-from pathlib import Path
 from typing import Any
 
-import onnx
 import onnxruntime as ort
 
 try:
@@ -65,23 +63,6 @@ def get_ort_providers(
     return (providers, options)
 
 
-def fix_spatial_mode(path: Path) -> None:
-    save_path = str(path)
-    old_path = f"{save_path}.old"
-    path.rename(old_path)
-
-    model = onnx.load(old_path)
-
-    for node in model.graph.node:
-        if node.op_type == "BatchNormalization":
-            for attr in node.attribute:
-                if attr.name == "spatial":
-                    attr.i = 1
-
-    onnx.save(model, save_path)
-    Path(old_path).unlink()
-
-
 class ONNXModelRunner:
     """Run onnx models optimally based on available hardware."""