From 62e3315086268a26a6a2fd4611a2dd00ad125865 Mon Sep 17 00:00:00 2001 From: Josh Hawkins <32435876+hawkeye217@users.noreply.github.com> Date: Sat, 26 Oct 2024 06:06:12 -0500 Subject: [PATCH] model fixes and small tweaks --- frigate/embeddings/alpr/alpr.py | 79 ++++++++++------------------ frigate/embeddings/embeddings.py | 45 ++++++++++++++++ frigate/embeddings/functions/onnx.py | 6 +-- frigate/embeddings/maintainer.py | 17 +++--- 4 files changed, 87 insertions(+), 60 deletions(-) diff --git a/frigate/embeddings/alpr/alpr.py b/frigate/embeddings/alpr/alpr.py index d96711017..b91a50e3a 100644 --- a/frigate/embeddings/alpr/alpr.py +++ b/frigate/embeddings/alpr/alpr.py @@ -9,20 +9,24 @@ from shapely.geometry import Polygon from frigate.comms.inter_process import InterProcessRequestor from frigate.config.semantic_search import LicensePlateRecognitionConfig -from frigate.embeddings.functions.onnx import GenericONNXEmbedding, ModelTypeEnum +from frigate.embeddings.embeddings import Embeddings logger = logging.getLogger(__name__) class LicensePlateRecognition: def __init__( - self, config: LicensePlateRecognitionConfig, requestor: InterProcessRequestor + self, + config: LicensePlateRecognitionConfig, + requestor: InterProcessRequestor, + embeddings: Embeddings, ): self.lpr_config = config self.requestor = requestor - self.detection_model = self._create_detection_model() - self.classification_model = self._create_classification_model() - self.recognition_model = self._create_recognition_model() + self.embeddings = embeddings + self.detection_model = self.embeddings.lpr_detection_model + self.classification_model = self.embeddings.lpr_classification_model + self.recognition_model = self.embeddings.lpr_recognition_model self.ctc_decoder = CTCDecoder() self.batch_size = 6 @@ -32,49 +36,12 @@ class LicensePlateRecognition: self.max_size = 960 self.box_thresh = 0.8 self.mask_thresh = 0.8 - self.mean = np.array([123.675, 116.28, 103.53]).reshape(1, -1).astype("float64") - self.std = 1 / np.array([58.395, 57.12, 57.375]).reshape(1, -1).astype( - "float64" - ) - def _create_detection_model(self) -> GenericONNXEmbedding: - return GenericONNXEmbedding( - model_name="paddleocr-onnx", - model_file="detection.onnx", - download_urls={ - "detection.onnx": "https://github.com/hawkeye217/paddleocr-onnx/raw/refs/heads/master/models/detection.onnx" - }, - model_size="large", - model_type=ModelTypeEnum.alpr_detect, - requestor=self.requestor, - device="CPU", - ) - - def _create_classification_model(self) -> GenericONNXEmbedding: - return GenericONNXEmbedding( - model_name="paddleocr-onnx", - model_file="classification.onnx", - download_urls={ - "classification.onnx": "https://github.com/hawkeye217/paddleocr-onnx/raw/refs/heads/master/models/classification.onnx" - }, - model_size="large", - model_type=ModelTypeEnum.alpr_classify, - requestor=self.requestor, - device="CPU", - ) - - def _create_recognition_model(self) -> GenericONNXEmbedding: - return GenericONNXEmbedding( - model_name="paddleocr-onnx", - model_file="recognition.onnx", - download_urls={ - "recognition.onnx": "https://github.com/hawkeye217/paddleocr-onnx/raw/refs/heads/master/models/recognition.onnx" - }, - model_size="large", - model_type=ModelTypeEnum.alpr_recognize, - requestor=self.requestor, - device="CPU", - ) + if self.lpr_config.enabled: + # all models need to be loaded to run LPR + self.detection_model._load_model_and_utils() + self.classification_model._load_model_and_utils() + self.recognition_model._load_model_and_utils() def detect(self, 
image: np.ndarray) -> List[np.ndarray]: """ @@ -179,6 +146,15 @@ class LicensePlateRecognition: Returns: Tuple[List[str], List[float], List[int]]: Detected license plate texts, confidence scores, and areas of the plates. """ + if ( + self.detection_model.runner is None + or self.classification_model.runner is None + or self.recognition_model.runner is None + ): + # we might still be downloading the models + logger.debug("Model runners not loaded") + return [], [], [] + plate_points = self.detect(image) if len(plate_points) == 0: return [], [], [] @@ -209,7 +185,7 @@ class LicensePlateRecognition: average_confidence = conf - # TODO: remove + # set to True to write each cropped image for debugging if False: save_image = cv2.cvtColor( rotated_images[original_idx], cv2.COLOR_RGB2BGR @@ -251,9 +227,12 @@ class LicensePlateRecognition: Returns: np.ndarray: The normalized image, transposed to match the model's expected input format. """ + mean = np.array([123.675, 116.28, 103.53]).reshape(1, -1).astype("float64") + std = 1 / np.array([58.395, 57.12, 57.375]).reshape(1, -1).astype("float64") + image = image.astype("float32") - cv2.subtract(image, self.mean, image) - cv2.multiply(image, self.std, image) + cv2.subtract(image, mean, image) + cv2.multiply(image, std, image) return image.transpose((2, 0, 1))[np.newaxis, ...] def boxes_from_bitmap( diff --git a/frigate/embeddings/embeddings.py b/frigate/embeddings/embeddings.py index cc54ba548..a2de88394 100644 --- a/frigate/embeddings/embeddings.py +++ b/frigate/embeddings/embeddings.py @@ -77,6 +77,10 @@ class Embeddings: if config.semantic_search.model_size == "large" else "jinaai/jina-clip-v1-vision_model_quantized.onnx", "jinaai/jina-clip-v1-preprocessor_config.json", + "facenet-facenet.onnx", + "paddleocr-onnx-detection.onnx", + "paddleocr-onnx-classification.onnx", + "paddleocr-onnx-recognition.onnx", ] for model in models: @@ -138,6 +142,47 @@ class Embeddings: device="GPU", ) + self.lpr_detection_model = None + self.lpr_classification_model = None + self.lpr_recognition_model = None + + if self.config.lpr.enabled: + self.lpr_detection_model = GenericONNXEmbedding( + model_name="paddleocr-onnx", + model_file="detection.onnx", + download_urls={ + "detection.onnx": "https://github.com/hawkeye217/paddleocr-onnx/raw/refs/heads/master/models/detection.onnx" + }, + model_size="large", + model_type=ModelTypeEnum.alpr_detect, + requestor=self.requestor, + device="CPU", + ) + + self.lpr_classification_model = GenericONNXEmbedding( + model_name="paddleocr-onnx", + model_file="classification.onnx", + download_urls={ + "classification.onnx": "https://github.com/hawkeye217/paddleocr-onnx/raw/refs/heads/master/models/classification.onnx" + }, + model_size="large", + model_type=ModelTypeEnum.alpr_classify, + requestor=self.requestor, + device="CPU", + ) + + self.lpr_recognition_model = GenericONNXEmbedding( + model_name="paddleocr-onnx", + model_file="recognition.onnx", + download_urls={ + "recognition.onnx": "https://github.com/hawkeye217/paddleocr-onnx/raw/refs/heads/master/models/recognition.onnx" + }, + model_size="large", + model_type=ModelTypeEnum.alpr_recognize, + requestor=self.requestor, + device="CPU", + ) + def embed_thumbnail( self, event_id: str, thumbnail: bytes, upsert: bool = True ) -> ndarray: diff --git a/frigate/embeddings/functions/onnx.py b/frigate/embeddings/functions/onnx.py index 1507ba21d..200f728d3 100644 --- a/frigate/embeddings/functions/onnx.py +++ b/frigate/embeddings/functions/onnx.py @@ -92,7 +92,7 @@ class GenericONNXEmbedding: 
files_names, ModelStatusTypesEnum.downloaded, ) - self._load_model_and_tokenizer() + self._load_model_and_utils() logger.debug(f"models are already downloaded for {self.model_name}") def _download_model(self, path: str): @@ -132,7 +132,7 @@ class GenericONNXEmbedding: }, ) - def _load_model_and_tokenizer(self): + def _load_model_and_utils(self): if self.runner is None: if self.downloader: self.downloader.wait_for_download() @@ -254,7 +254,7 @@ class GenericONNXEmbedding: def __call__( self, inputs: Union[List[str], List[Image.Image], List[str]] ) -> List[np.ndarray]: - self._load_model_and_tokenizer() + self._load_model_and_utils() if self.runner is None or ( self.tokenizer is None and self.feature_extractor is None ): diff --git a/frigate/embeddings/maintainer.py b/frigate/embeddings/maintainer.py index f8736128e..19337f41f 100644 --- a/frigate/embeddings/maintainer.py +++ b/frigate/embeddings/maintainer.py @@ -75,13 +75,12 @@ class EmbeddingMaintainer(threading.Thread): # set license plate recognition conditions self.lpr_config = self.config.lpr - self.requires_license_plate_detection = ( - "license_plate" not in self.config.model.all_attributes - ) self.detected_license_plates: dict[str, dict[str, any]] = {} - self.license_plate_recognition = LicensePlateRecognition( - self.lpr_config, self.requestor - ) + + if self.lpr_config.enabled: + self.license_plate_recognition = LicensePlateRecognition( + self.lpr_config, self.requestor, self.embeddings + ) @property def face_detector(self) -> cv2.FaceDetectorYN: @@ -555,8 +554,12 @@ class EmbeddingMaintainer(threading.Thread): if license_plates: for plate, confidence, text_area in zip(license_plates, confidences, areas): + avg_confidence = ( + (sum(confidence) / len(confidence)) if confidence else 0 + ) + logger.debug( - f"Detected text: {plate} (average confidence: {(sum(confidence) / len(confidence)):.2f}, area: {text_area} pixels)" + f"Detected text: {plate} (average confidence: {avg_confidence:.2f}, area: {text_area} pixels)" ) else: # no plates found
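
Note for reviewers: the core of this change is that the three PaddleOCR ONNX models are now owned by Embeddings and only instantiated when LPR is enabled, while LicensePlateRecognition borrows them, loads them lazily, and returns early from recognition while the runners are still downloading. The snippet below is a minimal, self-contained sketch of that ownership and lazy-load pattern; ModelHandle, ModelOwner, and PlateReader are hypothetical placeholder names (not Frigate APIs) standing in for GenericONNXEmbedding, Embeddings, and LicensePlateRecognition respectively, and it only illustrates the control flow introduced by this patch.

# Illustrative sketch only: placeholder classes mirroring the pattern in this
# patch (central model ownership + lazy load + guard while downloads finish).
# None of these names exist in Frigate.
from typing import Optional


class ModelHandle:
    """Stands in for GenericONNXEmbedding: runner stays None until loaded."""

    def __init__(self, name: str) -> None:
        self.name = name
        self.runner: Optional[object] = None  # set once the ONNX session exists

    def _load_model_and_utils(self) -> None:
        # In Frigate this waits for the download and builds the ONNX runner;
        # here we just mark the model as ready.
        if self.runner is None:
            self.runner = object()


class ModelOwner:
    """Stands in for Embeddings: creates LPR models only when LPR is enabled."""

    def __init__(self, lpr_enabled: bool) -> None:
        self.lpr_detection_model: Optional[ModelHandle] = None
        self.lpr_classification_model: Optional[ModelHandle] = None
        self.lpr_recognition_model: Optional[ModelHandle] = None
        if lpr_enabled:
            self.lpr_detection_model = ModelHandle("detection")
            self.lpr_classification_model = ModelHandle("classification")
            self.lpr_recognition_model = ModelHandle("recognition")


class PlateReader:
    """Stands in for LicensePlateRecognition: borrows models, loads lazily."""

    def __init__(self, enabled: bool, owner: ModelOwner) -> None:
        self.detection_model = owner.lpr_detection_model
        self.classification_model = owner.lpr_classification_model
        self.recognition_model = owner.lpr_recognition_model
        if enabled and self.detection_model is not None:
            # all models need to be loaded to run LPR
            self.detection_model._load_model_and_utils()
            self.classification_model._load_model_and_utils()
            self.recognition_model._load_model_and_utils()

    def recognize(self, image) -> list:
        if (
            self.detection_model is None
            or self.detection_model.runner is None
            or self.classification_model.runner is None
            or self.recognition_model.runner is None
        ):
            # models may still be downloading; skip this frame
            return []
        return ["<plate text>"]


if __name__ == "__main__":
    owner = ModelOwner(lpr_enabled=True)
    reader = PlateReader(enabled=True, owner=owner)
    print(reader.recognize(image=None))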