model fixes and small tweaks

This commit is contained in:
Josh Hawkins 2024-10-26 06:06:12 -05:00
parent 606eef1c58
commit 62e3315086
4 changed files with 87 additions and 60 deletions

View File

@ -9,20 +9,24 @@ from shapely.geometry import Polygon
from frigate.comms.inter_process import InterProcessRequestor
from frigate.config.semantic_search import LicensePlateRecognitionConfig
from frigate.embeddings.functions.onnx import GenericONNXEmbedding, ModelTypeEnum
from frigate.embeddings.embeddings import Embeddings
logger = logging.getLogger(__name__)
class LicensePlateRecognition:
def __init__(
self, config: LicensePlateRecognitionConfig, requestor: InterProcessRequestor
self,
config: LicensePlateRecognitionConfig,
requestor: InterProcessRequestor,
embeddings: Embeddings,
):
self.lpr_config = config
self.requestor = requestor
self.detection_model = self._create_detection_model()
self.classification_model = self._create_classification_model()
self.recognition_model = self._create_recognition_model()
self.embeddings = embeddings
self.detection_model = self.embeddings.lpr_detection_model
self.classification_model = self.embeddings.lpr_classification_model
self.recognition_model = self.embeddings.lpr_recognition_model
self.ctc_decoder = CTCDecoder()
self.batch_size = 6
@ -32,49 +36,12 @@ class LicensePlateRecognition:
self.max_size = 960
self.box_thresh = 0.8
self.mask_thresh = 0.8
self.mean = np.array([123.675, 116.28, 103.53]).reshape(1, -1).astype("float64")
self.std = 1 / np.array([58.395, 57.12, 57.375]).reshape(1, -1).astype(
"float64"
)
def _create_detection_model(self) -> GenericONNXEmbedding:
    """Construct the PaddleOCR text-detection model wrapper.

    Returns a CPU-bound GenericONNXEmbedding configured to download the
    detection ONNX model on first use via ``self.requestor``.
    """
    repo = "https://github.com/hawkeye217/paddleocr-onnx/raw/refs/heads/master/models"
    model_kwargs = {
        "model_name": "paddleocr-onnx",
        "model_file": "detection.onnx",
        "download_urls": {"detection.onnx": f"{repo}/detection.onnx"},
        "model_size": "large",
        "model_type": ModelTypeEnum.alpr_detect,
        "requestor": self.requestor,
        # detection runs on CPU regardless of configured accelerators
        "device": "CPU",
    }
    return GenericONNXEmbedding(**model_kwargs)
def _create_classification_model(self) -> GenericONNXEmbedding:
    """Construct the PaddleOCR orientation-classification model wrapper.

    Returns a CPU-bound GenericONNXEmbedding configured to download the
    classification ONNX model on first use via ``self.requestor``.
    """
    repo = "https://github.com/hawkeye217/paddleocr-onnx/raw/refs/heads/master/models"
    model_kwargs = {
        "model_name": "paddleocr-onnx",
        "model_file": "classification.onnx",
        "download_urls": {"classification.onnx": f"{repo}/classification.onnx"},
        "model_size": "large",
        "model_type": ModelTypeEnum.alpr_classify,
        "requestor": self.requestor,
        # classification runs on CPU regardless of configured accelerators
        "device": "CPU",
    }
    return GenericONNXEmbedding(**model_kwargs)
def _create_recognition_model(self) -> GenericONNXEmbedding:
    """Construct the PaddleOCR text-recognition model wrapper.

    Returns a CPU-bound GenericONNXEmbedding configured to download the
    recognition ONNX model on first use via ``self.requestor``.
    """
    repo = "https://github.com/hawkeye217/paddleocr-onnx/raw/refs/heads/master/models"
    model_kwargs = {
        "model_name": "paddleocr-onnx",
        "model_file": "recognition.onnx",
        "download_urls": {"recognition.onnx": f"{repo}/recognition.onnx"},
        "model_size": "large",
        "model_type": ModelTypeEnum.alpr_recognize,
        "requestor": self.requestor,
        # recognition runs on CPU regardless of configured accelerators
        "device": "CPU",
    }
    return GenericONNXEmbedding(**model_kwargs)
if self.lpr_config.enabled:
# all models need to be loaded to run LPR
self.detection_model._load_model_and_utils()
self.classification_model._load_model_and_utils()
self.recognition_model._load_model_and_utils()
def detect(self, image: np.ndarray) -> List[np.ndarray]:
"""
@ -179,6 +146,15 @@ class LicensePlateRecognition:
Returns:
Tuple[List[str], List[float], List[int]]: Detected license plate texts, confidence scores, and areas of the plates.
"""
if (
self.detection_model.runner is None
or self.classification_model.runner is None
or self.recognition_model.runner is None
):
# we might still be downloading the models
logger.debug("Model runners not loaded")
return [], [], []
plate_points = self.detect(image)
if len(plate_points) == 0:
return [], [], []
@ -209,7 +185,7 @@ class LicensePlateRecognition:
average_confidence = conf
# TODO: remove
# set to True to write each cropped image for debugging
if False:
save_image = cv2.cvtColor(
rotated_images[original_idx], cv2.COLOR_RGB2BGR
@ -251,9 +227,12 @@ class LicensePlateRecognition:
Returns:
np.ndarray: The normalized image, transposed to match the model's expected input format.
"""
mean = np.array([123.675, 116.28, 103.53]).reshape(1, -1).astype("float64")
std = 1 / np.array([58.395, 57.12, 57.375]).reshape(1, -1).astype("float64")
image = image.astype("float32")
cv2.subtract(image, self.mean, image)
cv2.multiply(image, self.std, image)
cv2.subtract(image, mean, image)
cv2.multiply(image, std, image)
return image.transpose((2, 0, 1))[np.newaxis, ...]
def boxes_from_bitmap(

View File

@ -77,6 +77,10 @@ class Embeddings:
if config.semantic_search.model_size == "large"
else "jinaai/jina-clip-v1-vision_model_quantized.onnx",
"jinaai/jina-clip-v1-preprocessor_config.json",
"facenet-facenet.onnx",
"paddleocr-onnx-detection.onnx",
"paddleocr-onnx-classification.onnx",
"paddleocr-onnx-recognition.onnx",
]
for model in models:
@ -138,6 +142,47 @@ class Embeddings:
device="GPU",
)
self.lpr_detection_model = None
self.lpr_classification_model = None
self.lpr_recognition_model = None
if self.config.lpr.enabled:
self.lpr_detection_model = GenericONNXEmbedding(
model_name="paddleocr-onnx",
model_file="detection.onnx",
download_urls={
"detection.onnx": "https://github.com/hawkeye217/paddleocr-onnx/raw/refs/heads/master/models/detection.onnx"
},
model_size="large",
model_type=ModelTypeEnum.alpr_detect,
requestor=self.requestor,
device="CPU",
)
self.lpr_classification_model = GenericONNXEmbedding(
model_name="paddleocr-onnx",
model_file="classification.onnx",
download_urls={
"classification.onnx": "https://github.com/hawkeye217/paddleocr-onnx/raw/refs/heads/master/models/classification.onnx"
},
model_size="large",
model_type=ModelTypeEnum.alpr_classify,
requestor=self.requestor,
device="CPU",
)
self.lpr_recognition_model = GenericONNXEmbedding(
model_name="paddleocr-onnx",
model_file="recognition.onnx",
download_urls={
"recognition.onnx": "https://github.com/hawkeye217/paddleocr-onnx/raw/refs/heads/master/models/recognition.onnx"
},
model_size="large",
model_type=ModelTypeEnum.alpr_recognize,
requestor=self.requestor,
device="CPU",
)
def embed_thumbnail(
self, event_id: str, thumbnail: bytes, upsert: bool = True
) -> ndarray:

View File

@ -92,7 +92,7 @@ class GenericONNXEmbedding:
files_names,
ModelStatusTypesEnum.downloaded,
)
self._load_model_and_tokenizer()
self._load_model_and_utils()
logger.debug(f"models are already downloaded for {self.model_name}")
def _download_model(self, path: str):
@ -132,7 +132,7 @@ class GenericONNXEmbedding:
},
)
def _load_model_and_tokenizer(self):
def _load_model_and_utils(self):
if self.runner is None:
if self.downloader:
self.downloader.wait_for_download()
@ -254,7 +254,7 @@ class GenericONNXEmbedding:
def __call__(
self, inputs: Union[List[str], List[Image.Image], List[str]]
) -> List[np.ndarray]:
self._load_model_and_tokenizer()
self._load_model_and_utils()
if self.runner is None or (
self.tokenizer is None and self.feature_extractor is None
):

View File

@ -75,13 +75,12 @@ class EmbeddingMaintainer(threading.Thread):
# set license plate recognition conditions
self.lpr_config = self.config.lpr
self.requires_license_plate_detection = (
"license_plate" not in self.config.model.all_attributes
)
self.detected_license_plates: dict[str, dict[str, any]] = {}
self.license_plate_recognition = LicensePlateRecognition(
self.lpr_config, self.requestor
)
if self.lpr_config.enabled:
self.license_plate_recognition = LicensePlateRecognition(
self.lpr_config, self.requestor, self.embeddings
)
@property
def face_detector(self) -> cv2.FaceDetectorYN:
@ -555,8 +554,12 @@ class EmbeddingMaintainer(threading.Thread):
if license_plates:
for plate, confidence, text_area in zip(license_plates, confidences, areas):
avg_confidence = (
(sum(confidence) / len(confidence)) if confidence else 0
)
logger.debug(
f"Detected text: {plate} (average confidence: {(sum(confidence) / len(confidence)):.2f}, area: {text_area} pixels)"
f"Detected text: {plate} (average confidence: {avg_confidence:.2f}, area: {text_area} pixels)"
)
else:
# no plates found