From 62e3315086268a26a6a2fd4611a2dd00ad125865 Mon Sep 17 00:00:00 2001 From: Josh Hawkins <32435876+hawkeye217@users.noreply.github.com> Date: Sat, 26 Oct 2024 06:06:12 -0500 Subject: [PATCH] model fixes and small tweaks --- frigate/embeddings/alpr/alpr.py | 79 ++++++++++------------------ frigate/embeddings/embeddings.py | 45 ++++++++++++++++ frigate/embeddings/functions/onnx.py | 6 +-- frigate/embeddings/maintainer.py | 17 +++--- 4 files changed, 87 insertions(+), 60 deletions(-) diff --git a/frigate/embeddings/alpr/alpr.py b/frigate/embeddings/alpr/alpr.py index d96711017..b91a50e3a 100644 --- a/frigate/embeddings/alpr/alpr.py +++ b/frigate/embeddings/alpr/alpr.py @@ -9,20 +9,24 @@ from shapely.geometry import Polygon from frigate.comms.inter_process import InterProcessRequestor from frigate.config.semantic_search import LicensePlateRecognitionConfig -from frigate.embeddings.functions.onnx import GenericONNXEmbedding, ModelTypeEnum +from frigate.embeddings.embeddings import Embeddings logger = logging.getLogger(__name__) class LicensePlateRecognition: def __init__( - self, config: LicensePlateRecognitionConfig, requestor: InterProcessRequestor + self, + config: LicensePlateRecognitionConfig, + requestor: InterProcessRequestor, + embeddings: Embeddings, ): self.lpr_config = config self.requestor = requestor - self.detection_model = self._create_detection_model() - self.classification_model = self._create_classification_model() - self.recognition_model = self._create_recognition_model() + self.embeddings = embeddings + self.detection_model = self.embeddings.lpr_detection_model + self.classification_model = self.embeddings.lpr_classification_model + self.recognition_model = self.embeddings.lpr_recognition_model self.ctc_decoder = CTCDecoder() self.batch_size = 6 @@ -32,49 +36,12 @@ class LicensePlateRecognition: self.max_size = 960 self.box_thresh = 0.8 self.mask_thresh = 0.8 - self.mean = np.array([123.675, 116.28, 103.53]).reshape(1, -1).astype("float64") - self.std = 1 / np.array([58.395, 57.12, 57.375]).reshape(1, -1).astype( - "float64" - ) - def _create_detection_model(self) -> GenericONNXEmbedding: - return GenericONNXEmbedding( - model_name="paddleocr-onnx", - model_file="detection.onnx", - download_urls={ - "detection.onnx": "https://github.com/hawkeye217/paddleocr-onnx/raw/refs/heads/master/models/detection.onnx" - }, - model_size="large", - model_type=ModelTypeEnum.alpr_detect, - requestor=self.requestor, - device="CPU", - ) - - def _create_classification_model(self) -> GenericONNXEmbedding: - return GenericONNXEmbedding( - model_name="paddleocr-onnx", - model_file="classification.onnx", - download_urls={ - "classification.onnx": "https://github.com/hawkeye217/paddleocr-onnx/raw/refs/heads/master/models/classification.onnx" - }, - model_size="large", - model_type=ModelTypeEnum.alpr_classify, - requestor=self.requestor, - device="CPU", - ) - - def _create_recognition_model(self) -> GenericONNXEmbedding: - return GenericONNXEmbedding( - model_name="paddleocr-onnx", - model_file="recognition.onnx", - download_urls={ - "recognition.onnx": "https://github.com/hawkeye217/paddleocr-onnx/raw/refs/heads/master/models/recognition.onnx" - }, - model_size="large", - model_type=ModelTypeEnum.alpr_recognize, - requestor=self.requestor, - device="CPU", - ) + if self.lpr_config.enabled: + # all models need to be loaded to run LPR + self.detection_model._load_model_and_utils() + self.classification_model._load_model_and_utils() + self.recognition_model._load_model_and_utils() def detect(self, 
image: np.ndarray) -> List[np.ndarray]: """ @@ -179,6 +146,15 @@ class LicensePlateRecognition: Returns: Tuple[List[str], List[float], List[int]]: Detected license plate texts, confidence scores, and areas of the plates. """ + if ( + self.detection_model.runner is None + or self.classification_model.runner is None + or self.recognition_model.runner is None + ): + # we might still be downloading the models + logger.debug("Model runners not loaded") + return [], [], [] + plate_points = self.detect(image) if len(plate_points) == 0: return [], [], [] @@ -209,7 +185,7 @@ class LicensePlateRecognition: average_confidence = conf - # TODO: remove + # set to True to write each cropped image for debugging if False: save_image = cv2.cvtColor( rotated_images[original_idx], cv2.COLOR_RGB2BGR @@ -251,9 +227,12 @@ class LicensePlateRecognition: Returns: np.ndarray: The normalized image, transposed to match the model's expected input format. """ + mean = np.array([123.675, 116.28, 103.53]).reshape(1, -1).astype("float64") + std = 1 / np.array([58.395, 57.12, 57.375]).reshape(1, -1).astype("float64") + image = image.astype("float32") - cv2.subtract(image, self.mean, image) - cv2.multiply(image, self.std, image) + cv2.subtract(image, mean, image) + cv2.multiply(image, std, image) return image.transpose((2, 0, 1))[np.newaxis, ...] def boxes_from_bitmap( diff --git a/frigate/embeddings/embeddings.py b/frigate/embeddings/embeddings.py index cc54ba548..a2de88394 100644 --- a/frigate/embeddings/embeddings.py +++ b/frigate/embeddings/embeddings.py @@ -77,6 +77,10 @@ class Embeddings: if config.semantic_search.model_size == "large" else "jinaai/jina-clip-v1-vision_model_quantized.onnx", "jinaai/jina-clip-v1-preprocessor_config.json", + "facenet-facenet.onnx", + "paddleocr-onnx-detection.onnx", + "paddleocr-onnx-classification.onnx", + "paddleocr-onnx-recognition.onnx", ] for model in models: @@ -138,6 +142,47 @@ class Embeddings: device="GPU", ) + self.lpr_detection_model = None + self.lpr_classification_model = None + self.lpr_recognition_model = None + + if self.config.lpr.enabled: + self.lpr_detection_model = GenericONNXEmbedding( + model_name="paddleocr-onnx", + model_file="detection.onnx", + download_urls={ + "detection.onnx": "https://github.com/hawkeye217/paddleocr-onnx/raw/refs/heads/master/models/detection.onnx" + }, + model_size="large", + model_type=ModelTypeEnum.alpr_detect, + requestor=self.requestor, + device="CPU", + ) + + self.lpr_classification_model = GenericONNXEmbedding( + model_name="paddleocr-onnx", + model_file="classification.onnx", + download_urls={ + "classification.onnx": "https://github.com/hawkeye217/paddleocr-onnx/raw/refs/heads/master/models/classification.onnx" + }, + model_size="large", + model_type=ModelTypeEnum.alpr_classify, + requestor=self.requestor, + device="CPU", + ) + + self.lpr_recognition_model = GenericONNXEmbedding( + model_name="paddleocr-onnx", + model_file="recognition.onnx", + download_urls={ + "recognition.onnx": "https://github.com/hawkeye217/paddleocr-onnx/raw/refs/heads/master/models/recognition.onnx" + }, + model_size="large", + model_type=ModelTypeEnum.alpr_recognize, + requestor=self.requestor, + device="CPU", + ) + def embed_thumbnail( self, event_id: str, thumbnail: bytes, upsert: bool = True ) -> ndarray: diff --git a/frigate/embeddings/functions/onnx.py b/frigate/embeddings/functions/onnx.py index 1507ba21d..200f728d3 100644 --- a/frigate/embeddings/functions/onnx.py +++ b/frigate/embeddings/functions/onnx.py @@ -92,7 +92,7 @@ class GenericONNXEmbedding: 
files_names, ModelStatusTypesEnum.downloaded, ) - self._load_model_and_tokenizer() + self._load_model_and_utils() logger.debug(f"models are already downloaded for {self.model_name}") def _download_model(self, path: str): @@ -132,7 +132,7 @@ class GenericONNXEmbedding: }, ) - def _load_model_and_tokenizer(self): + def _load_model_and_utils(self): if self.runner is None: if self.downloader: self.downloader.wait_for_download() @@ -254,7 +254,7 @@ class GenericONNXEmbedding: def __call__( self, inputs: Union[List[str], List[Image.Image], List[str]] ) -> List[np.ndarray]: - self._load_model_and_tokenizer() + self._load_model_and_utils() if self.runner is None or ( self.tokenizer is None and self.feature_extractor is None ): diff --git a/frigate/embeddings/maintainer.py b/frigate/embeddings/maintainer.py index f8736128e..19337f41f 100644 --- a/frigate/embeddings/maintainer.py +++ b/frigate/embeddings/maintainer.py @@ -75,13 +75,12 @@ class EmbeddingMaintainer(threading.Thread): # set license plate recognition conditions self.lpr_config = self.config.lpr - self.requires_license_plate_detection = ( - "license_plate" not in self.config.model.all_attributes - ) self.detected_license_plates: dict[str, dict[str, any]] = {} - self.license_plate_recognition = LicensePlateRecognition( - self.lpr_config, self.requestor - ) + + if self.lpr_config.enabled: + self.license_plate_recognition = LicensePlateRecognition( + self.lpr_config, self.requestor, self.embeddings + ) @property def face_detector(self) -> cv2.FaceDetectorYN: @@ -555,8 +554,12 @@ class EmbeddingMaintainer(threading.Thread): if license_plates: for plate, confidence, text_area in zip(license_plates, confidences, areas): + avg_confidence = ( + (sum(confidence) / len(confidence)) if confidence else 0 + ) + logger.debug( - f"Detected text: {plate} (average confidence: {(sum(confidence) / len(confidence)):.2f}, area: {text_area} pixels)" + f"Detected text: {plate} (average confidence: {avg_confidence:.2f}, area: {text_area} pixels)" ) else: # no plates found
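
Note for reviewers: the core of this change is that the three PaddleOCR ONNX models are now owned by Embeddings and only instantiated when LPR is enabled, while LicensePlateRecognition borrows them, loads them lazily, and returns early from recognition while the runners are still downloading. The snippet below is a minimal, self-contained sketch of that ownership and lazy-load pattern; ModelHandle, ModelOwner, and PlateReader are hypothetical placeholder names (not Frigate APIs) standing in for GenericONNXEmbedding, Embeddings, and LicensePlateRecognition respectively, and it only illustrates the control flow introduced by this patch.

# Illustrative sketch only: placeholder classes mirroring the pattern in this
# patch (central model ownership + lazy load + guard while downloads finish).
# None of these names exist in Frigate.
from typing import Optional


class ModelHandle:
    """Stands in for GenericONNXEmbedding: runner stays None until loaded."""

    def __init__(self, name: str) -> None:
        self.name = name
        self.runner: Optional[object] = None  # set once the ONNX session exists

    def _load_model_and_utils(self) -> None:
        # In Frigate this waits for the download and builds the ONNX runner;
        # here we just mark the model as ready.
        if self.runner is None:
            self.runner = object()


class ModelOwner:
    """Stands in for Embeddings: creates LPR models only when LPR is enabled."""

    def __init__(self, lpr_enabled: bool) -> None:
        self.lpr_detection_model: Optional[ModelHandle] = None
        self.lpr_classification_model: Optional[ModelHandle] = None
        self.lpr_recognition_model: Optional[ModelHandle] = None
        if lpr_enabled:
            self.lpr_detection_model = ModelHandle("detection")
            self.lpr_classification_model = ModelHandle("classification")
            self.lpr_recognition_model = ModelHandle("recognition")


class PlateReader:
    """Stands in for LicensePlateRecognition: borrows models, loads lazily."""

    def __init__(self, enabled: bool, owner: ModelOwner) -> None:
        self.detection_model = owner.lpr_detection_model
        self.classification_model = owner.lpr_classification_model
        self.recognition_model = owner.lpr_recognition_model
        if enabled and self.detection_model is not None:
            # all models need to be loaded to run LPR
            self.detection_model._load_model_and_utils()
            self.classification_model._load_model_and_utils()
            self.recognition_model._load_model_and_utils()

    def recognize(self, image) -> list:
        if (
            self.detection_model is None
            or self.detection_model.runner is None
            or self.classification_model.runner is None
            or self.recognition_model.runner is None
        ):
            # models may still be downloading; skip this frame
            return []
        return ["<plate text>"]


if __name__ == "__main__":
    owner = ModelOwner(lpr_enabled=True)
    reader = PlateReader(enabled=True, owner=owner)
    print(reader.recognize(image=None))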