use a small yolov9 model for detection

Josh Hawkins 2025-02-13 10:44:46 -06:00
parent d6b5dc93cc
commit 0dcfcb2cf7


@@ -5,6 +5,7 @@ from enum import Enum
 from io import BytesIO
 from typing import Dict, List, Optional, Union
 
+import cv2
 import numpy as np
 import requests
 from PIL import Image
@@ -32,6 +33,7 @@ disable_progress_bar()
 logger = logging.getLogger(__name__)
 
 FACE_EMBEDDING_SIZE = 160
+LPR_EMBEDDING_SIZE = 256
 
 
 class ModelTypeEnum(str, Enum):
@@ -41,6 +43,7 @@ class ModelTypeEnum(str, Enum):
     lpr_detect = "lpr_detect"
     lpr_classify = "lpr_classify"
     lpr_recognize = "lpr_recognize"
+    yolov9_lpr_detect = "yolov9_lpr_detect"
 
 
 class GenericONNXEmbedding:
@@ -148,6 +151,8 @@ class GenericONNXEmbedding:
             self.feature_extractor = []
         elif self.model_type == ModelTypeEnum.lpr_recognize:
             self.feature_extractor = []
+        elif self.model_type == ModelTypeEnum.yolov9_lpr_detect:
+            self.feature_extractor = []
 
         self.runner = ONNXModelRunner(
             os.path.join(self.download_path, self.model_file),
@@ -237,6 +242,45 @@ class GenericONNXEmbedding:
             for img in raw_inputs:
                 processed.append({"x": img})
             return processed
+        elif self.model_type == ModelTypeEnum.yolov9_lpr_detect:
+            if isinstance(raw_inputs, list):
+                raise ValueError(
+                    "License plate embedding does not support batch inputs."
+                )
+            # Get image as numpy array
+            img = self._process_image(raw_inputs)
+            height, width, channels = img.shape
+
+            # Resize maintaining aspect ratio
+            if width > height:
+                new_height = int(((height / width) * LPR_EMBEDDING_SIZE) // 4 * 4)
+                img = cv2.resize(img, (LPR_EMBEDDING_SIZE, new_height))
+            else:
+                new_width = int(((width / height) * LPR_EMBEDDING_SIZE) // 4 * 4)
+                img = cv2.resize(img, (new_width, LPR_EMBEDDING_SIZE))
+
+            # Get new dimensions after resize
+            og_h, og_w, channels = img.shape
+
+            # Create black square frame
+            frame = np.full(
+                (LPR_EMBEDDING_SIZE, LPR_EMBEDDING_SIZE, channels),
+                (0, 0, 0),
+                dtype=np.float32,
+            )
+
+            # Center the resized image in the square frame
+            x_center = (LPR_EMBEDDING_SIZE - og_w) // 2
+            y_center = (LPR_EMBEDDING_SIZE - og_h) // 2
+            frame[y_center : y_center + og_h, x_center : x_center + og_w] = img
+
+            # Normalize to 0-1
+            frame = frame / 255.0
+
+            # Convert from HWC to CHW format and add batch dimension
+            frame = np.transpose(frame, (2, 0, 1))
+            frame = np.expand_dims(frame, axis=0)
+            return [{"images": frame}]
         else:
             raise ValueError(f"Unable to preprocess inputs for {self.model_type}")
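For reference, the new yolov9_lpr_detect branch is a standard letterbox transform: the plate crop is scaled so its longer side equals LPR_EMBEDDING_SIZE (256) with the shorter side rounded down to a multiple of 4, centered on a black 256x256 canvas, normalized to 0-1, and converted to an NCHW batch of one. Below is a minimal standalone sketch of the same transform; the function name and dummy input are illustrative, not part of this commit.

import cv2
import numpy as np

LPR_EMBEDDING_SIZE = 256

def letterbox_plate(img: np.ndarray, size: int = LPR_EMBEDDING_SIZE) -> np.ndarray:
    # Mirrors the yolov9_lpr_detect preprocessing above (illustrative only)
    height, width, channels = img.shape
    if width > height:
        # Longer side is width: scale it to `size`; the shorter side keeps
        # the aspect ratio and is rounded down to a multiple of 4
        img = cv2.resize(img, (size, int(((height / width) * size) // 4 * 4)))
    else:
        img = cv2.resize(img, (int(((width / height) * size) // 4 * 4), size))
    og_h, og_w, _ = img.shape
    # Paste the resized crop into the center of a black square canvas
    frame = np.zeros((size, size, channels), dtype=np.float32)
    y0 = (size - og_h) // 2
    x0 = (size - og_w) // 2
    frame[y0 : y0 + og_h, x0 : x0 + og_w] = img
    # Scale to 0-1, reorder HWC -> CHW, and add a batch dimension
    return np.expand_dims(np.transpose(frame / 255.0, (2, 0, 1)), axis=0)

# A 320x640 crop letterboxes to a (1, 3, 256, 256) tensor
dummy = np.random.randint(0, 255, (320, 640, 3), dtype=np.uint8)
print(letterbox_plate(dummy).shape)  # (1, 3, 256, 256)

Rounding the scaled side down to a multiple of 4 presumably keeps the padded input aligned with the detector's stride; the centering offsets (x_center, y_center) would also be needed downstream to map detected boxes back onto the original crop.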