use a small yolov9 model for detection

Josh Hawkins 2025-02-13 10:44:46 -06:00
parent d6b5dc93cc
commit 0dcfcb2cf7


@@ -5,6 +5,7 @@ from enum import Enum
 from io import BytesIO
 from typing import Dict, List, Optional, Union
 
+import cv2
 import numpy as np
 import requests
 from PIL import Image
@@ -32,6 +33,7 @@ disable_progress_bar()
 logger = logging.getLogger(__name__)
 
 FACE_EMBEDDING_SIZE = 160
+LPR_EMBEDDING_SIZE = 256
 
 
 class ModelTypeEnum(str, Enum):
@@ -41,6 +43,7 @@ class ModelTypeEnum(str, Enum):
     lpr_detect = "lpr_detect"
     lpr_classify = "lpr_classify"
     lpr_recognize = "lpr_recognize"
+    yolov9_lpr_detect = "yolov9_lpr_detect"
 
 
 class GenericONNXEmbedding:
@@ -148,6 +151,8 @@ class GenericONNXEmbedding:
             self.feature_extractor = []
         elif self.model_type == ModelTypeEnum.lpr_recognize:
             self.feature_extractor = []
+        elif self.model_type == ModelTypeEnum.yolov9_lpr_detect:
+            self.feature_extractor = []
 
         self.runner = ONNXModelRunner(
             os.path.join(self.download_path, self.model_file),
@@ -237,6 +242,45 @@ class GenericONNXEmbedding:
             for img in raw_inputs:
                 processed.append({"x": img})
             return processed
+        elif self.model_type == ModelTypeEnum.yolov9_lpr_detect:
+            if isinstance(raw_inputs, list):
+                raise ValueError(
+                    "License plate embedding does not support batch inputs."
+                )
+            # Get image as numpy array
+            img = self._process_image(raw_inputs)
+            height, width, channels = img.shape
+
+            # Resize maintaining aspect ratio
+            if width > height:
+                new_height = int(((height / width) * LPR_EMBEDDING_SIZE) // 4 * 4)
+                img = cv2.resize(img, (LPR_EMBEDDING_SIZE, new_height))
+            else:
+                new_width = int(((width / height) * LPR_EMBEDDING_SIZE) // 4 * 4)
+                img = cv2.resize(img, (new_width, LPR_EMBEDDING_SIZE))
+
+            # Get new dimensions after resize
+            og_h, og_w, channels = img.shape
+
+            # Create black square frame
+            frame = np.full(
+                (LPR_EMBEDDING_SIZE, LPR_EMBEDDING_SIZE, channels),
+                (0, 0, 0),
+                dtype=np.float32,
+            )
+
+            # Center the resized image in the square frame
+            x_center = (LPR_EMBEDDING_SIZE - og_w) // 2
+            y_center = (LPR_EMBEDDING_SIZE - og_h) // 2
+            frame[y_center : y_center + og_h, x_center : x_center + og_w] = img
+
+            # Normalize to 0-1
+            frame = frame / 255.0
+
+            # Convert from HWC to CHW format and add batch dimension
+            frame = np.transpose(frame, (2, 0, 1))
+            frame = np.expand_dims(frame, axis=0)
+            return [{"images": frame}]
         else:
             raise ValueError(f"Unable to preprocess inputs for {self.model_type}")
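For reference, the new yolov9_lpr_detect branch is a standard letterbox transform: the plate crop is scaled so its longer side equals LPR_EMBEDDING_SIZE (256) with the shorter side rounded down to a multiple of 4, centered on a black 256x256 canvas, normalized to 0-1, and converted to an NCHW batch of one. Below is a minimal standalone sketch of the same transform; the function name and dummy input are illustrative, not part of this commit.

import cv2
import numpy as np

LPR_EMBEDDING_SIZE = 256

def letterbox_plate(img: np.ndarray, size: int = LPR_EMBEDDING_SIZE) -> np.ndarray:
    # Mirrors the yolov9_lpr_detect preprocessing above (illustrative only)
    height, width, channels = img.shape
    if width > height:
        # Longer side is width: scale it to `size`; the shorter side keeps
        # the aspect ratio and is rounded down to a multiple of 4
        img = cv2.resize(img, (size, int(((height / width) * size) // 4 * 4)))
    else:
        img = cv2.resize(img, (int(((width / height) * size) // 4 * 4), size))
    og_h, og_w, _ = img.shape
    # Paste the resized crop into the center of a black square canvas
    frame = np.zeros((size, size, channels), dtype=np.float32)
    y0 = (size - og_h) // 2
    x0 = (size - og_w) // 2
    frame[y0 : y0 + og_h, x0 : x0 + og_w] = img
    # Scale to 0-1, reorder HWC -> CHW, and add a batch dimension
    return np.expand_dims(np.transpose(frame / 255.0, (2, 0, 1)), axis=0)

# A 320x640 crop letterboxes to a (1, 3, 256, 256) tensor
dummy = np.random.randint(0, 255, (320, 640, 3), dtype=np.uint8)
print(letterbox_plate(dummy).shape)  # (1, 3, 256, 256)

Rounding the scaled side down to a multiple of 4 presumably keeps the padded input aligned with the detector's stride; the centering offsets (x_center, y_center) would also be needed downstream to map detected boxes back onto the original crop.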