model fixes and small tweaks

This commit is contained in:
Josh Hawkins 2024-10-26 06:06:12 -05:00
parent 606eef1c58
commit 62e3315086
4 changed files with 87 additions and 60 deletions

View File

@ -9,20 +9,24 @@ from shapely.geometry import Polygon
from frigate.comms.inter_process import InterProcessRequestor
from frigate.config.semantic_search import LicensePlateRecognitionConfig
from frigate.embeddings.functions.onnx import GenericONNXEmbedding, ModelTypeEnum
from frigate.embeddings.embeddings import Embeddings
logger = logging.getLogger(__name__)
class LicensePlateRecognition:
def __init__(
self, config: LicensePlateRecognitionConfig, requestor: InterProcessRequestor
self,
config: LicensePlateRecognitionConfig,
requestor: InterProcessRequestor,
embeddings: Embeddings,
):
self.lpr_config = config
self.requestor = requestor
self.detection_model = self._create_detection_model()
self.classification_model = self._create_classification_model()
self.recognition_model = self._create_recognition_model()
self.embeddings = embeddings
self.detection_model = self.embeddings.lpr_detection_model
self.classification_model = self.embeddings.lpr_classification_model
self.recognition_model = self.embeddings.lpr_recognition_model
self.ctc_decoder = CTCDecoder()
self.batch_size = 6
@ -32,49 +36,12 @@ class LicensePlateRecognition:
self.max_size = 960
self.box_thresh = 0.8
self.mask_thresh = 0.8
self.mean = np.array([123.675, 116.28, 103.53]).reshape(1, -1).astype("float64")
self.std = 1 / np.array([58.395, 57.12, 57.375]).reshape(1, -1).astype(
"float64"
)
def _create_detection_model(self) -> GenericONNXEmbedding:
    """Construct the PaddleOCR text-detection model wrapper.

    Returns a CPU-bound GenericONNXEmbedding configured to download the
    detection ONNX model on first use via ``self.requestor``.
    """
    repo = "https://github.com/hawkeye217/paddleocr-onnx/raw/refs/heads/master/models"
    model_kwargs = {
        "model_name": "paddleocr-onnx",
        "model_file": "detection.onnx",
        "download_urls": {"detection.onnx": f"{repo}/detection.onnx"},
        "model_size": "large",
        "model_type": ModelTypeEnum.alpr_detect,
        "requestor": self.requestor,
        # detection runs on CPU regardless of configured accelerators
        "device": "CPU",
    }
    return GenericONNXEmbedding(**model_kwargs)
def _create_classification_model(self) -> GenericONNXEmbedding:
    """Construct the PaddleOCR orientation-classification model wrapper.

    Returns a CPU-bound GenericONNXEmbedding configured to download the
    classification ONNX model on first use via ``self.requestor``.
    """
    repo = "https://github.com/hawkeye217/paddleocr-onnx/raw/refs/heads/master/models"
    model_kwargs = {
        "model_name": "paddleocr-onnx",
        "model_file": "classification.onnx",
        "download_urls": {"classification.onnx": f"{repo}/classification.onnx"},
        "model_size": "large",
        "model_type": ModelTypeEnum.alpr_classify,
        "requestor": self.requestor,
        # classification runs on CPU regardless of configured accelerators
        "device": "CPU",
    }
    return GenericONNXEmbedding(**model_kwargs)
def _create_recognition_model(self) -> GenericONNXEmbedding:
    """Construct the PaddleOCR text-recognition model wrapper.

    Returns a CPU-bound GenericONNXEmbedding configured to download the
    recognition ONNX model on first use via ``self.requestor``.
    """
    repo = "https://github.com/hawkeye217/paddleocr-onnx/raw/refs/heads/master/models"
    model_kwargs = {
        "model_name": "paddleocr-onnx",
        "model_file": "recognition.onnx",
        "download_urls": {"recognition.onnx": f"{repo}/recognition.onnx"},
        "model_size": "large",
        "model_type": ModelTypeEnum.alpr_recognize,
        "requestor": self.requestor,
        # recognition runs on CPU regardless of configured accelerators
        "device": "CPU",
    }
    return GenericONNXEmbedding(**model_kwargs)
if self.lpr_config.enabled:
# all models need to be loaded to run LPR
self.detection_model._load_model_and_utils()
self.classification_model._load_model_and_utils()
self.recognition_model._load_model_and_utils()
def detect(self, image: np.ndarray) -> List[np.ndarray]:
"""
@ -179,6 +146,15 @@ class LicensePlateRecognition:
Returns:
Tuple[List[str], List[float], List[int]]: Detected license plate texts, confidence scores, and areas of the plates.
"""
if (
self.detection_model.runner is None
or self.classification_model.runner is None
or self.recognition_model.runner is None
):
# we might still be downloading the models
logger.debug("Model runners not loaded")
return [], [], []
plate_points = self.detect(image)
if len(plate_points) == 0:
return [], [], []
@ -209,7 +185,7 @@ class LicensePlateRecognition:
average_confidence = conf
# TODO: remove
# set to True to write each cropped image for debugging
if False:
save_image = cv2.cvtColor(
rotated_images[original_idx], cv2.COLOR_RGB2BGR
@ -251,9 +227,12 @@ class LicensePlateRecognition:
Returns:
np.ndarray: The normalized image, transposed to match the model's expected input format.
"""
mean = np.array([123.675, 116.28, 103.53]).reshape(1, -1).astype("float64")
std = 1 / np.array([58.395, 57.12, 57.375]).reshape(1, -1).astype("float64")
image = image.astype("float32")
cv2.subtract(image, self.mean, image)
cv2.multiply(image, self.std, image)
cv2.subtract(image, mean, image)
cv2.multiply(image, std, image)
return image.transpose((2, 0, 1))[np.newaxis, ...]
def boxes_from_bitmap(

View File

@ -77,6 +77,10 @@ class Embeddings:
if config.semantic_search.model_size == "large"
else "jinaai/jina-clip-v1-vision_model_quantized.onnx",
"jinaai/jina-clip-v1-preprocessor_config.json",
"facenet-facenet.onnx",
"paddleocr-onnx-detection.onnx",
"paddleocr-onnx-classification.onnx",
"paddleocr-onnx-recognition.onnx",
]
for model in models:
@ -138,6 +142,47 @@ class Embeddings:
device="GPU",
)
self.lpr_detection_model = None
self.lpr_classification_model = None
self.lpr_recognition_model = None
if self.config.lpr.enabled:
self.lpr_detection_model = GenericONNXEmbedding(
model_name="paddleocr-onnx",
model_file="detection.onnx",
download_urls={
"detection.onnx": "https://github.com/hawkeye217/paddleocr-onnx/raw/refs/heads/master/models/detection.onnx"
},
model_size="large",
model_type=ModelTypeEnum.alpr_detect,
requestor=self.requestor,
device="CPU",
)
self.lpr_classification_model = GenericONNXEmbedding(
model_name="paddleocr-onnx",
model_file="classification.onnx",
download_urls={
"classification.onnx": "https://github.com/hawkeye217/paddleocr-onnx/raw/refs/heads/master/models/classification.onnx"
},
model_size="large",
model_type=ModelTypeEnum.alpr_classify,
requestor=self.requestor,
device="CPU",
)
self.lpr_recognition_model = GenericONNXEmbedding(
model_name="paddleocr-onnx",
model_file="recognition.onnx",
download_urls={
"recognition.onnx": "https://github.com/hawkeye217/paddleocr-onnx/raw/refs/heads/master/models/recognition.onnx"
},
model_size="large",
model_type=ModelTypeEnum.alpr_recognize,
requestor=self.requestor,
device="CPU",
)
def embed_thumbnail(
self, event_id: str, thumbnail: bytes, upsert: bool = True
) -> ndarray:

View File

@ -92,7 +92,7 @@ class GenericONNXEmbedding:
files_names,
ModelStatusTypesEnum.downloaded,
)
self._load_model_and_tokenizer()
self._load_model_and_utils()
logger.debug(f"models are already downloaded for {self.model_name}")
def _download_model(self, path: str):
@ -132,7 +132,7 @@ class GenericONNXEmbedding:
},
)
def _load_model_and_tokenizer(self):
def _load_model_and_utils(self):
if self.runner is None:
if self.downloader:
self.downloader.wait_for_download()
@ -254,7 +254,7 @@ class GenericONNXEmbedding:
def __call__(
self, inputs: Union[List[str], List[Image.Image], List[str]]
) -> List[np.ndarray]:
self._load_model_and_tokenizer()
self._load_model_and_utils()
if self.runner is None or (
self.tokenizer is None and self.feature_extractor is None
):

View File

@ -75,13 +75,12 @@ class EmbeddingMaintainer(threading.Thread):
# set license plate recognition conditions
self.lpr_config = self.config.lpr
self.requires_license_plate_detection = (
"license_plate" not in self.config.model.all_attributes
)
self.detected_license_plates: dict[str, dict[str, any]] = {}
self.license_plate_recognition = LicensePlateRecognition(
self.lpr_config, self.requestor
)
if self.lpr_config.enabled:
self.license_plate_recognition = LicensePlateRecognition(
self.lpr_config, self.requestor, self.embeddings
)
@property
def face_detector(self) -> cv2.FaceDetectorYN:
@ -555,8 +554,12 @@ class EmbeddingMaintainer(threading.Thread):
if license_plates:
for plate, confidence, text_area in zip(license_plates, confidences, areas):
avg_confidence = (
(sum(confidence) / len(confidence)) if confidence else 0
)
logger.debug(
f"Detected text: {plate} (average confidence: {(sum(confidence) / len(confidence)):.2f}, area: {text_area} pixels)"
f"Detected text: {plate} (average confidence: {avg_confidence:.2f}, area: {text_area} pixels)"
)
else:
# no plates found