apply filters after clustering (#22308)

apply length and format filters to the clustered representative plate rather than individual OCR readings, so noisy variants still contribute to clustering even when they don't pass on their own
2026-04-24 23:57:41 +03:00 · 2026-03-07 07:42:27 -06:00 · 2026-03-07 07:42:27 -06:00 · dda9f7bfed
commit dda9f7bfed
parent c2e667c0dd
1 changed files with 23 additions and 27 deletions
--- a/frigate/data_processing/common/license_plate/mixin.py
+++ b/frigate/data_processing/common/license_plate/mixin.py
@ -401,35 +401,10 @@ class LicensePlateProcessingMixin:
            all_confidences.append(flat_confidences)
            all_areas.append(combined_area)
-        # Step 3: Filter and sort the combined plates
+        # Step 3: Sort the combined plates
        if all_license_plates:
            filtered_data = []
            for plate, conf_list, area in zip(
                all_license_plates, all_confidences, all_areas
            ):
                if len(plate) < self.lpr_config.min_plate_length:
                    logger.debug(
                        f"{camera}: Filtered out '{plate}' due to length ({len(plate)} < {self.lpr_config.min_plate_length})"
                    )
                    continue
                if self.lpr_config.format:
                    try:
                        if not re.fullmatch(self.lpr_config.format, plate):
                            logger.debug(
                                f"{camera}: Filtered out '{plate}' due to format mismatch"
                            )
                            continue
                    except re.error:
                        # Skip format filtering if regex is invalid
                        logger.error(
                            f"{camera}: Invalid regex in LPR format configuration: {self.lpr_config.format}"
                        )
                filtered_data.append((plate, conf_list, area))
            sorted_data = sorted(
-                filtered_data,
+                zip(all_license_plates, all_confidences, all_areas),
                key=lambda x: (x[2], len(x[0]), sum(x[1]) / len(x[1]) if x[1] else 0),
                reverse=True,
            )
@ -1557,6 +1532,27 @@ class LicensePlateProcessingMixin:
                f"{camera}: Clustering changed top plate '{top_plate}' (conf: {avg_confidence:.3f}) to rep '{rep_plate}' (conf: {rep_conf:.3f})"
            )
        # Apply length and format filters to the clustered representative
        # rather than individual OCR readings, so noisy variants still
        # contribute to clustering even when they don't pass on their own.
        if len(rep_plate) < self.lpr_config.min_plate_length:
            logger.debug(
                f"{camera}: Filtered out clustered plate '{rep_plate}' due to length ({len(rep_plate)} < {self.lpr_config.min_plate_length})"
            )
            return
        if self.lpr_config.format:
            try:
                if not re.fullmatch(self.lpr_config.format, rep_plate):
                    logger.debug(
                        f"{camera}: Filtered out clustered plate '{rep_plate}' due to format mismatch"
                    )
                    return
            except re.error:
                logger.error(
                    f"{camera}: Invalid regex in LPR format configuration: {self.lpr_config.format}"
                )
        # Update stored rep
        self.detected_license_plates[id].update(
            {