From dda9f7bfed65a4c371d0fd53c1defe6213a5d37a Mon Sep 17 00:00:00 2001 From: Josh Hawkins <32435876+hawkeye217@users.noreply.github.com> Date: Sat, 7 Mar 2026 07:42:27 -0600 Subject: [PATCH] apply filters after clustering (#22308) apply length and format filters to the clustered representative plate rather than individual OCR readings, so noisy variants still contribute to clustering even when they don't pass on their own --- .../common/license_plate/mixin.py | 50 +++++++++---------- 1 file changed, 23 insertions(+), 27 deletions(-) diff --git a/frigate/data_processing/common/license_plate/mixin.py b/frigate/data_processing/common/license_plate/mixin.py index ae06c0d0a..c184b8b75 100644 --- a/frigate/data_processing/common/license_plate/mixin.py +++ b/frigate/data_processing/common/license_plate/mixin.py @@ -401,35 +401,10 @@ class LicensePlateProcessingMixin: all_confidences.append(flat_confidences) all_areas.append(combined_area) - # Step 3: Filter and sort the combined plates + # Step 3: Sort the combined plates if all_license_plates: - filtered_data = [] - for plate, conf_list, area in zip( - all_license_plates, all_confidences, all_areas - ): - if len(plate) < self.lpr_config.min_plate_length: - logger.debug( - f"{camera}: Filtered out '{plate}' due to length ({len(plate)} < {self.lpr_config.min_plate_length})" - ) - continue - - if self.lpr_config.format: - try: - if not re.fullmatch(self.lpr_config.format, plate): - logger.debug( - f"{camera}: Filtered out '{plate}' due to format mismatch" - ) - continue - except re.error: - # Skip format filtering if regex is invalid - logger.error( - f"{camera}: Invalid regex in LPR format configuration: {self.lpr_config.format}" - ) - - filtered_data.append((plate, conf_list, area)) - sorted_data = sorted( - filtered_data, + zip(all_license_plates, all_confidences, all_areas), key=lambda x: (x[2], len(x[0]), sum(x[1]) / len(x[1]) if x[1] else 0), reverse=True, ) @@ -1557,6 +1532,27 @@ class LicensePlateProcessingMixin: f"{camera}: Clustering changed top plate '{top_plate}' (conf: {avg_confidence:.3f}) to rep '{rep_plate}' (conf: {rep_conf:.3f})" ) + # Apply length and format filters to the clustered representative + # rather than individual OCR readings, so noisy variants still + # contribute to clustering even when they don't pass on their own. + if len(rep_plate) < self.lpr_config.min_plate_length: + logger.debug( + f"{camera}: Filtered out clustered plate '{rep_plate}' due to length ({len(rep_plate)} < {self.lpr_config.min_plate_length})" + ) + return + + if self.lpr_config.format: + try: + if not re.fullmatch(self.lpr_config.format, rep_plate): + logger.debug( + f"{camera}: Filtered out clustered plate '{rep_plate}' due to format mismatch" + ) + return + except re.error: + logger.error( + f"{camera}: Invalid regex in LPR format configuration: {self.lpr_config.format}" + ) + # Update stored rep self.detected_license_plates[id].update( {