From 5c99409bd9a08e38692143ff279f7e6a6c8ebc78 Mon Sep 17 00:00:00 2001 From: Josh Hawkins <32435876+hawkeye217@users.noreply.github.com> Date: Sat, 7 Mar 2026 06:08:14 -0600 Subject: [PATCH] apply filters after clustering apply length and format filters to the clustered representative plate rather than individual OCR readings, so noisy variants still contribute to clustering even when they don't pass on their own --- .../common/license_plate/mixin.py | 50 +++++++++---------- 1 file changed, 23 insertions(+), 27 deletions(-) diff --git a/frigate/data_processing/common/license_plate/mixin.py b/frigate/data_processing/common/license_plate/mixin.py index ae06c0d0a..c184b8b75 100644 --- a/frigate/data_processing/common/license_plate/mixin.py +++ b/frigate/data_processing/common/license_plate/mixin.py @@ -401,35 +401,10 @@ class LicensePlateProcessingMixin: all_confidences.append(flat_confidences) all_areas.append(combined_area) - # Step 3: Filter and sort the combined plates + # Step 3: Sort the combined plates if all_license_plates: - filtered_data = [] - for plate, conf_list, area in zip( - all_license_plates, all_confidences, all_areas - ): - if len(plate) < self.lpr_config.min_plate_length: - logger.debug( - f"{camera}: Filtered out '{plate}' due to length ({len(plate)} < {self.lpr_config.min_plate_length})" - ) - continue - - if self.lpr_config.format: - try: - if not re.fullmatch(self.lpr_config.format, plate): - logger.debug( - f"{camera}: Filtered out '{plate}' due to format mismatch" - ) - continue - except re.error: - # Skip format filtering if regex is invalid - logger.error( - f"{camera}: Invalid regex in LPR format configuration: {self.lpr_config.format}" - ) - - filtered_data.append((plate, conf_list, area)) - sorted_data = sorted( - filtered_data, + zip(all_license_plates, all_confidences, all_areas), key=lambda x: (x[2], len(x[0]), sum(x[1]) / len(x[1]) if x[1] else 0), reverse=True, ) @@ -1557,6 +1532,27 @@ class LicensePlateProcessingMixin: f"{camera}: Clustering changed top plate '{top_plate}' (conf: {avg_confidence:.3f}) to rep '{rep_plate}' (conf: {rep_conf:.3f})" ) + # Apply length and format filters to the clustered representative + # rather than individual OCR readings, so noisy variants still + # contribute to clustering even when they don't pass on their own. + if len(rep_plate) < self.lpr_config.min_plate_length: + logger.debug( + f"{camera}: Filtered out clustered plate '{rep_plate}' due to length ({len(rep_plate)} < {self.lpr_config.min_plate_length})" + ) + return + + if self.lpr_config.format: + try: + if not re.fullmatch(self.lpr_config.format, rep_plate): + logger.debug( + f"{camera}: Filtered out clustered plate '{rep_plate}' due to format mismatch" + ) + return + except re.error: + logger.error( + f"{camera}: Invalid regex in LPR format configuration: {self.lpr_config.format}" + ) + # Update stored rep self.detected_license_plates[id].update( {