apply filters after clustering (#22308)

apply length and format filters to the clustered representative plate rather than individual OCR readings, so noisy variants still contribute to clustering even when they don't pass on their own
This commit is contained in:
Josh Hawkins 2026-03-07 07:42:27 -06:00 committed by GitHub
parent c2e667c0dd
commit dda9f7bfed
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -401,35 +401,10 @@ class LicensePlateProcessingMixin:
all_confidences.append(flat_confidences) all_confidences.append(flat_confidences)
all_areas.append(combined_area) all_areas.append(combined_area)
# Step 3: Filter and sort the combined plates # Step 3: Sort the combined plates
if all_license_plates: if all_license_plates:
filtered_data = []
for plate, conf_list, area in zip(
all_license_plates, all_confidences, all_areas
):
if len(plate) < self.lpr_config.min_plate_length:
logger.debug(
f"{camera}: Filtered out '{plate}' due to length ({len(plate)} < {self.lpr_config.min_plate_length})"
)
continue
if self.lpr_config.format:
try:
if not re.fullmatch(self.lpr_config.format, plate):
logger.debug(
f"{camera}: Filtered out '{plate}' due to format mismatch"
)
continue
except re.error:
# Skip format filtering if regex is invalid
logger.error(
f"{camera}: Invalid regex in LPR format configuration: {self.lpr_config.format}"
)
filtered_data.append((plate, conf_list, area))
sorted_data = sorted( sorted_data = sorted(
filtered_data, zip(all_license_plates, all_confidences, all_areas),
key=lambda x: (x[2], len(x[0]), sum(x[1]) / len(x[1]) if x[1] else 0), key=lambda x: (x[2], len(x[0]), sum(x[1]) / len(x[1]) if x[1] else 0),
reverse=True, reverse=True,
) )
@ -1557,6 +1532,27 @@ class LicensePlateProcessingMixin:
f"{camera}: Clustering changed top plate '{top_plate}' (conf: {avg_confidence:.3f}) to rep '{rep_plate}' (conf: {rep_conf:.3f})" f"{camera}: Clustering changed top plate '{top_plate}' (conf: {avg_confidence:.3f}) to rep '{rep_plate}' (conf: {rep_conf:.3f})"
) )
# Apply length and format filters to the clustered representative
# rather than individual OCR readings, so noisy variants still
# contribute to clustering even when they don't pass on their own.
if len(rep_plate) < self.lpr_config.min_plate_length:
logger.debug(
f"{camera}: Filtered out clustered plate '{rep_plate}' due to length ({len(rep_plate)} < {self.lpr_config.min_plate_length})"
)
return
if self.lpr_config.format:
try:
if not re.fullmatch(self.lpr_config.format, rep_plate):
logger.debug(
f"{camera}: Filtered out clustered plate '{rep_plate}' due to format mismatch"
)
return
except re.error:
logger.error(
f"{camera}: Invalid regex in LPR format configuration: {self.lpr_config.format}"
)
# Update stored rep # Update stored rep
self.detected_license_plates[id].update( self.detected_license_plates[id].update(
{ {