apply filters after clustering

apply length and format filters to the clustered representative plate rather than individual OCR readings, so noisy variants still contribute to clustering even when they don't pass on their own
This commit is contained in:
Josh Hawkins 2026-03-07 06:08:14 -06:00
parent c2e667c0dd
commit 5c99409bd9

View File

@ -401,35 +401,10 @@ class LicensePlateProcessingMixin:
all_confidences.append(flat_confidences)
all_areas.append(combined_area)
# Step 3: Filter and sort the combined plates
# Step 3: Sort the combined plates
if all_license_plates:
filtered_data = []
for plate, conf_list, area in zip(
all_license_plates, all_confidences, all_areas
):
if len(plate) < self.lpr_config.min_plate_length:
logger.debug(
f"{camera}: Filtered out '{plate}' due to length ({len(plate)} < {self.lpr_config.min_plate_length})"
)
continue
if self.lpr_config.format:
try:
if not re.fullmatch(self.lpr_config.format, plate):
logger.debug(
f"{camera}: Filtered out '{plate}' due to format mismatch"
)
continue
except re.error:
# Skip format filtering if regex is invalid
logger.error(
f"{camera}: Invalid regex in LPR format configuration: {self.lpr_config.format}"
)
filtered_data.append((plate, conf_list, area))
sorted_data = sorted(
filtered_data,
zip(all_license_plates, all_confidences, all_areas),
key=lambda x: (x[2], len(x[0]), sum(x[1]) / len(x[1]) if x[1] else 0),
reverse=True,
)
@ -1557,6 +1532,27 @@ class LicensePlateProcessingMixin:
f"{camera}: Clustering changed top plate '{top_plate}' (conf: {avg_confidence:.3f}) to rep '{rep_plate}' (conf: {rep_conf:.3f})"
)
# Apply length and format filters to the clustered representative
# rather than individual OCR readings, so noisy variants still
# contribute to clustering even when they don't pass on their own.
if len(rep_plate) < self.lpr_config.min_plate_length:
logger.debug(
f"{camera}: Filtered out clustered plate '{rep_plate}' due to length ({len(rep_plate)} < {self.lpr_config.min_plate_length})"
)
return
if self.lpr_config.format:
try:
if not re.fullmatch(self.lpr_config.format, rep_plate):
logger.debug(
f"{camera}: Filtered out clustered plate '{rep_plate}' due to format mismatch"
)
return
except re.error:
logger.error(
f"{camera}: Invalid regex in LPR format configuration: {self.lpr_config.format}"
)
# Update stored rep
self.detected_license_plates[id].update(
{