From 21c99cc95543a4b5b3c4c8536f1bb6b9ef469b16 Mon Sep 17 00:00:00 2001 From: Nick Mowen Date: Fri, 27 Oct 2023 10:29:59 -0600 Subject: [PATCH] Use object grid to reduce score for objects with unexpected sizes --- frigate/util/object.py | 116 ++++++++++++++++++++++++++++++++++------- frigate/video.py | 32 +++++++++--- 2 files changed, 124 insertions(+), 24 deletions(-) diff --git a/frigate/util/object.py b/frigate/util/object.py index 69d2c6b32..e9322bd14 100644 --- a/frigate/util/object.py +++ b/frigate/util/object.py @@ -27,6 +27,8 @@ from frigate.util.image import ( logger = logging.getLogger(__name__) GRID_SIZE = 8 +MIN_OBJECT_DATA_POINTS = 5 +MIN_STD_ACCURACY = 0.5 def get_camera_regions_grid( @@ -36,16 +38,24 @@ def get_camera_regions_grid( # get grid from db if available try: regions: Regions = Regions.select().where(Regions.camera == name).get() - grid = regions.grid - last_update = regions.last_update + + # check if old style grid is used and rebuild + if regions.grid[0][0].get("sizes") is not None: + grid = [] + last_update = 0 + else: + grid = regions.grid + last_update = regions.last_update except DoesNotExist: grid = [] + last_update = 0 + + if last_update == 0: for x in range(GRID_SIZE): row = [] for y in range(GRID_SIZE): - row.append({"sizes": []}) + row.append({"region": {"sizes": []}}) grid.append(row) - last_update = 0 # get events for timeline entries events = ( @@ -86,6 +96,9 @@ def get_camera_regions_grid( box = t["data"]["box"] + ### + # save region to grid + ### # calculate centroid position x = box[0] + (box[2] / 2) y = box[1] + (box[3] / 2) @@ -103,24 +116,40 @@ def get_camera_regions_grid( 1.35, ) # save width of region to grid as relative - grid[x_pos][y_pos]["sizes"].append( + grid[x_pos][y_pos]["region"]["sizes"].append( (calculated_region[2] - calculated_region[0]) / width ) + ### + # save object size to grid + ### + label = t["data"]["label"] + x = box[0] + (box[2] / 2) + y = box[1] + box[3] + + x_pos = int(x * GRID_SIZE) + y_pos = 
int(y * GRID_SIZE) + + if grid[x_pos][y_pos].get(label): + grid[x_pos][y_pos][label]["sizes"].append(box[2] * box[3]) + else: + grid[x_pos][y_pos][label] = {"sizes": [box[2] * box[3]]} + for x in range(GRID_SIZE): for y in range(GRID_SIZE): cell = grid[x][y] - - if len(cell["sizes"]) == 0: - continue - - std_dev = np.std(cell["sizes"]) - mean = np.mean(cell["sizes"]) - logger.debug(f"std dev: {std_dev} mean: {mean}") cell["x"] = x cell["y"] = y - cell["std_dev"] = std_dev - cell["mean"] = mean + + for key in cell.keys(): + if key in ["x", "y"] or len(cell[key]["sizes"]) == 0: + continue + + std_dev = np.std(cell[key]["sizes"]) + mean = np.mean(cell[key]["sizes"]) + logger.debug(f"{x},{y} :: {key} -> std dev: {std_dev} mean: {mean}") + cell[key]["std_dev"] = std_dev + cell[key]["mean"] = mean # update db with new grid region = { @@ -172,7 +201,7 @@ def get_region_from_grid( grid_x = int(centroid[0] / frame_shape[1] * GRID_SIZE) grid_y = int(centroid[1] / frame_shape[0] * GRID_SIZE) - cell = region_grid[grid_x][grid_y] + cell = region_grid[grid_x][grid_y]["region"] # if there is no known data, get standard region for motion box if not cell or not cell["sizes"]: @@ -424,17 +453,19 @@ def get_startup_regions( """Get a list of regions to run on startup.""" # return 8 most popular regions for the camera all_cells = np.concatenate(region_grid).flat - startup_cells = sorted(all_cells, key=lambda c: len(c["sizes"]), reverse=True)[0:8] + startup_cells = sorted( + all_cells, key=lambda c: len(c["region"]["sizes"]), reverse=True + )[0:8] regions = [] for cell in startup_cells: # rest of the cells are empty - if not cell["sizes"]: + if not cell["region"]["sizes"]: break x = frame_shape[1] / GRID_SIZE * (0.5 + cell["x"]) y = frame_shape[0] / GRID_SIZE * (0.5 + cell["y"]) - size = cell["mean"] * frame_shape[1] + size = cell["region"]["mean"] * frame_shape[1] regions.append( calculate_region( frame_shape, @@ -544,3 +575,52 @@ def reduce_detections( return 
def validate_object_size_with_grid(
    detect_config: "DetectConfig",
    label: str,
    score: float,
    box: tuple[int, int, int, int],
    region_grid: list[list[dict[str, any]]],
) -> tuple[bool, float]:
    """Lower an object's score when its area is unusual for its grid cell.

    The grid cell is chosen from the bottom center of ``box``. The cell's
    per-label statistics (``sizes``, ``mean``, ``std_dev``) are compared with
    the area of ``box`` expressed as a fraction of the detect frame.

    Args:
        detect_config: Provides the ``width`` and ``height`` of the detect frame.
        label: Object label used to look up statistics inside the cell.
        score: Current detection score.
        box: ``(x_min, y_min, x_max, y_max)`` in detect-frame pixels.
        region_grid: ``GRID_SIZE`` x ``GRID_SIZE`` grid of cells, indexed
            ``[x][y]``; each cell maps a label to its collected statistics.

    Returns:
        ``(False, 0.0)`` when the score should be left untouched (not enough
        data, data too noisy, or the area is within one standard deviation of
        the mean). Otherwise ``(True, new_score)`` where the score has been
        reduced by 0.05 per standard deviation of distance from the mean,
        never dropping below ``0.0``.
    """
    unchanged = (False, 0.0)

    # get object position in grid with bottom center
    x = box[0] + ((box[2] - box[0]) / 2)
    y = box[3]

    # clamp so a box touching the right / bottom edge of the frame cannot
    # index past the grid and a negative coordinate cannot wrap around
    x_pos = min(max(int(x / detect_config.width * GRID_SIZE), 0), GRID_SIZE - 1)
    y_pos = min(max(int(y / detect_config.height * GRID_SIZE), 0), GRID_SIZE - 1)
    cell = region_grid[x_pos][y_pos]

    # get data specific for this object type
    object_data = cell.get(label)

    # check if enough data in grid to process
    if not object_data or len(object_data["sizes"]) < MIN_OBJECT_DATA_POINTS:
        return unchanged

    std_dev = object_data["std_dev"]
    mean = object_data["mean"]

    # a zero mean or zero spread gives no usable scale to measure against
    # (and would otherwise divide by zero below)
    if mean <= 0 or std_dev <= 0:
        return unchanged

    # check if data is accurate enough to process
    if std_dev / mean > MIN_STD_ACCURACY:
        return unchanged

    relative_area = ((box[2] - box[0]) / detect_config.width) * (
        (box[3] - box[1]) / detect_config.height
    )
    diff = abs(mean - relative_area)

    # if object area is within 1 std deviation, no changes should be made
    if diff < std_dev:
        return unchanged

    logger.debug(
        "Comparing for %s,%s %s %s vs %s +- %s",
        x_pos,
        y_pos,
        label,
        relative_area,
        mean,
        std_dev,
    )

    # calculate how many std deviations away from the mean the area is;
    # this must use the distance from the mean, not the raw area, so that
    # undersized and oversized objects are penalized symmetrically
    factor = diff / std_dev

    # reduce object score by 5% for every factor, but never below zero
    new_score = max(0.0, score - factor * 0.05)
    return (True, new_score)
-44,6 +44,7 @@ from frigate.util.object import ( intersects_any, is_object_filtered, reduce_detections, + validate_object_size_with_grid, ) from frigate.util.services import listen @@ -459,10 +460,11 @@ def detect( detect_config: DetectConfig, object_detector, frame, - model_config, + model_config: ModelConfig, region, objects_to_track, - object_filters, + object_filters: dict[str, FilterConfig], + region_grid: list[list[dict[str, any]]], ): tensor_input = create_tensor_input(frame, model_config, region) @@ -484,18 +486,35 @@ def detect( height = y_max - y_min area = width * height ratio = width / max(1, height) + label = d[0] + score = d[1] + box = (x_min, y_min, x_max, y_max) + + # check if object is outside range of expected sizes + # only check objects with scores that are > min_score + if score >= object_filters[label].min_score: + outside_expected_size, new_score = validate_object_size_with_grid( + detect_config, d[0], score, box, region_grid + ) + if outside_expected_size: + logger.debug(f"{label} {box} has a new score {score} -> {new_score}") + score = new_score + det = ( - d[0], - d[1], - (x_min, y_min, x_max, y_max), + label, + score, + box, area, ratio, region, ) + # apply object filters if is_object_filtered(det, objects_to_track, object_filters): continue + detections.append(det) + return detections @@ -690,6 +709,7 @@ def process_frames( region, objects_to_track, object_filters, + region_grid, ) )