Fix state classification selection

2026-07-10 05:51:14 +03:00 · 2025-10-22 13:56:04 -06:00 · 2025-10-22 13:56:04 -06:00 · 6773a9aeb1
commit 6773a9aeb1
parent 9b93667b87
3 changed files with 39 additions and 45 deletions
--- a/frigate/api/classification.py
+++ b/frigate/api/classification.py
@ -773,25 +773,13 @@ def delete_classification_train_images(request: Request, name: str, body: dict =
 async def generate_state_examples(request: Request, body: GenerateStateExamplesBody):
    """Generate examples for state classification."""
    try:
-        cameras_with_pixels = {}
+        cameras_normalized = {
-        config: FrigateConfig = request.app.frigate_config
+            camera_name: tuple(crop)
+            for camera_name, crop in body.cameras.items()
+            if camera_name in request.app.frigate_config.cameras
+        }
-        for camera_name, crop in body.cameras.items():
+        collect_state_classification_examples(body.model_name, cameras_normalized)
-            if camera_name not in config.cameras:
-                continue
-            camera_config = config.cameras[camera_name]
-            width = camera_config.detect.width
-            height = camera_config.detect.height
-            x1 = int(crop[0] * width)
-            y1 = int(crop[1] * height)
-            x2 = int((crop[0] + crop[2]) * width)
-            y2 = int((crop[1] + crop[3]) * height)
-            cameras_with_pixels[camera_name] = (x1, y1, x2, y2)
-        collect_state_classification_examples(body.model_name, cameras_with_pixels)
        return JSONResponse(
            content={"success": True, "message": "Example generation completed"},
--- a/frigate/util/classification.py
+++ b/frigate/util/classification.py
@ -181,7 +181,7 @@ def kickoff_model_training(
@staticmethod
 def collect_state_classification_examples(
-    model_name: str, cameras: dict[str, tuple[int, int, int, int]]
+    model_name: str, cameras: dict[str, tuple[float, float, float, float]]
 ) -> None:
    """
    Collect representative state classification examples from review items.
@ -195,7 +195,7 @@ def collect_state_classification_examples(
    Args:
        model_name: Name of the classification model
-        cameras: Dict mapping camera names to crop coordinates (x1, y1, x2, y2)
+        cameras: Dict mapping camera names to normalized crop coordinates [x1, y1, x2, y2] (0-1)
    """
    dataset_dir = os.path.join(CLIPS_DIR, model_name, "dataset")
    temp_dir = os.path.join(dataset_dir, "temp")
@ -206,6 +206,7 @@ def collect_state_classification_examples(
    review_items = list(
        ReviewSegment.select()
        .where(ReviewSegment.camera.in_(camera_names))
        .where(ReviewSegment.end_time.is_null(False))
        .order_by(ReviewSegment.start_time.asc())
    )
@ -336,7 +337,7 @@ def _extract_keyframes(
    ffmpeg_path: str,
    timestamps: list[dict],
    output_dir: str,
-    camera_crops: dict[str, tuple[int, int, int, int]],
+    camera_crops: dict[str, tuple[float, float, float, float]],
 ) -> list[str]:
    """
    Extract keyframes from recordings at specified timestamps and crop to specified regions.
@ -345,7 +346,7 @@ def _extract_keyframes(
        ffmpeg_path: Path to ffmpeg binary
        timestamps: List of timestamp dicts from _select_balanced_timestamps
        output_dir: Directory to save extracted frames
-        camera_crops: Dict mapping camera names to crop coordinates (x1, y1, x2, y2)
+        camera_crops: Dict mapping camera names to normalized crop coordinates [x1, y1, x2, y2] (0-1)
    Returns:
        List of paths to successfully extracted and cropped keyframe images
@ -360,7 +361,7 @@ def _extract_keyframes(
            logger.warning(f"No crop coordinates for camera {camera}")
            continue
-        x1, y1, x2, y2 = camera_crops[camera]
+        norm_x1, norm_y1, norm_x2, norm_y2 = camera_crops[camera]
        try:
            recording = (
@ -395,6 +396,12 @@ def _extract_keyframes(
                if img is not None:
                    height, width = img.shape[:2]
+                    x1 = int(norm_x1 * width)
+                    y1 = int(norm_y1 * height)
+                    x2 = int(norm_x2 * width)
+                    y2 = int(norm_y2 * height)
                    x1_clipped = max(0, min(x1, width))
                    y1_clipped = max(0, min(y1, height))
                    x2_clipped = max(0, min(x2, width))
@ -646,10 +653,8 @@ def _extract_event_thumbnails(events: list[Event], output_dir: str) -> list[str]
                if img is not None:
                    height, width = img.shape[:2]
-                    # Calculate crop based on object size relative to the thumbnail region
+                    crop_size = 1.0
-                    crop_size = 1.0  # Default to no crop
                    if event.data and "box" in event.data and "region" in event.data:
                        # Box is [x, y, w, h] format
                        box = event.data["box"]
                        region = event.data["region"]
@ -657,26 +662,22 @@ def _extract_event_thumbnails(events: list[Event], output_dir: str) -> list[str]
                            box_w, box_h = box[2], box[3]
                            region_w, region_h = region[2], region[3]
                            # Calculate what percentage of the region the box occupies
                            box_area = (box_w * box_h) / (region_w * region_h)
-                            # Crop inversely proportional to object size in thumbnail
+                            if box_area < 0.05:
-                            # Small objects need more crop (zoom in), large objects need less
-                            if box_area < 0.05:  # Very small (< 5%)
                                crop_size = 0.4
-                            elif box_area < 0.10:  # Small (5-10%)
+                            elif box_area < 0.10:
                                crop_size = 0.5
-                            elif box_area < 0.20:  # Medium-small (10-20%)
+                            elif box_area < 0.20:
                                crop_size = 0.65
-                            elif box_area < 0.35:  # Medium (20-35%)
+                            elif box_area < 0.35:
                                crop_size = 0.80
-                            else:  # Large (>35%)
+                            else:
                                crop_size = 0.95
                    crop_width = int(width * crop_size)
                    crop_height = int(height * crop_size)
                    # Calculate center crop coordinates
                    x1 = (width - crop_width) // 2
                    y1 = (height - crop_height) // 2
                    x2 = x1 + crop_width
--- a/web/src/components/classification/wizard/Step2StateArea.tsx
+++ b/web/src/components/classification/wizard/Step2StateArea.tsx
@ -18,7 +18,7 @@ import { resolveCameraName } from "@/hooks/use-camera-friendly-name";
 export type CameraAreaConfig = {
  camera: string;
-  crop: [number, number, number, number]; // [x, y, width, height] normalized 0-1
+  crop: [number, number, number, number];
 };
 export type Step2FormData = {
@ -110,7 +110,7 @@ export default function Step2StateArea({
    (cameraName: string) => {
      const newArea: CameraAreaConfig = {
        camera: cameraName,
-        crop: [0.385, 0.385, 0.15, 0.15],
+        crop: [0.385, 0.385, 0.535, 0.535],
      };
      setCameraAreas([...cameraAreas, newArea]);
      setSelectedCameraIndex(cameraAreas.length);
@ -169,13 +169,12 @@ export default function Step2StateArea({
      rect.scaleX(1);
      rect.scaleY(1);
-      // Normalize to 0-1 range for storage
+      const x1 = rect.x() / imageSize.width;
-      const x = rect.x() / imageSize.width;
+      const y1 = rect.y() / imageSize.height;
-      const y = rect.y() / imageSize.height;
+      const x2 = (rect.x() + size) / imageSize.width;
-      const width = size / imageSize.width;
+      const y2 = (rect.y() + size) / imageSize.height;
-      const height = size / imageSize.height;
-      handleCropChange([x, y, width, height]);
+      handleCropChange([x1, y1, x2, y2]);
    }
  }, [imageSize, handleCropChange]);
@ -322,8 +321,14 @@ export default function Step2StateArea({
                        ref={rectRef}
                        x={selectedCamera.crop[0] * imageSize.width}
                        y={selectedCamera.crop[1] * imageSize.height}
-                        width={selectedCamera.crop[2] * imageSize.width}
+                        width={
-                        height={selectedCamera.crop[2] * imageSize.width}
+                          (selectedCamera.crop[2] - selectedCamera.crop[0]) *
+                          imageSize.width
+                        }
+                        height={
+                          (selectedCamera.crop[3] - selectedCamera.crop[1]) *
+                          imageSize.height
+                        }
                        stroke="#3b82f6"
                        strokeWidth={2}
                        fill="rgba(59, 130, 246, 0.1)"