Fix state classification selection

This commit is contained in:
Nicolas Mowen 2025-10-22 13:56:04 -06:00
parent 9b93667b87
commit 6773a9aeb1
3 changed files with 39 additions and 45 deletions

View File

@ -773,25 +773,13 @@ def delete_classification_train_images(request: Request, name: str, body: dict =
async def generate_state_examples(request: Request, body: GenerateStateExamplesBody):
"""Generate examples for state classification."""
try:
cameras_with_pixels = {}
config: FrigateConfig = request.app.frigate_config
cameras_normalized = {
camera_name: tuple(crop)
for camera_name, crop in body.cameras.items()
if camera_name in request.app.frigate_config.cameras
}
for camera_name, crop in body.cameras.items():
if camera_name not in config.cameras:
continue
camera_config = config.cameras[camera_name]
width = camera_config.detect.width
height = camera_config.detect.height
x1 = int(crop[0] * width)
y1 = int(crop[1] * height)
x2 = int((crop[0] + crop[2]) * width)
y2 = int((crop[1] + crop[3]) * height)
cameras_with_pixels[camera_name] = (x1, y1, x2, y2)
collect_state_classification_examples(body.model_name, cameras_with_pixels)
collect_state_classification_examples(body.model_name, cameras_normalized)
return JSONResponse(
content={"success": True, "message": "Example generation completed"},

View File

@ -181,7 +181,7 @@ def kickoff_model_training(
@staticmethod
def collect_state_classification_examples(
model_name: str, cameras: dict[str, tuple[int, int, int, int]]
model_name: str, cameras: dict[str, tuple[float, float, float, float]]
) -> None:
"""
Collect representative state classification examples from review items.
@ -195,7 +195,7 @@ def collect_state_classification_examples(
Args:
model_name: Name of the classification model
cameras: Dict mapping camera names to crop coordinates (x1, y1, x2, y2)
cameras: Dict mapping camera names to normalized crop coordinates [x1, y1, x2, y2] (0-1)
"""
dataset_dir = os.path.join(CLIPS_DIR, model_name, "dataset")
temp_dir = os.path.join(dataset_dir, "temp")
@ -206,6 +206,7 @@ def collect_state_classification_examples(
review_items = list(
ReviewSegment.select()
.where(ReviewSegment.camera.in_(camera_names))
.where(ReviewSegment.end_time.is_null(False))
.order_by(ReviewSegment.start_time.asc())
)
@ -336,7 +337,7 @@ def _extract_keyframes(
ffmpeg_path: str,
timestamps: list[dict],
output_dir: str,
camera_crops: dict[str, tuple[int, int, int, int]],
camera_crops: dict[str, tuple[float, float, float, float]],
) -> list[str]:
"""
Extract keyframes from recordings at specified timestamps and crop to specified regions.
@ -345,7 +346,7 @@ def _extract_keyframes(
ffmpeg_path: Path to ffmpeg binary
timestamps: List of timestamp dicts from _select_balanced_timestamps
output_dir: Directory to save extracted frames
camera_crops: Dict mapping camera names to crop coordinates (x1, y1, x2, y2)
camera_crops: Dict mapping camera names to normalized crop coordinates [x1, y1, x2, y2] (0-1)
Returns:
List of paths to successfully extracted and cropped keyframe images
@ -360,7 +361,7 @@ def _extract_keyframes(
logger.warning(f"No crop coordinates for camera {camera}")
continue
x1, y1, x2, y2 = camera_crops[camera]
norm_x1, norm_y1, norm_x2, norm_y2 = camera_crops[camera]
try:
recording = (
@ -395,6 +396,12 @@ def _extract_keyframes(
if img is not None:
height, width = img.shape[:2]
x1 = int(norm_x1 * width)
y1 = int(norm_y1 * height)
x2 = int(norm_x2 * width)
y2 = int(norm_y2 * height)
x1_clipped = max(0, min(x1, width))
y1_clipped = max(0, min(y1, height))
x2_clipped = max(0, min(x2, width))
@ -646,10 +653,8 @@ def _extract_event_thumbnails(events: list[Event], output_dir: str) -> list[str]
if img is not None:
height, width = img.shape[:2]
# Calculate crop based on object size relative to the thumbnail region
crop_size = 1.0 # Default to no crop
crop_size = 1.0
if event.data and "box" in event.data and "region" in event.data:
# Box is [x, y, w, h] format
box = event.data["box"]
region = event.data["region"]
@ -657,26 +662,22 @@ def _extract_event_thumbnails(events: list[Event], output_dir: str) -> list[str]
box_w, box_h = box[2], box[3]
region_w, region_h = region[2], region[3]
# Calculate what percentage of the region the box occupies
box_area = (box_w * box_h) / (region_w * region_h)
# Crop inversely proportional to object size in thumbnail
# Small objects need more crop (zoom in), large objects need less
if box_area < 0.05: # Very small (< 5%)
if box_area < 0.05:
crop_size = 0.4
elif box_area < 0.10: # Small (5-10%)
elif box_area < 0.10:
crop_size = 0.5
elif box_area < 0.20: # Medium-small (10-20%)
elif box_area < 0.20:
crop_size = 0.65
elif box_area < 0.35: # Medium (20-35%)
elif box_area < 0.35:
crop_size = 0.80
else: # Large (>35%)
else:
crop_size = 0.95
crop_width = int(width * crop_size)
crop_height = int(height * crop_size)
# Calculate center crop coordinates
x1 = (width - crop_width) // 2
y1 = (height - crop_height) // 2
x2 = x1 + crop_width

View File

@ -18,7 +18,7 @@ import { resolveCameraName } from "@/hooks/use-camera-friendly-name";
export type CameraAreaConfig = {
camera: string;
crop: [number, number, number, number]; // [x, y, width, height] normalized 0-1
crop: [number, number, number, number];
};
export type Step2FormData = {
@ -110,7 +110,7 @@ export default function Step2StateArea({
(cameraName: string) => {
const newArea: CameraAreaConfig = {
camera: cameraName,
crop: [0.385, 0.385, 0.15, 0.15],
crop: [0.385, 0.385, 0.535, 0.535],
};
setCameraAreas([...cameraAreas, newArea]);
setSelectedCameraIndex(cameraAreas.length);
@ -169,13 +169,12 @@ export default function Step2StateArea({
rect.scaleX(1);
rect.scaleY(1);
// Normalize to 0-1 range for storage
const x = rect.x() / imageSize.width;
const y = rect.y() / imageSize.height;
const width = size / imageSize.width;
const height = size / imageSize.height;
const x1 = rect.x() / imageSize.width;
const y1 = rect.y() / imageSize.height;
const x2 = (rect.x() + size) / imageSize.width;
const y2 = (rect.y() + size) / imageSize.height;
handleCropChange([x, y, width, height]);
handleCropChange([x1, y1, x2, y2]);
}
}, [imageSize, handleCropChange]);
@ -322,8 +321,14 @@ export default function Step2StateArea({
ref={rectRef}
x={selectedCamera.crop[0] * imageSize.width}
y={selectedCamera.crop[1] * imageSize.height}
width={selectedCamera.crop[2] * imageSize.width}
height={selectedCamera.crop[2] * imageSize.width}
width={
(selectedCamera.crop[2] - selectedCamera.crop[0]) *
imageSize.width
}
height={
(selectedCamera.crop[3] - selectedCamera.crop[1]) *
imageSize.height
}
stroke="#3b82f6"
strokeWidth={2}
fill="rgba(59, 130, 246, 0.1)"