diff --git a/.cursor/rules/frontend-always-use-translation-files.mdc b/.cursor/rules/frontend-always-use-translation-files.mdc new file mode 100644 index 000000000..35034069b --- /dev/null +++ b/.cursor/rules/frontend-always-use-translation-files.mdc @@ -0,0 +1,6 @@ +--- +globs: ["**/*.ts", "**/*.tsx"] +alwaysApply: false +--- + +Never hardcode user-facing strings in the frontend; always add them to the relevant translation file and reference them from there. \ No newline at end of file diff --git a/docs/docs/configuration/custom_classification/object_classification.md b/docs/docs/configuration/custom_classification/object_classification.md index 9465716b7..983fce852 100644 --- a/docs/docs/configuration/custom_classification/object_classification.md +++ b/docs/docs/configuration/custom_classification/object_classification.md @@ -12,7 +12,18 @@ Object classification models are lightweight and run very fast on CPU. Inference Training the model does briefly use a high amount of system resources for about 1–3 minutes per training run. On lower-power devices, training may take longer. When running the `-tensorrt` image, Nvidia GPUs will automatically be used to accelerate training. -### Sub label vs Attribute +## Classes + +Classes are the categories your model will learn to distinguish between. Each class represents a distinct visual category that the model will predict. + +For object classification: + +- Define classes that represent different types or attributes of the detected object +- Examples: For `person` objects, classes might be `delivery_person`, `resident`, `stranger` +- Include a `none` class for objects that don't fit any specific category +- Keep classes visually distinct to improve accuracy + +### Classification Type - **Sub label**: diff --git a/docs/docs/configuration/custom_classification/state_classification.md b/docs/docs/configuration/custom_classification/state_classification.md index afc79eff8..c22661f26 100644 --- a/docs/docs/configuration/custom_classification/state_classification.md +++ b/docs/docs/configuration/custom_classification/state_classification.md @@ -12,6 +12,17 @@ State classification models are lightweight and run very fast on CPU. Inference Training the model does briefly use a high amount of system resources for about 1–3 minutes per training run. On lower-power devices, training may take longer. When running the `-tensorrt` image, Nvidia GPUs will automatically be used to accelerate training. +## Classes + +Classes are the different states an area on your camera can be in. Each class represents a distinct visual state that the model will learn to recognize. + +For state classification: + +- Define classes that represent mutually exclusive states +- Examples: `open` and `closed` for a garage door, `on` and `off` for lights +- Use at least 2 classes (typically binary states work best) +- Keep class names clear and descriptive + ## Example use cases - **Door state**: Detect if a garage or front door is open vs closed.
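The class docs above map directly onto the on-disk layout the rest of this PR reads and writes: labeled images live one folder per class under the model's `dataset` directory in the clips dir (see `categorize_classification_image` below), while freshly generated, not-yet-labeled examples land in a sibling `train` directory. A sketch of that layout, with hypothetical model and class names:

```
clips/
└── garage_door/                          # model name (sanitized)
    ├── dataset/                          # labeled images, one folder per class
    │   ├── open/
    │   │   └── open-1712345678.123456-a1b2c3.png
    │   └── closed/
    │       └── closed-1712345699.456789-d4e5f6.png
    └── train/                            # generated examples awaiting labeling
        └── example_000.jpg
```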
diff --git a/frigate/api/app.py b/frigate/api/app.py index f84190407..5d09ecf00 100644 --- a/frigate/api/app.py +++ b/frigate/api/app.py @@ -387,20 +387,28 @@ def config_set(request: Request, body: AppConfigSetBody): old_config: FrigateConfig = request.app.frigate_config request.app.frigate_config = config - if body.update_topic and body.update_topic.startswith("config/cameras/"): - _, _, camera, field = body.update_topic.split("/") + if body.update_topic: + if body.update_topic.startswith("config/cameras/"): + _, _, camera, field = body.update_topic.split("/") - if field == "add": - settings = config.cameras[camera] - elif field == "remove": - settings = old_config.cameras[camera] + if field == "add": + settings = config.cameras[camera] + elif field == "remove": + settings = old_config.cameras[camera] + else: + settings = config.get_nested_object(body.update_topic) + + request.app.config_publisher.publish_update( + CameraConfigUpdateTopic(CameraConfigUpdateEnum[field], camera), + settings, + ) else: + # Handle nested config updates (e.g., config/classification/custom/{name}) settings = config.get_nested_object(body.update_topic) - - request.app.config_publisher.publish_update( - CameraConfigUpdateTopic(CameraConfigUpdateEnum[field], camera), - settings, - ) + if settings: + request.app.config_publisher.publisher.publish( + body.update_topic, settings + ) return JSONResponse( content=( diff --git a/frigate/api/classification.py b/frigate/api/classification.py index 623ceba32..e9052097a 100644 --- a/frigate/api/classification.py +++ b/frigate/api/classification.py @@ -3,7 +3,9 @@ import datetime import logging import os +import random import shutil +import string from typing import Any import cv2 @@ -17,6 +19,8 @@ from frigate.api.auth import require_role from frigate.api.defs.request.classification_body import ( AudioTranscriptionBody, DeleteFaceImagesBody, + GenerateObjectExamplesBody, + GenerateStateExamplesBody, RenameFaceBody, ) from frigate.api.defs.response.classification_response import ( @@ -30,6 +34,10 @@ from frigate.config.camera import DetectConfig from frigate.const import CLIPS_DIR, FACE_DIR from frigate.embeddings import EmbeddingsContext from frigate.models import Event +from frigate.util.classification import ( + collect_object_classification_examples, + collect_state_classification_examples, +) from frigate.util.path import get_event_snapshot logger = logging.getLogger(__name__) @@ -159,8 +167,7 @@ def train_face(request: Request, name: str, body: dict = None): new_name = f"{sanitized_name}-{datetime.datetime.now().timestamp()}.webp" new_file_folder = os.path.join(FACE_DIR, f"{sanitized_name}") - if not os.path.exists(new_file_folder): - os.mkdir(new_file_folder) + os.makedirs(new_file_folder, exist_ok=True) if training_file_name: shutil.move(training_file, os.path.join(new_file_folder, new_name)) @@ -701,13 +708,14 @@ def categorize_classification_image(request: Request, name: str, body: dict = No status_code=404, ) - new_name = f"{category}-{datetime.datetime.now().timestamp()}.png" + random_id = "".join(random.choices(string.ascii_lowercase + string.digits, k=6)) + timestamp = datetime.datetime.now().timestamp() + new_name = f"{category}-{timestamp}-{random_id}.png" new_file_folder = os.path.join( CLIPS_DIR, sanitize_filename(name), "dataset", category ) - if not os.path.exists(new_file_folder): - os.mkdir(new_file_folder) + os.makedirs(new_file_folder, exist_ok=True) # use opencv because webp images can not be used to train img = cv2.imread(training_file) @@ -756,3 
+764,43 @@ def delete_classification_train_images(request: Request, name: str, body: dict = content=({"success": True, "message": "Successfully deleted faces."}), status_code=200, ) + + +@router.post( + "/classification/generate_examples/state", + response_model=GenericResponse, + dependencies=[Depends(require_role(["admin"]))], + summary="Generate state classification examples", +) +def generate_state_examples(request: Request, body: GenerateStateExamplesBody): + """Generate examples for state classification (defined sync so FastAPI runs this blocking collection in its threadpool).""" + model_name = sanitize_filename(body.model_name) + cameras_normalized = { + camera_name: tuple(crop) + for camera_name, crop in body.cameras.items() + if camera_name in request.app.frigate_config.cameras + } + + collect_state_classification_examples(model_name, cameras_normalized) + + return JSONResponse( + content={"success": True, "message": "Example generation completed"}, + status_code=200, + ) + + +@router.post( + "/classification/generate_examples/object", + response_model=GenericResponse, + dependencies=[Depends(require_role(["admin"]))], + summary="Generate object classification examples", +) +def generate_object_examples(request: Request, body: GenerateObjectExamplesBody): + """Generate examples for object classification (defined sync so FastAPI runs this blocking collection in its threadpool).""" + model_name = sanitize_filename(body.model_name) + collect_object_classification_examples(model_name, body.label) + + return JSONResponse( + content={"success": True, "message": "Example generation completed"}, + status_code=200, + ) diff --git a/frigate/api/defs/request/classification_body.py b/frigate/api/defs/request/classification_body.py index dabff0912..fb6a7dd0f 100644 --- a/frigate/api/defs/request/classification_body.py +++ b/frigate/api/defs/request/classification_body.py @@ -1,17 +1,31 @@ -from typing import List +from typing import Dict, List, Tuple from pydantic import BaseModel, Field class RenameFaceBody(BaseModel): - new_name: str + new_name: str = Field(description="New name for the face") class AudioTranscriptionBody(BaseModel): - event_id: str + event_id: str = Field(description="ID of the event to transcribe audio for") class DeleteFaceImagesBody(BaseModel): ids: List[str] = Field( description="List of image filenames to delete from the face folder" ) + + +class GenerateStateExamplesBody(BaseModel): + model_name: str = Field(description="Name of the classification model") + cameras: Dict[str, Tuple[float, float, float, float]] = Field( + description="Dictionary mapping camera names to normalized crop coordinates in [x1, y1, x2, y2] format (values 0-1)" + ) + + +class GenerateObjectExamplesBody(BaseModel): + model_name: str = Field(description="Name of the classification model") + label: str = Field( + description="Object label to collect examples for (e.g., 'person', 'car')" + ) diff --git a/frigate/data_processing/real_time/custom_classification.py b/frigate/data_processing/real_time/custom_classification.py index 1fb9dfc97..ac6387785 100644 --- a/frigate/data_processing/real_time/custom_classification.py +++ b/frigate/data_processing/real_time/custom_classification.py @@ -53,9 +53,17 @@ class CustomStateClassificationProcessor(RealTimeProcessorApi): self.tensor_output_details: dict[str, Any] | None = None self.labelmap: dict[int, str] = {} self.classifications_per_second = EventsPerSecond() - self.inference_speed = InferenceSpeed( - self.metrics.classification_speeds[self.model_config.name] - ) + + if ( + self.metrics + and self.model_config.name in self.metrics.classification_speeds + ): + self.inference_speed =
InferenceSpeed( + self.metrics.classification_speeds[self.model_config.name] + ) + else: + self.inference_speed = None + self.last_run = datetime.datetime.now().timestamp() self.__build_detector() @@ -83,12 +91,14 @@ class CustomStateClassificationProcessor(RealTimeProcessorApi): def __update_metrics(self, duration: float) -> None: self.classifications_per_second.update() - self.inference_speed.update(duration) + if self.inference_speed: + self.inference_speed.update(duration) def process_frame(self, frame_data: dict[str, Any], frame: np.ndarray): - self.metrics.classification_cps[ - self.model_config.name - ].value = self.classifications_per_second.eps() + if self.metrics and self.model_config.name in self.metrics.classification_cps: + self.metrics.classification_cps[ + self.model_config.name + ].value = self.classifications_per_second.eps() camera = frame_data.get("camera") if camera not in self.model_config.state_config.cameras: @@ -223,9 +233,17 @@ class CustomObjectClassificationProcessor(RealTimeProcessorApi): self.detected_objects: dict[str, float] = {} self.labelmap: dict[int, str] = {} self.classifications_per_second = EventsPerSecond() - self.inference_speed = InferenceSpeed( - self.metrics.classification_speeds[self.model_config.name] - ) + + if ( + self.metrics + and self.model_config.name in self.metrics.classification_speeds + ): + self.inference_speed = InferenceSpeed( + self.metrics.classification_speeds[self.model_config.name] + ) + else: + self.inference_speed = None + self.__build_detector() @redirect_output_to_logger(logger, logging.DEBUG) @@ -251,12 +269,14 @@ class CustomObjectClassificationProcessor(RealTimeProcessorApi): def __update_metrics(self, duration: float) -> None: self.classifications_per_second.update() - self.inference_speed.update(duration) + if self.inference_speed: + self.inference_speed.update(duration) def process_frame(self, obj_data, frame): - self.metrics.classification_cps[ - self.model_config.name - ].value = self.classifications_per_second.eps() + if self.metrics and self.model_config.name in self.metrics.classification_cps: + self.metrics.classification_cps[ + self.model_config.name + ].value = self.classifications_per_second.eps() if obj_data["false_positive"]: return diff --git a/frigate/embeddings/maintainer.py b/frigate/embeddings/maintainer.py index 55e3d57ba..fe04d8b17 100644 --- a/frigate/embeddings/maintainer.py +++ b/frigate/embeddings/maintainer.py @@ -9,6 +9,7 @@ from typing import Any from peewee import DoesNotExist +from frigate.comms.config_updater import ConfigSubscriber from frigate.comms.detections_updater import DetectionSubscriber, DetectionTypeEnum from frigate.comms.embeddings_updater import ( EmbeddingsRequestEnum, @@ -95,6 +96,9 @@ class EmbeddingMaintainer(threading.Thread): CameraConfigUpdateEnum.semantic_search, ], ) + self.classification_config_subscriber = ConfigSubscriber( + "config/classification/custom/" + ) # Configure Frigate DB db = SqliteVecQueueDatabase( @@ -255,6 +259,7 @@ class EmbeddingMaintainer(threading.Thread): """Maintain a SQLite-vec database for semantic search.""" while not self.stop_event.is_set(): self.config_updater.check_for_updates() + self._check_classification_config_updates() self._process_requests() self._process_updates() self._process_recordings_updates() @@ -265,6 +270,7 @@ class EmbeddingMaintainer(threading.Thread): self._process_event_metadata() self.config_updater.stop() + self.classification_config_subscriber.stop() self.event_subscriber.stop() self.event_end_subscriber.stop() 
self.recordings_subscriber.stop() @@ -275,6 +281,46 @@ self.requestor.stop() logger.info("Exiting embeddings maintenance...") + def _check_classification_config_updates(self) -> None: + """Check for classification config updates and add new processors.""" + topic, model_config = self.classification_config_subscriber.check_for_update() + + if topic and model_config: + model_name = topic.split("/")[-1] + self.config.classification.custom[model_name] = model_config + + # Check if processor already exists + for processor in self.realtime_processors: + if isinstance( + processor, + ( + CustomStateClassificationProcessor, + CustomObjectClassificationProcessor, + ), + ): + if processor.model_config.name == model_name: + logger.debug( + f"Classification processor for model {model_name} already exists, skipping" + ) + return + + if model_config.state_config is not None: + processor = CustomStateClassificationProcessor( + self.config, model_config, self.requestor, self.metrics + ) + else: + processor = CustomObjectClassificationProcessor( + self.config, + model_config, + self.event_metadata_publisher, + self.metrics, + ) + + self.realtime_processors.append(processor) + logger.info( + f"Added classification processor for model: {model_name} (type: {type(processor).__name__})" + ) + def _process_requests(self) -> None: """Process embeddings requests""" diff --git a/frigate/util/classification.py b/frigate/util/classification.py index e4133ded4..ab17a9444 100644 --- a/frigate/util/classification.py +++ b/frigate/util/classification.py @@ -2,12 +2,15 @@ import logging import os +import random +import shutil +from collections import defaultdict import cv2 import numpy as np from frigate.comms.embeddings_updater import EmbeddingsRequestEnum, EmbeddingsRequestor from frigate.comms.inter_process import InterProcessRequestor +from frigate.config import FfmpegConfig from frigate.const import ( CLIPS_DIR, MODEL_CACHE_DIR, @@ -15,7 +18,10 @@ from frigate.const import ( UPDATE_MODEL_STATE, ) from frigate.log import redirect_output_to_logger +from frigate.models import Event, Recordings, ReviewSegment from frigate.types import ModelStatusTypesEnum +from frigate.util.image import get_image_from_recording +from frigate.util.path import get_event_thumbnail_bytes from frigate.util.process import FrigateProcess BATCH_SIZE = 16 @@ -69,6 +75,7 @@ class ClassificationTrainingProcess(FrigateProcess): logger.info(f"Kicking off classification training for {self.model_name}.") dataset_dir = os.path.join(CLIPS_DIR, self.model_name, "dataset") model_dir = os.path.join(MODEL_CACHE_DIR, self.model_name) + os.makedirs(model_dir, exist_ok=True) num_classes = len( [ d @@ -139,7 +146,6 @@ class ClassificationTrainingProcess(FrigateProcess): f.write(tflite_model) -@staticmethod def kickoff_model_training( embeddingRequestor: EmbeddingsRequestor, model_name: str ) -> None: @@ -172,3 +178,520 @@ def kickoff_model_training( }, ) requestor.stop() + + +def collect_state_classification_examples( + model_name: str, cameras: dict[str, tuple[float, float, float, float]] +) -> None: + """ + Collect representative state classification examples from review items. + + This function: + 1. Queries review items from specified cameras + 2. Selects 100 balanced timestamps across the data + 3. Extracts keyframes from recordings (cropped to specified regions) + 4. Selects 24 most visually distinct images + 5.
Saves them to the model's train directory for labeling + + Args: + model_name: Name of the classification model + cameras: Dict mapping camera names to normalized crop coordinates [x1, y1, x2, y2] (0-1) + """ + dataset_dir = os.path.join(CLIPS_DIR, model_name, "dataset") + temp_dir = os.path.join(dataset_dir, "temp") + os.makedirs(temp_dir, exist_ok=True) + + # Step 1: Get review items for the cameras + camera_names = list(cameras.keys()) + review_items = list( + ReviewSegment.select() + .where(ReviewSegment.camera.in_(camera_names)) + .where(ReviewSegment.end_time.is_null(False)) + .order_by(ReviewSegment.start_time.asc()) + ) + + if not review_items: + logger.warning(f"No review items found for cameras: {camera_names}") + return + + # Step 2: Create balanced timestamp selection (100 samples) + timestamps = _select_balanced_timestamps(review_items, target_count=100) + + # Step 3: Extract keyframes from recordings with crops applied + keyframes = _extract_keyframes( + "/usr/lib/ffmpeg/7.0", timestamps, temp_dir, cameras + ) + + # Step 4: Select 24 most visually distinct images (they're already cropped) + distinct_images = _select_distinct_images(keyframes, target_count=24) + + # Step 5: Save to train directory for later classification + train_dir = os.path.join(CLIPS_DIR, model_name, "train") + os.makedirs(train_dir, exist_ok=True) + + saved_count = 0 + for idx, image_path in enumerate(distinct_images): + dest_path = os.path.join(train_dir, f"example_{idx:03d}.jpg") + try: + img = cv2.imread(image_path) + + if img is not None: + cv2.imwrite(dest_path, img) + saved_count += 1 + except Exception as e: + logger.error(f"Failed to save image {image_path}: {e}") + + try: + shutil.rmtree(temp_dir) + except Exception as e: + logger.warning(f"Failed to clean up temp directory: {e}") + + logger.debug( + f"Successfully collected {saved_count} state classification examples in {train_dir}" + ) + + +def _select_balanced_timestamps( + review_items: list[ReviewSegment], target_count: int = 100 +) -> list[dict]: + """ + Select balanced timestamps from review items.
+ + Strategy: + - Group review items by camera and 6-hour time window + - Sample evenly across groups to ensure diversity + - For each selected review item, pick a random timestamp within its duration + + Returns: + List of dicts with keys: camera, timestamp, review_item + """ + # Group by camera and 6-hour time window for temporal diversity + grouped = defaultdict(list) + + for item in review_items: + camera = item.camera + # 6-hour blocks of epoch time, not time of day + hour_block = int(item.start_time // (6 * 3600)) + key = f"{camera}_{hour_block}" + grouped[key].append(item) + + # Calculate how many samples per group + num_groups = len(grouped) + if num_groups == 0: + return [] + + samples_per_group = max(1, target_count // num_groups) + timestamps = [] + + # Sample from each group + for group_items in grouped.values(): + # Take samples_per_group items from this group + sample_size = min(samples_per_group, len(group_items)) + sampled_items = random.sample(group_items, sample_size) + + for item in sampled_items: + # Pick a random timestamp within the review item's duration + duration = item.end_time - item.start_time + if duration <= 0: + continue + + # Sample from middle 80% to avoid edge artifacts + offset = random.uniform(duration * 0.1, duration * 0.9) + timestamp = item.start_time + offset + + timestamps.append( + { + "camera": item.camera, + "timestamp": timestamp, + "review_item": item, + } + ) + + # If we don't have enough, sample more from larger groups, bounding retries + # so degenerate data (e.g., zero-length items) cannot loop forever + attempts = 0 + while ( + len(timestamps) < target_count + and len(timestamps) < len(review_items) + and attempts < 10 + ): + attempts += 1 + for group_items in grouped.values(): + if len(timestamps) >= target_count: + break + + # Pick a random item not already sampled + item = random.choice(group_items) + duration = item.end_time - item.start_time + if duration <= 0: + continue + + offset = random.uniform(duration * 0.1, duration * 0.9) + timestamp = item.start_time + offset + + # Check if we already have a timestamp near this one + if not any(abs(t["timestamp"] - timestamp) < 1.0 for t in timestamps): + timestamps.append( + { + "camera": item.camera, + "timestamp": timestamp, + "review_item": item, + } + ) + + return timestamps[:target_count] + + +def _extract_keyframes( + ffmpeg_path: str, + timestamps: list[dict], + output_dir: str, + camera_crops: dict[str, tuple[float, float, float, float]], +) -> list[str]: + """ + Extract keyframes from recordings at specified timestamps and crop to specified regions.
+ + Args: + ffmpeg_path: Base path of the ffmpeg install, used to build the FfmpegConfig + timestamps: List of timestamp dicts from _select_balanced_timestamps + output_dir: Directory to save extracted frames + camera_crops: Dict mapping camera names to normalized crop coordinates [x1, y1, x2, y2] (0-1) + + Returns: + List of paths to successfully extracted and cropped keyframe images + """ + keyframe_paths = [] + config = FfmpegConfig(path=ffmpeg_path) + + for idx, ts_info in enumerate(timestamps): + camera = ts_info["camera"] + timestamp = ts_info["timestamp"] + + if camera not in camera_crops: + logger.warning(f"No crop coordinates for camera {camera}") + continue + + norm_x1, norm_y1, norm_x2, norm_y2 = camera_crops[camera] + + try: + recording = ( + Recordings.select() + .where( + (timestamp >= Recordings.start_time) + & (timestamp <= Recordings.end_time) + & (Recordings.camera == camera) + ) + .order_by(Recordings.start_time.desc()) + .limit(1) + .get() + ) + except Exception: + continue + + relative_time = timestamp - recording.start_time + + try: + image_data = get_image_from_recording( + config, + recording.path, + relative_time, + codec="mjpeg", + height=None, + ) + + if image_data: + nparr = np.frombuffer(image_data, np.uint8) + img = cv2.imdecode(nparr, cv2.IMREAD_COLOR) + + if img is not None: + height, width = img.shape[:2] + + x1 = int(norm_x1 * width) + y1 = int(norm_y1 * height) + x2 = int(norm_x2 * width) + y2 = int(norm_y2 * height) + + x1_clipped = max(0, min(x1, width)) + y1_clipped = max(0, min(y1, height)) + x2_clipped = max(0, min(x2, width)) + y2_clipped = max(0, min(y2, height)) + + if x2_clipped > x1_clipped and y2_clipped > y1_clipped: + cropped = img[y1_clipped:y2_clipped, x1_clipped:x2_clipped] + resized = cv2.resize(cropped, (224, 224)) + + output_path = os.path.join(output_dir, f"frame_{idx:04d}.jpg") + cv2.imwrite(output_path, resized) + keyframe_paths.append(output_path) + + except Exception as e: + logger.debug( + f"Failed to extract frame from {recording.path} at {relative_time}s: {e}" + ) + continue + + return keyframe_paths + + +def _select_distinct_images( + image_paths: list[str], target_count: int = 20 +) -> list[str]: + """ + Select the most visually distinct images from a set of keyframes. + + Uses a greedy algorithm based on image histograms: + 1. Start with a random image + 2. Iteratively add the image that is most different from already selected images + 3.
Difference is measured using histogram comparison + + Args: + image_paths: List of paths to candidate images + target_count: Number of distinct images to select + + Returns: + List of paths to selected images + """ + if len(image_paths) <= target_count: + return image_paths + + histograms = {} + valid_paths = [] + + for path in image_paths: + try: + img = cv2.imread(path) + + if img is None: + continue + + hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) + hist = cv2.calcHist( + [hsv], [0, 1, 2], None, [8, 8, 8], [0, 180, 0, 256, 0, 256] + ) + hist = cv2.normalize(hist, hist).flatten() + histograms[path] = hist + valid_paths.append(path) + except Exception as e: + logger.debug(f"Failed to process image {path}: {e}") + continue + + if len(valid_paths) <= target_count: + return valid_paths + + selected = [] + first_image = random.choice(valid_paths) + selected.append(first_image) + remaining = [p for p in valid_paths if p != first_image] + + while len(selected) < target_count and remaining: + max_min_distance = -1 + best_candidate = None + + for candidate in remaining: + min_distance = float("inf") + + for selected_img in selected: + distance = cv2.compareHist( + histograms[candidate], + histograms[selected_img], + cv2.HISTCMP_BHATTACHARYYA, + ) + min_distance = min(min_distance, distance) + + if min_distance > max_min_distance: + max_min_distance = min_distance + best_candidate = candidate + + if best_candidate: + selected.append(best_candidate) + remaining.remove(best_candidate) + else: + break + + return selected + + +def collect_object_classification_examples( + model_name: str, + label: str, +) -> None: + """ + Collect representative object classification examples from event thumbnails. + + This function: + 1. Queries events for the specified label + 2. Selects 100 balanced events across different cameras and times + 3. Retrieves thumbnails for selected events (with an adaptive center crop applied) + 4. Selects 24 most visually distinct thumbnails + 5.
Saves them to the model's train directory for labeling + + Args: + model_name: Name of the classification model + label: Object label to collect (e.g., "person", "car") + """ + dataset_dir = os.path.join(CLIPS_DIR, model_name, "dataset") + temp_dir = os.path.join(dataset_dir, "temp") + os.makedirs(temp_dir, exist_ok=True) + + # Step 1: Query events for the specified label + events = list( + Event.select().where((Event.label == label)).order_by(Event.start_time.asc()) + ) + + if not events: + logger.warning(f"No events found for label '{label}'") + return + + logger.debug(f"Found {len(events)} events") + + # Step 2: Select balanced events (100 samples) + selected_events = _select_balanced_events(events, target_count=100) + logger.debug(f"Selected {len(selected_events)} events") + + # Step 3: Extract thumbnails from events + thumbnails = _extract_event_thumbnails(selected_events, temp_dir) + logger.debug(f"Successfully extracted {len(thumbnails)} thumbnails") + + # Step 4: Select 24 most visually distinct thumbnails + distinct_images = _select_distinct_images(thumbnails, target_count=24) + logger.debug(f"Selected {len(distinct_images)} distinct images") + + # Step 5: Save to train directory for later classification + train_dir = os.path.join(CLIPS_DIR, model_name, "train") + os.makedirs(train_dir, exist_ok=True) + + saved_count = 0 + for idx, image_path in enumerate(distinct_images): + dest_path = os.path.join(train_dir, f"example_{idx:03d}.jpg") + try: + img = cv2.imread(image_path) + + if img is not None: + cv2.imwrite(dest_path, img) + saved_count += 1 + except Exception as e: + logger.error(f"Failed to save image {image_path}: {e}") + + try: + shutil.rmtree(temp_dir) + except Exception as e: + logger.warning(f"Failed to clean up temp directory: {e}") + + logger.debug( + f"Successfully collected {saved_count} classification examples in {train_dir}" + ) + + +def _select_balanced_events( + events: list[Event], target_count: int = 100 +) -> list[Event]: + """ + Select balanced events from the event list. + + Strategy: + - Group events by camera and 6-hour time window + - Sample evenly across groups to ensure diversity + - Prioritize events with higher scores + + Returns: + List of selected events + """ + grouped = defaultdict(list) + + for event in events: + camera = event.camera + hour_block = int(event.start_time // (6 * 3600)) + key = f"{camera}_{hour_block}" + grouped[key].append(event) + + num_groups = len(grouped) + if num_groups == 0: + return [] + + samples_per_group = max(1, target_count // num_groups) + selected = [] + + for group_events in grouped.values(): + sorted_events = sorted( + group_events, + key=lambda e: e.data.get("score", 0) if e.data else 0, + reverse=True, + ) + + sample_size = min(samples_per_group, len(sorted_events)) + selected.extend(sorted_events[:sample_size]) + + if len(selected) < target_count: + remaining = [e for e in events if e not in selected] + remaining_sorted = sorted( + remaining, + key=lambda e: e.data.get("score", 0) if e.data else 0, + reverse=True, + ) + needed = target_count - len(selected) + selected.extend(remaining_sorted[:needed]) + + return selected[:target_count] + + +def _extract_event_thumbnails(events: list[Event], output_dir: str) -> list[str]: + """ + Extract thumbnails from events and save to disk.
+ + Args: + events: List of Event objects + output_dir: Directory to save thumbnails + + Returns: + List of paths to successfully extracted thumbnail images + """ + thumbnail_paths = [] + + for idx, event in enumerate(events): + try: + thumbnail_bytes = get_event_thumbnail_bytes(event) + + if thumbnail_bytes: + nparr = np.frombuffer(thumbnail_bytes, np.uint8) + img = cv2.imdecode(nparr, cv2.IMREAD_COLOR) + + if img is not None: + height, width = img.shape[:2] + + crop_size = 1.0 + if event.data and "box" in event.data and "region" in event.data: + box = event.data["box"] + region = event.data["region"] + + if len(box) == 4 and len(region) == 4: + box_w, box_h = box[2], box[3] + region_w, region_h = region[2], region[3] + + box_area = (box_w * box_h) / (region_w * region_h) + + if box_area < 0.05: + crop_size = 0.4 + elif box_area < 0.10: + crop_size = 0.5 + elif box_area < 0.20: + crop_size = 0.65 + elif box_area < 0.35: + crop_size = 0.80 + else: + crop_size = 0.95 + + crop_width = int(width * crop_size) + crop_height = int(height * crop_size) + + x1 = (width - crop_width) // 2 + y1 = (height - crop_height) // 2 + x2 = x1 + crop_width + y2 = y1 + crop_height + + cropped = img[y1:y2, x1:x2] + resized = cv2.resize(cropped, (224, 224)) + output_path = os.path.join(output_dir, f"thumbnail_{idx:04d}.jpg") + cv2.imwrite(output_path, resized) + thumbnail_paths.append(output_path) + + except Exception as e: + logger.debug(f"Failed to extract thumbnail for event {event.id}: {e}") + continue + + return thumbnail_paths diff --git a/web/public/locales/en/views/classificationModel.json b/web/public/locales/en/views/classificationModel.json index dcfc5a1b2..a13870221 100644 --- a/web/public/locales/en/views/classificationModel.json +++ b/web/public/locales/en/views/classificationModel.json @@ -1,4 +1,5 @@ { + "documentTitle": "Classification Models", "button": { "deleteClassificationAttempts": "Delete Classification Images", "renameCategory": "Rename Class", @@ -50,8 +51,85 @@ }, "categorizeImageAs": "Classify Image As:", "categorizeImage": "Classify Image", + "noModels": { + "object": { + "title": "No Object Classification Models", + "description": "Create a custom model to classify detected objects.", + "buttonText": "Create Object Model" + }, + "state": { + "title": "No State Classification Models", + "description": "Create a custom model to monitor and classify state changes in specific camera areas.", + "buttonText": "Create State Model" + } + }, "wizard": { "title": "Create New Classification", - "description": "Create a new state or object classification model." + "steps": { + "nameAndDefine": "Name & Define", + "stateArea": "State Area", + "chooseExamples": "Choose Examples" + }, + "step1": { + "description": "State models monitor fixed camera areas for changes (e.g., door open/closed). Object models add classifications to detected objects (e.g., known animals, delivery persons, etc.).", + "name": "Name", + "namePlaceholder": "Enter model name...", + "type": "Type", + "typeState": "State", + "typeObject": "Object", + "objectLabel": "Object Label", + "objectLabelPlaceholder": "Select object type...", + "classificationType": "Classification Type", + "classificationTypeTip": "Learn about classification types", + "classificationTypeDesc": "Sub Labels add additional text to the object label (e.g., 'Person: UPS'). 
Attributes are stored separately as searchable metadata on the object.", + "classificationSubLabel": "Sub Label", + "classificationAttribute": "Attribute", + "classes": "Classes", + "classesTip": "Learn about classes", + "classesStateDesc": "Define the different states your camera area can be in. For example: 'open' and 'closed' for a garage door.", + "classesObjectDesc": "Define the different categories to classify detected objects into. For example: 'delivery_person', 'resident', 'stranger' for person classification.", + "classPlaceholder": "Enter class name...", + "errors": { + "nameRequired": "Model name is required", + "nameLength": "Model name must be 64 characters or less", + "nameOnlyNumbers": "Model name cannot contain only numbers", + "classRequired": "At least 1 class is required", + "classesUnique": "Class names must be unique", + "stateRequiresTwoClasses": "State models require at least 2 classes", + "objectLabelRequired": "Please select an object label", + "objectTypeRequired": "Please select a classification type" + } + }, + "step2": { + "description": "Select cameras and define the area to monitor for each camera. The model will classify the state of these areas.", + "cameras": "Cameras", + "selectCamera": "Select Camera", + "noCameras": "Click + to add cameras", + "selectCameraPrompt": "Select a camera from the list to define its monitoring area" + }, + "step3": { + "selectImagesPrompt": "Select all images with: {{className}}", + "selectImagesDescription": "Click on images to select them. Click Continue when you're done with this class.", + "generating": { + "title": "Generating Sample Images", + "description": "Frigate is pulling representative images from your recordings. This may take a moment..." + }, + "training": { + "title": "Training Model", + "description": "Your model is being trained in the background. You can close this dialog; your model will start running as soon as training is complete." + }, + "retryGenerate": "Retry Generation", + "noImages": "No sample images generated", + "classifying": "Classifying & Training...", + "trainingStarted": "Training started successfully", + "errors": { + "noCameras": "No cameras configured", + "noObjectLabel": "No object label selected", + "generateFailed": "Failed to generate examples: {{error}}", + "generationFailed": "Generation failed. Please try again.", + "classifyFailed": "Failed to classify images: {{error}}" + }, + "generateSuccess": "Successfully generated sample images" + } } } diff --git a/web/public/locales/en/views/faceLibrary.json b/web/public/locales/en/views/faceLibrary.json index 6febf85f0..08050977e 100644 --- a/web/public/locales/en/views/faceLibrary.json +++ b/web/public/locales/en/views/faceLibrary.json @@ -5,10 +5,6 @@ "invalidName": "Invalid name. Names can only include letters, numbers, spaces, apostrophes, underscores, and hyphens." }, "details": { - "subLabelScore": "Sub Label Score", - "scoreInfo": "The sub label score is the weighted score for all of the recognized face confidences, so this may differ from the score shown on the snapshot.", - "face": "Face Details", - "faceDesc": "Details of the tracked object that generated this face", "timestamp": "Timestamp", "unknown": "Unknown" }, @@ -19,8 +15,6 @@ }, "collections": "Collections", "createFaceLibrary": { - "title": "Create Collection", - "desc": "Create a new collection", "new": "Create New Face", "nextSteps": "To build a strong foundation:
[Remainder of diff truncated: the web classification model wizard component (TSX) implementing the step 3 UI was lost in extraction; only fragments survive, referencing the wizard.step3 translation keys above (generating, training, selectImagesDescription, noImages, classifying, errors.generationFailed) and an image onLoad handler that sets an image-loaded state.]
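End to end, the new endpoints and request bodies above can be exercised as follows — a minimal sketch assuming a Frigate instance at `frigate.local:5000` with the API served under `/api` and admin authentication already handled; the model, camera, and crop values are illustrative:

```python
import requests

BASE = "http://frigate.local:5000/api"  # assumed host/port; both endpoints require the admin role

# State models: map each camera to a normalized [x1, y1, x2, y2] crop (values 0-1).
resp = requests.post(
    f"{BASE}/classification/generate_examples/state",
    json={
        "model_name": "garage_door",
        "cameras": {"garage": [0.40, 0.35, 0.75, 0.80]},
    },
)
print(resp.json())  # {"success": True, "message": "Example generation completed"}

# Object models: collect thumbnails for a single object label.
resp = requests.post(
    f"{BASE}/classification/generate_examples/object",
    json={"model_name": "delivery", "label": "person"},
)
print(resp.json())
```

Either call leaves up to 24 visually distinct examples in `clips/<model_name>/train/`, which the wizard's step 3 UI then presents for labeling via `categorize_classification_image`.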