From c3fa0cefcace2a78e2c8a8ebfc523960cd335fae Mon Sep 17 00:00:00 2001 From: leccelecce <24962424+leccelecce@users.noreply.github.com> Date: Sun, 22 Dec 2024 20:30:02 +0000 Subject: [PATCH] GenAI: add ability to save JPGs sent to provider --- docs/docs/configuration/genai.md | 3 +++ docs/docs/configuration/reference.md | 2 ++ frigate/config/camera/genai.py | 3 +++ frigate/embeddings/maintainer.py | 26 +++++++++++++++++++++++++- 4 files changed, 33 insertions(+), 1 deletion(-) diff --git a/docs/docs/configuration/genai.md b/docs/docs/configuration/genai.md index 9d5f62b8c..221061d9d 100644 --- a/docs/docs/configuration/genai.md +++ b/docs/docs/configuration/genai.md @@ -180,6 +180,8 @@ Prompts can also be overriden at the camera level to provide a more detailed pro Optionally, you can generate the description using a snapshot (if enabled) by setting `use_snapshot` to `True`. By default, this is set to `False`, which sends the uncompressed images from the `detect` stream collected over the object's lifetime to the model. Once the object lifecycle ends, only a single compressed and cropped thumbnail is saved with the tracked object. Using a snapshot might be useful when you want to _regenerate_ a tracked object's description as it will provide the AI with a higher-quality image (typically downscaled by the AI itself) than the cropped/compressed thumbnail. Using a snapshot otherwise has a trade-off in that only a single image is sent to your provider, which will limit the model's ability to determine object movement or direction. +If you are not using `use_snapshot` and want to review the exact images sent to your provider, set `save_thumbnails` to `True`. Each thumbnail collected for an event is then saved to the clips directory as a JPG named `thumb-<camera>-<event id>-<index>.jpg`.
+ ```yaml cameras: front_door: @@ -194,6 +196,7 @@ cameras: - cat required_zones: - steps + save_thumbnails: False ``` ### Experiment with prompts diff --git a/docs/docs/configuration/reference.md b/docs/docs/configuration/reference.md index b13b137d2..4ab2a4b5d 100644 --- a/docs/docs/configuration/reference.md +++ b/docs/docs/configuration/reference.md @@ -760,6 +760,8 @@ cameras: - cat # Optional: Restrict generation to objects that entered any of the listed zones (default: none, all zones qualify) required_zones: [] + # Optional: Save thumbnails sent to generative AI for review/debugging purposes (default: shown below) + save_thumbnails: False # Optional ui: diff --git a/frigate/config/camera/genai.py b/frigate/config/camera/genai.py index 35c26eaf8..0239d15d7 100644 --- a/frigate/config/camera/genai.py +++ b/frigate/config/camera/genai.py @@ -38,6 +38,9 @@ class GenAICameraConfig(BaseModel): default_factory=list, title="List of required zones to be entered in order to run generative AI.", ) + save_thumbnails: bool = Field( + default=False, title="Save thumbnails sent to generative AI." 
+ ) @field_validator("required_zones", mode="before") @classmethod diff --git a/frigate/embeddings/maintainer.py b/frigate/embeddings/maintainer.py index 4f81ec2d6..be9ea6e44 100644 --- a/frigate/embeddings/maintainer.py +++ b/frigate/embeddings/maintainer.py @@ -217,6 +217,8 @@ class EmbeddingMaintainer(threading.Thread): _, buffer = cv2.imencode(".jpg", cropped_image) snapshot_image = buffer.tobytes() + num_thumbnails = len(self.tracked_events.get(event_id, [])) + embed_image = ( [snapshot_image] if event.has_snapshot and camera_config.genai.use_snapshot @@ -225,11 +227,33 @@ class EmbeddingMaintainer(threading.Thread): data["thumbnail"] for data in self.tracked_events[event_id] ] - if len(self.tracked_events.get(event_id, [])) > 0 + if num_thumbnails > 0 else [thumbnail] ) ) + if camera_config.genai.save_thumbnails and num_thumbnails > 0: + logger.debug( + f"Saving {num_thumbnails} thumbnails for event {event.id}" + ) + + for idx, data in enumerate(self.tracked_events[event_id]): + jpg_bytes: bytes = data["thumbnail"] + + if jpg_bytes is None: + logger.warning( + f"Unable to save thumbnail {idx} for {event.id}." + ) + else: + with open( + os.path.join( + CLIPS_DIR, + f"thumb-{camera}-{event.id}-{idx}.jpg", + ), + "wb", + ) as j: + j.write(jpg_bytes) + # Generate the description. Call happens in a thread since it is network bound. threading.Thread( target=self._embed_description,