From c3fa0cefcace2a78e2c8a8ebfc523960cd335fae Mon Sep 17 00:00:00 2001
From: leccelecce <24962424+leccelecce@users.noreply.github.com>
Date: Sun, 22 Dec 2024 20:30:02 +0000
Subject: [PATCH] GenAI: add ability to save JPGs sent to provider

---
 docs/docs/configuration/genai.md     |  3 +++
 docs/docs/configuration/reference.md |  2 ++
 frigate/config/camera/genai.py       |  3 +++
 frigate/embeddings/maintainer.py     | 26 +++++++++++++++++++++++++-
 4 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/docs/docs/configuration/genai.md b/docs/docs/configuration/genai.md
index 9d5f62b8c..221061d9d 100644
--- a/docs/docs/configuration/genai.md
+++ b/docs/docs/configuration/genai.md
@@ -180,6 +180,8 @@ Prompts can also be overriden at the camera level to provide a more detailed pro
 
 Optionally, you can generate the description using a snapshot (if enabled) by setting `use_snapshot` to `True`. By default, this is set to `False`, which sends the uncompressed images from the `detect` stream collected over the object's lifetime to the model. Once the object lifecycle ends, only a single compressed and cropped thumbnail is saved with the tracked object. Using a snapshot might be useful when you want to _regenerate_ a tracked object's description as it will provide the AI with a higher-quality image (typically downscaled by the AI itself) than the cropped/compressed thumbnail. Using a snapshot otherwise has a trade-off in that only a single image is sent to your provider, which will limit the model's ability to determine object movement or direction.
 
+If you are not using `use_snapshot` and want to review the exact images being sent to your provider, set `save_thumbnails` to `True`. Each image is saved as a JPG in the clips directory, named `thumb-<camera>-<event id>-<index>.jpg`.
+
 ```yaml
 cameras:
   front_door:
@@ -194,6 +196,7 @@ cameras:
         - cat
       required_zones:
         - steps
+      save_thumbnails: False
 ```
 
 ### Experiment with prompts
diff --git a/docs/docs/configuration/reference.md b/docs/docs/configuration/reference.md
index b13b137d2..4ab2a4b5d 100644
--- a/docs/docs/configuration/reference.md
+++ b/docs/docs/configuration/reference.md
@@ -760,6 +760,8 @@ cameras:
         - cat
       # Optional: Restrict generation to objects that entered any of the listed zones (default: none, all zones qualify)
       required_zones: []
+      # Optional: Save thumbnails sent to generative AI for review/debugging purposes (default: shown below)
+      save_thumbnails: False
 
 # Optional
 ui:
diff --git a/frigate/config/camera/genai.py b/frigate/config/camera/genai.py
index 35c26eaf8..0239d15d7 100644
--- a/frigate/config/camera/genai.py
+++ b/frigate/config/camera/genai.py
@@ -38,6 +38,9 @@ class GenAICameraConfig(BaseModel):
         default_factory=list,
         title="List of required zones to be entered in order to run generative AI.",
     )
+    save_thumbnails: bool = Field(
+        default=False, title="Save thumbnails sent to generative AI."
+    )
 
     @field_validator("required_zones", mode="before")
     @classmethod
diff --git a/frigate/embeddings/maintainer.py b/frigate/embeddings/maintainer.py
index 4f81ec2d6..be9ea6e44 100644
--- a/frigate/embeddings/maintainer.py
+++ b/frigate/embeddings/maintainer.py
@@ -217,6 +217,8 @@ class EmbeddingMaintainer(threading.Thread):
                             _, buffer = cv2.imencode(".jpg", cropped_image)
                             snapshot_image = buffer.tobytes()
 
+                    num_thumbnails = len(self.tracked_events.get(event_id, []))
+
                     embed_image = (
                         [snapshot_image]
                         if event.has_snapshot and camera_config.genai.use_snapshot
@@ -225,11 +227,33 @@ class EmbeddingMaintainer(threading.Thread):
                                 data["thumbnail"]
                                 for data in self.tracked_events[event_id]
                             ]
-                            if len(self.tracked_events.get(event_id, [])) > 0
+                            if num_thumbnails > 0
                             else [thumbnail]
                         )
                     )
 
+                    if camera_config.genai.save_thumbnails and num_thumbnails > 0:
+                        logger.debug(
+                            f"Saving {num_thumbnails} thumbnails for event {event.id}"
+                        )
+
+                        for idx, data in enumerate(self.tracked_events[event_id]):
+                            jpg_bytes: bytes = data["thumbnail"]
+
+                            if jpg_bytes is None:
+                                logger.warning(
+                                    f"Unable to save thumbnail {idx} for {event.id}."
+                                )
+                            else:
+                                with open(
+                                    os.path.join(
+                                        CLIPS_DIR,
+                                        f"thumb-{camera}-{event.id}-{idx}.jpg",
+                                    ),
+                                    "wb",
+                                ) as j:
+                                    j.write(jpg_bytes)
+
                     # Generate the description. Call happens in a thread since it is network bound.
                     threading.Thread(
                         target=self._embed_description,