From 9ab6bf060358c3b42a663ab6e941ab1e416dfe69 Mon Sep 17 00:00:00 2001
From: Nicolas Mowen <nickmowen213@gmail.com>
Date: Tue, 17 Sep 2024 10:56:00 -0600
Subject: [PATCH] Set caching options for hardware providers

---
 docker/main/requirements-wheels-nodeps.txt |  2 +
 docker/main/requirements-wheels.txt        |  2 -
 frigate/embeddings/functions/clip.py       | 63 +++++++++++++++++++++-
 3 files changed, 64 insertions(+), 3 deletions(-)

diff --git a/docker/main/requirements-wheels-nodeps.txt b/docker/main/requirements-wheels-nodeps.txt
index 84eac63c2..62bca937b 100644
--- a/docker/main/requirements-wheels-nodeps.txt
+++ b/docker/main/requirements-wheels-nodeps.txt
@@ -1 +1,3 @@
+# Embeddings
+chromadb == 0.5.0
 onnx_clip == 4.0.*
diff --git a/docker/main/requirements-wheels.txt b/docker/main/requirements-wheels.txt
index 83265f0b7..e0db1d7b7 100644
--- a/docker/main/requirements-wheels.txt
+++ b/docker/main/requirements-wheels.txt
@@ -32,8 +32,6 @@ unidecode == 1.3.*
 openvino == 2024.1.*
 onnxruntime-openvino == 1.18.* ; platform_machine == 'x86_64'
 onnxruntime == 1.18.* ; platform_machine == 'aarch64'
-# Embeddings
-chromadb == 0.5.0
 # Generative AI
 google-generativeai == 0.6.*
 ollama == 0.2.*
diff --git a/frigate/embeddings/functions/clip.py b/frigate/embeddings/functions/clip.py
index 867938aff..16d020cf0 100644
--- a/frigate/embeddings/functions/clip.py
+++ b/frigate/embeddings/functions/clip.py
@@ -1,9 +1,13 @@
 """CLIP Embeddings for Frigate."""
 
+import errno
+import logging
 import os
+from pathlib import Path
 from typing import Tuple, Union
 
 import onnxruntime as ort
+import requests
 from chromadb import EmbeddingFunction, Embeddings
 from chromadb.api.types import (
     Documents,
@@ -39,10 +43,67 @@ class Clip(OnnxClip):
         models = []
         for model_file in [IMAGE_MODEL_FILE, TEXT_MODEL_FILE]:
             path = os.path.join(MODEL_CACHE_DIR, "clip", model_file)
-            models.append(OnnxClip._load_model(path, silent))
+            models.append(Clip._load_model(path, silent))
 
         return models[0], models[1]
 
+    @staticmethod
+    def _load_model(path: str, silent: bool):
+        """Create an ONNX Runtime session for ``path``, downloading it if needed.
+
+        Every available execution provider gets one options dict (cache paths
+        for TensorRT and OpenVINO, empty otherwise). If the file is missing or
+        the session cannot be created, the model is fetched from the public S3
+        bucket and loaded again. ``silent`` suppresses the download log message.
+        """
+        providers = ort.get_available_providers()
+        options = []
+
+        for provider in providers:
+            if provider == "TensorrtExecutionProvider":
+                options.append(
+                    {
+                        "trt_timing_cache_enable": True,
+                        "trt_timing_cache_path": "/config/model_cache/tensorrt/ort",
+                        "trt_engine_cache_enable": True,
+                        "trt_engine_cache_path": "/config/model_cache/tensorrt/ort/trt-engines",
+                    }
+                )
+            elif provider == "OpenVINOExecutionProvider":
+                options.append({"cache_dir": "/config/model_cache/openvino/ort"})
+            else:
+                options.append({})
+
+        try:
+            if not os.path.exists(path):
+                raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), path)
+            # Per-provider settings go in provider_options, aligned with providers.
+            return ort.InferenceSession(
+                path, providers=providers, provider_options=options
+            )
+        except Exception:
+            s3_url = f"https://lakera-clip.s3.eu-west-1.amazonaws.com/{os.path.basename(path)}"
+            if not silent:
+                logging.info(
+                    f"The model file ({path}) doesn't exist "
+                    f"or it is invalid. Downloading it from the public S3 "
+                    f"bucket: {s3_url}."  # noqa: E501
+                )
+            # Download to a ".part" file first so a failed transfer never leaves
+            # a truncated model at the final path.
+            temporary_filename = Path(path).with_name(os.path.basename(path) + ".part")
+            temporary_filename.parent.mkdir(parents=True, exist_ok=True)
+            with requests.get(s3_url, stream=True) as r:
+                r.raise_for_status()
+                with open(temporary_filename, "wb") as f:
+                    for chunk in r.iter_content(chunk_size=8192):
+                        f.write(chunk)
+            # Move the completed download into place and load it.
+            temporary_filename.rename(path)
+            return ort.InferenceSession(
+                path, providers=providers, provider_options=options
+            )
+
 
 class ClipEmbedding(EmbeddingFunction):
     """Embedding function for CLIP model used in Chroma."""