From 9ab6bf060358c3b42a663ab6e941ab1e416dfe69 Mon Sep 17 00:00:00 2001
From: Nicolas Mowen
Date: Tue, 17 Sep 2024 10:56:00 -0600
Subject: [PATCH] Set caching options for hardware providers

---
 docker/main/requirements-wheels-nodeps.txt |  2 +
 docker/main/requirements-wheels.txt        |  2 -
 frigate/embeddings/functions/clip.py       | 81 +++++++++++++++++++++-
 3 files changed, 82 insertions(+), 3 deletions(-)

diff --git a/docker/main/requirements-wheels-nodeps.txt b/docker/main/requirements-wheels-nodeps.txt
index 84eac63c2..62bca937b 100644
--- a/docker/main/requirements-wheels-nodeps.txt
+++ b/docker/main/requirements-wheels-nodeps.txt
@@ -1 +1,3 @@
+# Embeddings
+chromadb == 0.5.0
 onnx_clip == 4.0.*
diff --git a/docker/main/requirements-wheels.txt b/docker/main/requirements-wheels.txt
index 83265f0b7..e0db1d7b7 100644
--- a/docker/main/requirements-wheels.txt
+++ b/docker/main/requirements-wheels.txt
@@ -32,8 +32,6 @@ unidecode == 1.3.*
 openvino == 2024.1.*
 onnxruntime-openvino == 1.18.* ; platform_machine == 'x86_64'
 onnxruntime == 1.18.* ; platform_machine == 'aarch64'
-# Embeddings
-chromadb == 0.5.0
 # Generative AI
 google-generativeai == 0.6.*
 ollama == 0.2.*
diff --git a/frigate/embeddings/functions/clip.py b/frigate/embeddings/functions/clip.py
index 867938aff..16d020cf0 100644
--- a/frigate/embeddings/functions/clip.py
+++ b/frigate/embeddings/functions/clip.py
@@ -1,9 +1,13 @@
 """CLIP Embeddings for Frigate."""
 
+import errno
+import logging
 import os
+from pathlib import Path
 from typing import Tuple, Union
 
 import onnxruntime as ort
+import requests
 from chromadb import EmbeddingFunction, Embeddings
 from chromadb.api.types import (
     Documents,
@@ -39,10 +43,85 @@ class Clip(OnnxClip):
         models = []
         for model_file in [IMAGE_MODEL_FILE, TEXT_MODEL_FILE]:
             path = os.path.join(MODEL_CACHE_DIR, "clip", model_file)
-            models.append(OnnxClip._load_model(path, silent))
+            models.append(Clip._load_model(path, silent))
 
         return models[0], models[1]
 
+    @staticmethod
+    def _load_model(path: str, silent: bool):
+        """Load an ONNX model, enabling on-disk caches for hardware providers.
+
+        Creates the inference session with every available execution provider
+        and a matching list of per-provider options. If the model file is
+        missing or the session cannot be created, the file is downloaded from
+        the public S3 bucket and the session is created from the download.
+
+        Args:
+            path: Location of the model file on disk.
+            silent: When True, do not log that the model is being downloaded.
+
+        Returns:
+            An ``ort.InferenceSession`` for the model at ``path``.
+        """
+        providers = ort.get_available_providers()
+        # provider_options must stay index-aligned with providers, so every
+        # provider gets an entry even when it has nothing to configure.
+        options = []
+
+        for provider in providers:
+            if provider == "TensorrtExecutionProvider":
+                options.append(
+                    {
+                        "trt_timing_cache_enable": True,
+                        "trt_timing_cache_path": "/config/model_cache/tensorrt/ort",
+                        "trt_engine_cache_enable": True,
+                        "trt_engine_cache_path": "/config/model_cache/tensorrt/ort/trt-engines",
+                    }
+                )
+            elif provider == "OpenVINOExecutionProvider":
+                options.append({"cache_dir": "/config/model_cache/openvino/ort"})
+            else:
+                options.append({})
+
+        try:
+            if os.path.exists(path):
+                return ort.InferenceSession(
+                    path, providers=providers, provider_options=options
+                )
+            else:
+                raise FileNotFoundError(
+                    errno.ENOENT,
+                    os.strerror(errno.ENOENT),
+                    path,
+                )
+        except Exception:
+            s3_url = f"https://lakera-clip.s3.eu-west-1.amazonaws.com/{os.path.basename(path)}"
+            if not silent:
+                logging.info(
+                    f"The model file ({path}) doesn't exist "
+                    f"or it is invalid. Downloading it from the public S3 "
+                    f"bucket: {s3_url}."  # noqa: E501
+                )
+
+            # Download from S3
+            # Saving to a temporary file first to avoid corrupting the file
+            temporary_filename = Path(path).with_name(os.path.basename(path) + ".part")
+
+            # Create any missing directories in the path
+            temporary_filename.parent.mkdir(parents=True, exist_ok=True)
+
+            with requests.get(s3_url, stream=True) as r:
+                r.raise_for_status()
+                with open(temporary_filename, "wb") as f:
+                    for chunk in r.iter_content(chunk_size=8192):
+                        f.write(chunk)
+                    f.flush()
+            # Finally move the temporary file to the correct location
+            temporary_filename.rename(path)
+            return ort.InferenceSession(
+                path, providers=providers, provider_options=options
+            )
+
 
 class ClipEmbedding(EmbeddingFunction):
     """Embedding function for CLIP model used in Chroma."""