Set caching options for hardware providers

Nicolas Mowen 2024-09-17 10:56:00 -06:00
parent 90d7fc6bc5
commit 9ab6bf0603
3 changed files with 64 additions and 3 deletions

View File

@@ -1 +1,3 @@
+# Embeddings
+chromadb == 0.5.0
 onnx_clip == 4.0.*

View File

@@ -32,8 +32,6 @@ unidecode == 1.3.*
 openvino == 2024.1.*
 onnxruntime-openvino == 1.18.* ; platform_machine == 'x86_64'
 onnxruntime == 1.18.* ; platform_machine == 'aarch64'
-# Embeddings
-chromadb == 0.5.0
 # Generative AI
 google-generativeai == 0.6.*
 ollama == 0.2.*

View File

@@ -1,9 +1,13 @@
 """CLIP Embeddings for Frigate."""
 
+import errno
+import logging
 import os
+from pathlib import Path
 from typing import Tuple, Union
 
 import onnxruntime as ort
+import requests
 from chromadb import EmbeddingFunction, Embeddings
 from chromadb.api.types import (
     Documents,
@@ -39,10 +43,67 @@ class Clip(OnnxClip):
         models = []
         for model_file in [IMAGE_MODEL_FILE, TEXT_MODEL_FILE]:
             path = os.path.join(MODEL_CACHE_DIR, "clip", model_file)
-            models.append(OnnxClip._load_model(path, silent))
+            models.append(Clip._load_model(path, silent))
         return models[0], models[1]
 
+    @staticmethod
+    def _load_model(path: str, silent: bool):
+        providers = ort.get_available_providers()
+        options = []
+
+        # Build one provider_options dict per provider, enabling on-disk
+        # caching for the hardware-accelerated providers.
+        for provider in providers:
+            if provider == "TensorrtExecutionProvider":
+                options.append(
+                    {
+                        "trt_timing_cache_enable": True,
+                        "trt_timing_cache_path": "/config/model_cache/tensorrt/ort",
+                        "trt_engine_cache_enable": True,
+                        "trt_engine_cache_path": "/config/model_cache/tensorrt/ort/trt-engines",
+                    }
+                )
+            elif provider == "OpenVINOExecutionProvider":
+                options.append({"cache_dir": "/config/model_cache/openvino/ort"})
+            else:
+                options.append({})
+
+        try:
+            if os.path.exists(path):
+                return ort.InferenceSession(
+                    path, providers=providers, provider_options=options
+                )
+            else:
+                raise FileNotFoundError(
+                    errno.ENOENT,
+                    os.strerror(errno.ENOENT),
+                    path,
+                )
+        except Exception:
+            s3_url = f"https://lakera-clip.s3.eu-west-1.amazonaws.com/{os.path.basename(path)}"
+            if not silent:
+                logging.info(
+                    f"The model file ({path}) doesn't exist "
+                    f"or it is invalid. Downloading it from the public S3 "
+                    f"bucket: {s3_url}."  # noqa: E501
+                )
+
+            # Download from S3, saving to a temporary file first to avoid
+            # corrupting the target file if the download is interrupted.
+            temporary_filename = Path(path).with_name(os.path.basename(path) + ".part")
+
+            # Create any missing directories in the path
+            temporary_filename.parent.mkdir(parents=True, exist_ok=True)
+
+            with requests.get(s3_url, stream=True) as r:
+                r.raise_for_status()
+                with open(temporary_filename, "wb") as f:
+                    for chunk in r.iter_content(chunk_size=8192):
+                        f.write(chunk)
+                    f.flush()
+
+            # Finally move the temporary file to the correct location
+            temporary_filename.rename(path)
+            return ort.InferenceSession(
+                path, providers=providers, provider_options=options
+            )
 
 
 class ClipEmbedding(EmbeddingFunction):
     """Embedding function for CLIP model used in Chroma."""