From c659d5ae584dee8f7a3f3b8a96a1a961a7d6757b Mon Sep 17 00:00:00 2001
From: Josh Hawkins <32435876+hawkeye217@users.noreply.github.com>
Date: Thu, 10 Oct 2024 17:19:19 -0500
Subject: [PATCH] Add config option to select fp16 or quantized jina vision
 model

---
 docs/docs/configuration/reference.md       |  2 ++
 docs/docs/configuration/semantic_search.md | 10 ++++++++++
 frigate/config/semantic_search.py          |  3 +++
 frigate/embeddings/embeddings.py           | 22 ++++++++++++++++------
 web/src/pages/Explore.tsx                  |  9 ++++++---
 web/src/types/frigateConfig.ts             |  1 +
 6 files changed, 38 insertions(+), 9 deletions(-)

diff --git a/docs/docs/configuration/reference.md b/docs/docs/configuration/reference.md
index 66e49fb7f..234478714 100644
--- a/docs/docs/configuration/reference.md
+++ b/docs/docs/configuration/reference.md
@@ -520,6 +520,8 @@ semantic_search:
   reindex: False
   # Optional: Set device used to run embeddings, options are AUTO, CPU, GPU. (default: shown below)
   device: "AUTO"
+  # Optional: Set the model size used for embeddings. (default: shown below)
+  model_size: "small"
 
 # Optional: Configuration for AI generated tracked object descriptions
 # NOTE: Semantic Search must be enabled for this to do anything.
diff --git a/docs/docs/configuration/semantic_search.md b/docs/docs/configuration/semantic_search.md
index 7cb8ca769..87ccbf802 100644
--- a/docs/docs/configuration/semantic_search.md
+++ b/docs/docs/configuration/semantic_search.md
@@ -39,6 +39,16 @@ The vision model is able to embed both images and text into the same vector spac
 
 The text model is used to embed tracked object descriptions and perform searches against them. Descriptions can be created, viewed, and modified on the Search page when clicking on the gray tracked object chip at the top left of each review item. See [the Generative AI docs](/configuration/genai.md) for more information on how to automatically generate tracked object descriptions.
 
+Differently weighted CLIP models are available and can be selected by setting the `model_size` config option:
+
+```yaml
+semantic_search:
+  enabled: True
+  model_size: small
+```
+
+Using `large` as the model size employs the full Jina model, which is appropriate for high performance systems running a GPU. The `small` size uses a quantized version of the model that requires much less RAM and runs faster on CPU, with a negligible difference in embedding quality. Most users will not need to change this setting from the default of `small`.
+
 ## Usage
 
 1. Semantic search is used in conjunction with the other filters available on the Search page. Use a combination of traditional filtering and semantic search for the best results.
diff --git a/frigate/config/semantic_search.py b/frigate/config/semantic_search.py
index ecdcd12d1..fdaf0fff4 100644
--- a/frigate/config/semantic_search.py
+++ b/frigate/config/semantic_search.py
@@ -13,3 +13,6 @@ class SemanticSearchConfig(FrigateBaseModel):
         default=False, title="Reindex all detections on startup."
     )
     device: str = Field(default="AUTO", title="Device Type")
+    model_size: str = Field(
+        default="small", title="The size of the embeddings model used."
+    )
diff --git a/frigate/embeddings/embeddings.py b/frigate/embeddings/embeddings.py
index e9d8ab833..f33bb823d 100644
--- a/frigate/embeddings/embeddings.py
+++ b/frigate/embeddings/embeddings.py
@@ -68,7 +68,9 @@ class Embeddings:
         models = [
             "jinaai/jina-clip-v1-text_model_fp16.onnx",
             "jinaai/jina-clip-v1-tokenizer",
-            "jinaai/jina-clip-v1-vision_model_fp16.onnx",
+            "jinaai/jina-clip-v1-vision_model_fp16.onnx"
+            if config.model_size == "large"
+            else "jinaai/jina-clip-v1-vision_model_quantized.onnx",
             "jinaai/jina-clip-v1-preprocessor_config.json",
         ]
 
@@ -100,13 +102,21 @@ class Embeddings:
             device="CPU",
         )
 
+        model_file = (
+            "vision_model_fp16.onnx"
+            if self.config.model_size == "large"
+            else "vision_model_quantized.onnx"
+        )
+
+        download_urls = {
+            model_file: f"https://huggingface.co/jinaai/jina-clip-v1/resolve/main/onnx/{model_file}",
+            "preprocessor_config.json": "https://huggingface.co/jinaai/jina-clip-v1/resolve/main/preprocessor_config.json",
+        }
+
         self.vision_embedding = GenericONNXEmbedding(
             model_name="jinaai/jina-clip-v1",
-            model_file="vision_model_fp16.onnx",
-            download_urls={
-                "vision_model_fp16.onnx": "https://huggingface.co/jinaai/jina-clip-v1/resolve/main/onnx/vision_model_fp16.onnx",
-                "preprocessor_config.json": "https://huggingface.co/jinaai/jina-clip-v1/resolve/main/preprocessor_config.json",
-            },
+            model_file=model_file,
+            download_urls=download_urls,
             embedding_function=jina_vision_embedding_function,
             model_type="vision",
             requestor=self.requestor,
diff --git a/web/src/pages/Explore.tsx b/web/src/pages/Explore.tsx
index 8607c8760..59c3fd895 100644
--- a/web/src/pages/Explore.tsx
+++ b/web/src/pages/Explore.tsx
@@ -207,9 +207,12 @@ export default function Explore() {
   const { payload: textTokenizerState } = useModelState(
     "jinaai/jina-clip-v1-tokenizer",
   );
-  const { payload: visionModelState } = useModelState(
-    "jinaai/jina-clip-v1-vision_model_fp16.onnx",
-  );
+  const modelFile =
+    config?.semantic_search.model_size === "large"
+      ? "jinaai/jina-clip-v1-vision_model_fp16.onnx"
+      : "jinaai/jina-clip-v1-vision_model_quantized.onnx";
+
+  const { payload: visionModelState } = useModelState(modelFile);
   const { payload: visionFeatureExtractorState } = useModelState(
     "jinaai/jina-clip-v1-preprocessor_config.json",
   );
diff --git a/web/src/types/frigateConfig.ts b/web/src/types/frigateConfig.ts
index 68003f0e0..fe889ed9d 100644
--- a/web/src/types/frigateConfig.ts
+++ b/web/src/types/frigateConfig.ts
@@ -417,6 +417,7 @@ export interface FrigateConfig {
 
   semantic_search: {
     enabled: boolean;
+    model_size: string;
   };
 
   snapshots: {
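
For reference, a minimal `config.yml` sketch of how the new option might be used once this patch is applied. It assumes a system with a dedicated GPU; the `device` value is illustrative, and the defaults (`small`, `AUTO`) remain appropriate for most installs:

```yaml
semantic_search:
  enabled: True
  # "large" selects the full fp16 Jina vision model; the default "small" selects the quantized model
  model_size: large
  # options are AUTO, CPU, GPU; GPU is shown here because the large model targets GPU systems
  device: "GPU"
```

Because `download_urls` is derived from `model_size`, changing this setting should cause the corresponding ONNX file to be fetched from Hugging Face the next time the embeddings are initialized.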