From 05a4605e7696aceadbfcfbddd1340e0973802088 Mon Sep 17 00:00:00 2001
From: Nicolas Mowen
Date: Thu, 2 Oct 2025 08:42:54 -0600
Subject: [PATCH] Calculate context size to determine number of images

---
 .../data_processing/post/review_descriptions.py  | 16 +++++++++++++++-
 frigate/genai/__init__.py                        |  4 ++++
 frigate/genai/azure-openai.py                    |  4 ++++
 frigate/genai/gemini.py                          |  5 +++++
 frigate/genai/ollama.py                          |  6 ++++++
 frigate/genai/openai.py                          |  5 +++++
 6 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/frigate/data_processing/post/review_descriptions.py b/frigate/data_processing/post/review_descriptions.py
index 88dcf7300..27c3a2612 100644
--- a/frigate/data_processing/post/review_descriptions.py
+++ b/frigate/data_processing/post/review_descriptions.py
@@ -43,6 +43,19 @@ class ReviewDescriptionProcessor(PostProcessorApi):
         self.review_descs_dps = EventsPerSecond()
         self.review_descs_dps.start()
 
+    def calculate_frame_count(self) -> int:
+        """Calculate optimal number of frames based on context size."""
+        context_size = self.genai_client.get_context_size()
+
+        if context_size > 10000:
+            return 18
+        elif context_size > 6000:
+            return 14
+        elif context_size > 4000:
+            return 10
+        else:
+            return 8
+
     def process_data(self, data, data_type):
         self.metrics.review_desc_dps.value = self.review_descs_dps.eps()
 
@@ -176,7 +189,6 @@ class ReviewDescriptionProcessor(PostProcessorApi):
         camera: str,
         start_time: float,
         end_time: float,
-        desired_frame_count: int = 12,
     ) -> list[str]:
         preview_dir = os.path.join(CACHE_DIR, "preview_frames")
         file_start = f"preview_{camera}"
@@ -203,6 +215,8 @@ class ReviewDescriptionProcessor(PostProcessorApi):
                 all_frames.append(os.path.join(preview_dir, file))
 
         frame_count = len(all_frames)
+        desired_frame_count = self.calculate_frame_count()
+
         if frame_count <= desired_frame_count:
             return all_frames
 
diff --git a/frigate/genai/__init__.py b/frigate/genai/__init__.py
index 15ef9c4aa..ecb0244e2 100644
--- a/frigate/genai/__init__.py
+++ b/frigate/genai/__init__.py
@@ -253,6 +253,10 @@ Rules for the report:
         """Submit a request to the provider."""
         return None
 
+    def get_context_size(self) -> int:
+        """Get the context window size for this provider in tokens."""
+        return 4096
+
 
 def get_genai_client(config: FrigateConfig) -> Optional[GenAIClient]:
     """Get the GenAI client."""
diff --git a/frigate/genai/azure-openai.py b/frigate/genai/azure-openai.py
index 155fa2431..eba8b47c0 100644
--- a/frigate/genai/azure-openai.py
+++ b/frigate/genai/azure-openai.py
@@ -71,3 +71,7 @@ class OpenAIClient(GenAIClient):
         if len(result.choices) > 0:
             return result.choices[0].message.content.strip()
         return None
+
+    def get_context_size(self) -> int:
+        """Get the context window size for Azure OpenAI."""
+        return 128000
diff --git a/frigate/genai/gemini.py b/frigate/genai/gemini.py
index 8c355b37a..f94448d75 100644
--- a/frigate/genai/gemini.py
+++ b/frigate/genai/gemini.py
@@ -53,3 +53,8 @@ class GeminiClient(GenAIClient):
             # No description was generated
             return None
         return description
+
+    def get_context_size(self) -> int:
+        """Get the context window size for Gemini."""
+        # Gemini Pro Vision has a 1M token context window
+        return 1000000
diff --git a/frigate/genai/ollama.py b/frigate/genai/ollama.py
index 0fb44d785..30247e31c 100644
--- a/frigate/genai/ollama.py
+++ b/frigate/genai/ollama.py
@@ -54,3 +54,9 @@ class OllamaClient(GenAIClient):
         except (TimeoutException, ResponseError) as e:
             logger.warning("Ollama returned an error: %s", str(e))
             return None
+
+    def get_context_size(self) -> int:
+        """Get the context window size for Ollama."""
+        return self.genai_config.provider_options.get("options", {}).get(
+            "num_ctx", 4096
+        )
diff --git a/frigate/genai/openai.py b/frigate/genai/openai.py
index eb3016fad..046a18aa9 100644
--- a/frigate/genai/openai.py
+++ b/frigate/genai/openai.py
@@ -66,3 +66,8 @@ class OpenAIClient(GenAIClient):
         except (TimeoutException, Exception) as e:
             logger.warning("OpenAI returned an error: %s", str(e))
             return None
+
+    def get_context_size(self) -> int:
+        """Get the context window size for OpenAI."""
+        # OpenAI GPT-4 Vision models have 128K token context window
+        return 128000