Allow setting context size for OpenAI-compatible endpoints

2026-05-09 15:05:26 +03:00 · 2026-01-12 12:57:14 -07:00 · 2026-01-12 12:57:14 -07:00 · 6aafa0c6f9
commit 6aafa0c6f9
parent 7fd1b93511
2 changed files with 43 additions and 11 deletions
--- a/docs/docs/configuration/genai/config.md
+++ b/docs/docs/configuration/genai/config.md
@@ -42,7 +42,7 @@ If you are trying to use a single model for Frigate and HomeAssistant, it will n
 The following models are recommended:

 | Model         | Notes                                                                |
-| ----------------- | -------------------------------------------------------------------- |
+| ------------- | -------------------------------------------------------------------- |
 | `qwen3-vl`    | Strong visual and situational understanding, higher vram requirement |
 | `Intern3.5VL` | Relatively fast with good vision comprehension                       |
 | `gemma3`      | Strong frame-to-frame understanding, slower inference times          |
@@ -120,6 +120,23 @@ To use a different OpenAI-compatible API endpoint, set the `OPENAI_BASE_URL` env

 :::

+:::tip
+
+For OpenAI-compatible servers (such as llama.cpp) that don't expose the configured context size in the API response, you can manually specify the context size in `provider_options`:
+
+```yaml
+genai:
+  provider: openai
+  base_url: http://your-llama-server
+  model: your-model-name
+  provider_options:
+    context_size: 8192 # Specify the configured context size
+```
+
+This ensures Frigate uses the correct context window size when generating prompts.
+
+:::
+
 ## Azure OpenAI

 Microsoft offers several vision models through Azure OpenAI. A subscription is required.
--- a/frigate/genai/openai.py
+++ b/frigate/genai/openai.py
@@ -22,9 +22,14 @@ class OpenAIClient(GenAIClient):

    def _init_provider(self):
        """Initialize the client."""
-        return OpenAI(
-            api_key=self.genai_config.api_key, **self.genai_config.provider_options
-        )
+        # Exclude context_size from provider_options: it is not a valid OpenAI client
+        # parameter and is consumed by get_context_size() instead
+        provider_opts = {
+            k: v
+            for k, v in self.genai_config.provider_options.items()
+            if k != "context_size"
+        }
+        return OpenAI(api_key=self.genai_config.api_key, **provider_opts)

    def _send(self, prompt: str, images: list[bytes]) -> Optional[str]:
        """Submit a request to OpenAI."""
@@ -73,6 +78,16 @@ class OpenAIClient(GenAIClient):
        if self.context_size is not None:
            return self.context_size

+        # First check provider_options for manually specified context size
+        # This is necessary for llama.cpp and other OpenAI-compatible servers
+        # that don't expose the configured runtime context size in the API response
+        if "context_size" in self.genai_config.provider_options:
+            self.context_size = self.genai_config.provider_options["context_size"]
+            logger.debug(
+                f"Using context size {self.context_size} from provider_options for model {self.genai_config.model}"
+            )
+            return self.context_size
+
        try:
            models = self.provider.models.list()
            for model in models.data: