Allow setting context size for OpenAI-compatible endpoints

Nicolas Mowen 2026-01-12 12:57:14 -07:00
parent 7fd1b93511
commit 6aafa0c6f9
2 changed files with 43 additions and 11 deletions


@@ -41,12 +41,12 @@ If you are trying to use a single model for Frigate and HomeAssistant, it will n
The following models are recommended:
| Model         | Notes                                                                 |
| ------------- | --------------------------------------------------------------------- |
| `qwen3-vl`    | Strong visual and situational understanding, higher VRAM requirement  |
| `Intern3.5VL` | Relatively fast with good vision comprehension                         |
| `gemma3`      | Strong frame-to-frame understanding, slower inference times            |
| `qwen2.5-vl`  | Fast but capable model with good vision comprehension                  |
:::note
@@ -61,10 +61,10 @@ genai:
```yaml
genai:
  provider: ollama
  base_url: http://localhost:11434
  model: minicpm-v:8b
  provider_options: # other Ollama client options can be defined
    keep_alive: -1
    options:
      num_ctx: 8192 # make sure the context matches other services that are using ollama
```
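
Any other service sharing this Ollama instance should request the same `num_ctx`, otherwise the server reloads the model with a different context window each time the value changes. A minimal sketch of what a matching request looks like, assuming the official `ollama` Python package (the model name and prompt are illustrative):

```python
# Minimal sketch (assumes the official `ollama` Python package).
# Services sharing one Ollama instance should request the same num_ctx,
# otherwise the server reloads the model with a different context window.
import ollama

response = ollama.chat(
    model="minicpm-v:8b",
    messages=[{"role": "user", "content": "Describe this scene."}],
    options={"num_ctx": 8192},  # match num_ctx in Frigate's genai config
)
print(response["message"]["content"])
```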
## Google Gemini
@@ -120,6 +120,23 @@ To use a different OpenAI-compatible API endpoint, set the `OPENAI_BASE_URL` env
:::
:::tip
For OpenAI-compatible servers (such as llama.cpp) that don't expose the configured context size in the API response, you can manually specify the context size in `provider_options`:
```yaml
genai:
  provider: openai
  base_url: http://your-llama-server
  model: your-model-name
  provider_options:
    context_size: 8192 # Specify the configured context size
```
This ensures Frigate uses the correct context window size when generating prompts.
:::
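
Knowing the context size lets the client budget how much prompt text fits in a request. A rough, hypothetical illustration of such budgeting (not Frigate's actual code; the 4-characters-per-token ratio is a common approximation):

```python
# Hypothetical illustration, not Frigate's actual code: with the context
# size known, a client can budget prompt length instead of guessing.
def max_prompt_chars(context_size: int, reserved_output_tokens: int = 512) -> int:
    """Rough character budget, assuming ~4 characters per token."""
    available_tokens = max(context_size - reserved_output_tokens, 0)
    return available_tokens * 4

print(max_prompt_chars(8192))  # -> 30720
```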
## Azure OpenAI
Microsoft offers several vision models through Azure OpenAI. A subscription is required.


@@ -22,9 +22,14 @@ class OpenAIClient(GenAIClient):

```python
    def _init_provider(self):
        """Initialize the client."""
        # Extract context_size from provider_options, as it's not a valid
        # OpenAI client parameter; it is used in get_context_size() instead.
        provider_opts = {
            k: v
            for k, v in self.genai_config.provider_options.items()
            if k != "context_size"
        }
        return OpenAI(api_key=self.genai_config.api_key, **provider_opts)

    def _send(self, prompt: str, images: list[bytes]) -> Optional[str]:
        """Submit a request to OpenAI."""
```
@@ -73,6 +78,16 @@ class OpenAIClient(GenAIClient):

```python
        if self.context_size is not None:
            return self.context_size

        # First check provider_options for a manually specified context size.
        # This is necessary for llama.cpp and other OpenAI-compatible servers
        # that don't expose the configured runtime context size in the API response.
        if "context_size" in self.genai_config.provider_options:
            self.context_size = self.genai_config.provider_options["context_size"]
            logger.debug(
                f"Using context size {self.context_size} from provider_options for model {self.genai_config.model}"
            )
            return self.context_size

        try:
            models = self.provider.models.list()
            for model in models.data:
```
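
Taken together, the lookup order is: the cached value, then `provider_options`, then whatever the models API reports. A standalone sketch of that precedence (names and the fallback default are illustrative, not Frigate's actual API):

```python
# Standalone sketch of the precedence; names and the default are illustrative.
DEFAULT_CONTEXT_SIZE = 4096

def resolve_context_size(provider_options: dict, api_reported: int | None) -> int:
    # 1. A manually configured value always wins.
    if "context_size" in provider_options:
        return provider_options["context_size"]
    # 2. Otherwise trust what the server reports, if anything.
    if api_reported is not None:
        return api_reported
    # 3. Fall back to a conservative default.
    return DEFAULT_CONTEXT_SIZE

assert resolve_context_size({"context_size": 8192}, 32768) == 8192
assert resolve_context_size({}, 32768) == 32768
assert resolve_context_size({}, None) == DEFAULT_CONTEXT_SIZE
```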