Allow setting context size for openai compatible endpoints
This commit is contained in:
parent 7fd1b93511
commit 6aafa0c6f9
@@ -42,7 +42,7 @@ If you are trying to use a single model for Frigate and HomeAssistant, it will n
 The following models are recommended:

 | Model         | Notes                                                                 |
-| ----------------- | -------------------------------------------------------------------- |
+| ------------- | -------------------------------------------------------------------- |
 | `qwen3-vl`    | Strong visual and situational understanding, higher vram requirement |
 | `Intern3.5VL` | Relatively fast with good vision comprehension                       |
 | `gemma3`      | Strong frame-to-frame understanding, slower inference times          |
@@ -120,6 +120,23 @@ To use a different OpenAI-compatible API endpoint, set the `OPENAI_BASE_URL` env
 :::

+:::tip
+
+For OpenAI-compatible servers (such as llama.cpp) that don't expose the configured context size in the API response, you can manually specify the context size in `provider_options`:
+
+```yaml
+genai:
+  provider: openai
+  base_url: http://your-llama-server
+  model: your-model-name
+  provider_options:
+    context_size: 8192 # Specify the configured context size
+```
+
+This ensures Frigate uses the correct context window size when generating prompts.
+
+:::
+
 ## Azure OpenAI

 Microsoft offers several vision models through Azure OpenAI. A subscription is required.
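The tip above depends on `context_size` being stripped out of `provider_options` before the OpenAI client is constructed, which is what the next hunk does. As a minimal sketch of why that filtering is needed (illustrative values; `OpenAI` is the real client class from the `openai` package, everything else here is assumed):

```python
from openai import OpenAI

# Illustrative values only; in Frigate these come from the genai config shown above.
api_key = "unused"
base_url = "http://your-llama-server/v1"
provider_options = {"context_size": 8192}

# The OpenAI client constructor has no `context_size` parameter, so forwarding
# provider_options unfiltered would raise a TypeError:
#   OpenAI(api_key=api_key, base_url=base_url, **provider_options)
#
# Stripping the key first, as _init_provider() does in the hunk below, keeps the
# client construction valid while the value stays available for get_context_size().
client_opts = {k: v for k, v in provider_options.items() if k != "context_size"}
client = OpenAI(api_key=api_key, base_url=base_url, **client_opts)
```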
@@ -22,9 +22,14 @@ class OpenAIClient(GenAIClient):

     def _init_provider(self):
         """Initialize the client."""
-        return OpenAI(
-            api_key=self.genai_config.api_key, **self.genai_config.provider_options
-        )
+        # Extract context_size from provider_options as it's not a valid OpenAI client parameter
+        # It will be used in get_context_size() instead
+        provider_opts = {
+            k: v
+            for k, v in self.genai_config.provider_options.items()
+            if k != "context_size"
+        }
+        return OpenAI(api_key=self.genai_config.api_key, **provider_opts)

     def _send(self, prompt: str, images: list[bytes]) -> Optional[str]:
         """Submit a request to OpenAI."""
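To make the resulting precedence explicit, here is a small standalone sketch of the lookup order that the next hunk adds to `get_context_size()`: cached value, then a `provider_options` override, then whatever the models API reported. `resolve_context_size` is a hypothetical helper, not Frigate code:

```python
import logging
from typing import Optional

logger = logging.getLogger(__name__)


def resolve_context_size(
    cached: Optional[int],
    provider_options: dict,
    api_reported: Optional[int],
) -> Optional[int]:
    """Hypothetical helper showing the lookup order: cached value first,
    then a provider_options override, then whatever the models API reported."""
    if cached is not None:
        return cached
    if "context_size" in provider_options:
        size = provider_options["context_size"]
        logger.debug("Using context size %s from provider_options", size)
        return size
    return api_reported


# With the override configured, the API-reported value is never consulted.
assert resolve_context_size(None, {"context_size": 8192}, api_reported=None) == 8192
# Without it, the helper falls back to whatever the API returned.
assert resolve_context_size(None, {}, api_reported=4096) == 4096
```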
@@ -73,6 +78,16 @@ class OpenAIClient(GenAIClient):
         if self.context_size is not None:
             return self.context_size

+        # First check provider_options for manually specified context size
+        # This is necessary for llama.cpp and other OpenAI-compatible servers
+        # that don't expose the configured runtime context size in the API response
+        if "context_size" in self.genai_config.provider_options:
+            self.context_size = self.genai_config.provider_options["context_size"]
+            logger.debug(
+                f"Using context size {self.context_size} from provider_options for model {self.genai_config.model}"
+            )
+            return self.context_size
+
         try:
             models = self.provider.models.list()
             for model in models.data:
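As a rough usage check of the new branch, the stand-in client below mimics only this lookup. The attribute names `model` and `provider_options` come from the diff above, while `FakeClient` itself is hypothetical and not part of Frigate:

```python
from types import SimpleNamespace
from typing import Optional


class FakeClient:
    """Stand-in reproducing only the provider_options branch from the hunk above."""

    def __init__(self, genai_config):
        self.genai_config = genai_config
        self.context_size = None

    def get_context_size(self) -> Optional[int]:
        if self.context_size is not None:
            return self.context_size
        if "context_size" in self.genai_config.provider_options:
            self.context_size = self.genai_config.provider_options["context_size"]
            return self.context_size
        # The real client would go on to query the provider's models API here.
        return None


config = SimpleNamespace(model="your-model-name", provider_options={"context_size": 8192})
client = FakeClient(config)
assert client.get_context_size() == 8192
# A second call returns the cached value without touching provider_options again.
assert client.get_context_size() == 8192
```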