diff --git a/docs/docs/configuration/genai/config.md b/docs/docs/configuration/genai/config.md
index 7e5618b5b..3a54eeddf 100644
--- a/docs/docs/configuration/genai/config.md
+++ b/docs/docs/configuration/genai/config.md
@@ -41,12 +41,12 @@ If you are trying to use a single model for Frigate and HomeAssistant, it will n
 
 The following models are recommended:
 
-| Model             | Notes                                                                |
-| ----------------- | -------------------------------------------------------------------- |
-| `qwen3-vl`        | Strong visual and situational understanding, higher vram requirement |
-| `Intern3.5VL`     | Relatively fast with good vision comprehension                       |
-| `gemma3`          | Strong frame-to-frame understanding, slower inference times          |
-| `qwen2.5-vl`      | Fast but capable model with good vision comprehension                |
+| Model         | Notes                                                                |
+| ------------- | -------------------------------------------------------------------- |
+| `qwen3-vl`    | Strong visual and situational understanding, higher vram requirement |
+| `Intern3.5VL` | Relatively fast with good vision comprehension                       |
+| `gemma3`      | Strong frame-to-frame understanding, slower inference times          |
+| `qwen2.5-vl`  | Fast but capable model with good vision comprehension                |
 
 :::note
 
@@ -61,10 +61,10 @@ genai:
   provider: ollama
   base_url: http://localhost:11434
   model: minicpm-v:8b
-  provider_options:  # other Ollama client options can be defined
+  provider_options: # other Ollama client options can be defined
     keep_alive: -1
     options:
-      num_ctx: 8192  # make sure the context matches other services that are using ollama
+      num_ctx: 8192 # make sure the context matches other services that are using ollama
 ```
 
 ## Google Gemini
@@ -120,6 +120,23 @@ To use a different OpenAI-compatible API endpoint, set the `OPENAI_BASE_URL` env
 
 :::
 
+:::tip
+
+For OpenAI-compatible servers (such as llama.cpp) that don't expose the configured context size in the API response, you can manually specify the context size in `provider_options`:
+
+```yaml
+genai:
+  provider: openai
+  base_url: http://your-llama-server
+  model: your-model-name
+  provider_options:
+    context_size: 8192 # Specify the configured context size
+```
+
+This ensures Frigate uses the correct context window size when generating prompts.
+
+:::
+
 ## Azure OpenAI
 
 Microsoft offers several vision models through Azure OpenAI. A subscription is required.
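The docs tip above pairs with the `openai.py` change below: `context_size` lives in `provider_options` but is consumed by Frigate itself, never by the OpenAI client. A minimal sketch of why the key must be stripped before the client is constructed (assumes the `openai` Python package; the `timeout` option, placeholder API key, and URL are illustrative values, not part of the patch):

```python
from openai import OpenAI

# Mirrors the provider_options block from the docs example above;
# timeout is an arbitrary example of a real OpenAI client option.
provider_options = {"context_size": 8192, "timeout": 60.0}

# OpenAI() accepts only known keyword arguments, so forwarding
# context_size would raise a TypeError; filter it out first.
client_kwargs = {k: v for k, v in provider_options.items() if k != "context_size"}
client = OpenAI(api_key="unused", base_url="http://your-llama-server", **client_kwargs)
```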
diff --git a/frigate/genai/openai.py b/frigate/genai/openai.py
index 631cb3480..441e93a80 100644
--- a/frigate/genai/openai.py
+++ b/frigate/genai/openai.py
@@ -22,9 +22,14 @@ class OpenAIClient(GenAIClient):
 
     def _init_provider(self):
         """Initialize the client."""
-        return OpenAI(
-            api_key=self.genai_config.api_key, **self.genai_config.provider_options
-        )
+        # Extract context_size from provider_options as it's not a valid OpenAI client parameter
+        # It will be used in get_context_size() instead
+        provider_opts = {
+            k: v
+            for k, v in self.genai_config.provider_options.items()
+            if k != "context_size"
+        }
+        return OpenAI(api_key=self.genai_config.api_key, **provider_opts)
 
     def _send(self, prompt: str, images: list[bytes]) -> Optional[str]:
         """Submit a request to OpenAI."""
@@ -73,6 +78,16 @@ class OpenAIClient(GenAIClient):
         if self.context_size is not None:
             return self.context_size
 
+        # First check provider_options for manually specified context size
+        # This is necessary for llama.cpp and other OpenAI-compatible servers
+        # that don't expose the configured runtime context size in the API response
+        if "context_size" in self.genai_config.provider_options:
+            self.context_size = self.genai_config.provider_options["context_size"]
+            logger.debug(
+                f"Using context size {self.context_size} from provider_options for model {self.genai_config.model}"
+            )
+            return self.context_size
+
         try:
             models = self.provider.models.list()
             for model in models.data:
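For reference, the lookup order `get_context_size()` ends up with is: cached value first, then the manual `provider_options` override, then the models API. A self-contained sketch of that precedence using hypothetical stand-ins (`FakeConfig`/`FakeClient` are not Frigate's real types, and the API fallback is stubbed with an assumed default):

```python
class FakeConfig:
    """Stand-in for the genai config; mirrors the docs example above."""
    model = "your-model-name"
    provider_options = {"context_size": 8192}

class FakeClient:
    def __init__(self, config):
        self.genai_config = config
        self.context_size = None  # resolved lazily on first lookup

    def get_context_size(self) -> int:
        # 1. Return the cached value if a previous call resolved it.
        if self.context_size is not None:
            return self.context_size
        # 2. Honor a manual override from provider_options (the llama.cpp case).
        if "context_size" in self.genai_config.provider_options:
            self.context_size = self.genai_config.provider_options["context_size"]
            return self.context_size
        # 3. Otherwise fall back; the real client queries provider.models.list().
        self.context_size = 4096
        return self.context_size

client = FakeClient(FakeConfig())
assert client.get_context_size() == 8192
assert client.get_context_size() == 8192  # second call is served from the cache
```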