diff --git a/docs/docs/configuration/genai/config.md b/docs/docs/configuration/genai/config.md
index 7e5618b5b..3a54eeddf 100644
--- a/docs/docs/configuration/genai/config.md
+++ b/docs/docs/configuration/genai/config.md
@@ -41,12 +41,12 @@ If you are trying to use a single model for Frigate and HomeAssistant, it will n
 
 The following models are recommended:
 
-| Model             | Notes                                                                |
-| ----------------- | -------------------------------------------------------------------- |
-| `qwen3-vl`        | Strong visual and situational understanding, higher vram requirement |
-| `Intern3.5VL`     | Relatively fast with good vision comprehension                       |
-| `gemma3`          | Strong frame-to-frame understanding, slower inference times          |
-| `qwen2.5-vl`      | Fast but capable model with good vision comprehension                |
+| Model         | Notes                                                                |
+| ------------- | -------------------------------------------------------------------- |
+| `qwen3-vl`    | Strong visual and situational understanding, higher vram requirement |
+| `Intern3.5VL` | Relatively fast with good vision comprehension                       |
+| `gemma3`      | Strong frame-to-frame understanding, slower inference times          |
+| `qwen2.5-vl`  | Fast but capable model with good vision comprehension                |
 
 :::note
 
@@ -61,10 +61,10 @@ genai:
   provider: ollama
   base_url: http://localhost:11434
   model: minicpm-v:8b
-  provider_options:  # other Ollama client options can be defined
+  provider_options: # other Ollama client options can be defined
     keep_alive: -1
     options:
-      num_ctx: 8192  # make sure the context matches other services that are using ollama
+      num_ctx: 8192 # make sure the context matches other services that are using ollama
 ```
 
 ## Google Gemini
@@ -120,6 +120,23 @@ To use a different OpenAI-compatible API endpoint, set the `OPENAI_BASE_URL` env
 
 :::
 
+:::tip
+
+For OpenAI-compatible servers (such as llama.cpp) that don't expose the configured context size in the API response, you can manually specify the context size in `provider_options`:
+
+```yaml
+genai:
+  provider: openai
+  base_url: http://your-llama-server
+  model: your-model-name
+  provider_options:
+    context_size: 8192 # Specify the configured context size
+```
+
+This ensures Frigate uses the correct context window size when generating prompts.
+
+:::
+
 ## Azure OpenAI
 
 Microsoft offers several vision models through Azure OpenAI. A subscription is required.
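The docs tip above pairs with the `openai.py` change below: `context_size` lives in `provider_options` but is consumed by Frigate itself, never by the OpenAI client. A minimal sketch of why the key must be stripped before the client is constructed (assumes the `openai` Python package; the `timeout` option, placeholder API key, and URL are illustrative values, not part of the patch):

```python
from openai import OpenAI

# Mirrors the provider_options block from the docs example above;
# timeout is an arbitrary example of a real OpenAI client option.
provider_options = {"context_size": 8192, "timeout": 60.0}

# OpenAI() accepts only known keyword arguments, so forwarding
# context_size would raise a TypeError; filter it out first.
client_kwargs = {k: v for k, v in provider_options.items() if k != "context_size"}
client = OpenAI(api_key="unused", base_url="http://your-llama-server", **client_kwargs)
```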
diff --git a/frigate/genai/openai.py b/frigate/genai/openai.py
index 631cb3480..441e93a80 100644
--- a/frigate/genai/openai.py
+++ b/frigate/genai/openai.py
@@ -22,9 +22,14 @@ class OpenAIClient(GenAIClient):
 
     def _init_provider(self):
         """Initialize the client."""
-        return OpenAI(
-            api_key=self.genai_config.api_key, **self.genai_config.provider_options
-        )
+        # Extract context_size from provider_options as it's not a valid OpenAI client parameter
+        # It will be used in get_context_size() instead
+        provider_opts = {
+            k: v
+            for k, v in self.genai_config.provider_options.items()
+            if k != "context_size"
+        }
+        return OpenAI(api_key=self.genai_config.api_key, **provider_opts)
 
     def _send(self, prompt: str, images: list[bytes]) -> Optional[str]:
         """Submit a request to OpenAI."""
@@ -73,6 +78,16 @@ class OpenAIClient(GenAIClient):
         if self.context_size is not None:
             return self.context_size
 
+        # First check provider_options for manually specified context size
+        # This is necessary for llama.cpp and other OpenAI-compatible servers
+        # that don't expose the configured runtime context size in the API response
+        if "context_size" in self.genai_config.provider_options:
+            self.context_size = self.genai_config.provider_options["context_size"]
+            logger.debug(
+                f"Using context size {self.context_size} from provider_options for model {self.genai_config.model}"
+            )
+            return self.context_size
+
         try:
             models = self.provider.models.list()
             for model in models.data:
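For reference, the lookup order `get_context_size()` ends up with is: cached value first, then the manual `provider_options` override, then the models API. A self-contained sketch of that precedence using hypothetical stand-ins (`FakeConfig`/`FakeClient` are not Frigate's real types, and the API fallback is stubbed with an assumed default):

```python
class FakeConfig:
    """Stand-in for the genai config; mirrors the docs example above."""
    model = "your-model-name"
    provider_options = {"context_size": 8192}

class FakeClient:
    def __init__(self, config):
        self.genai_config = config
        self.context_size = None  # resolved lazily on first lookup

    def get_context_size(self) -> int:
        # 1. Return the cached value if a previous call resolved it.
        if self.context_size is not None:
            return self.context_size
        # 2. Honor a manual override from provider_options (the llama.cpp case).
        if "context_size" in self.genai_config.provider_options:
            self.context_size = self.genai_config.provider_options["context_size"]
            return self.context_size
        # 3. Otherwise fall back; the real client queries provider.models.list().
        self.context_size = 4096
        return self.context_size

client = FakeClient(FakeConfig())
assert client.get_context_size() == 8192
assert client.get_context_size() == 8192  # second call is served from the cache
```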