feat(genai): add api_key auth support for ollama cloud (#23096)
Some checks are pending
CI / AMD64 Build (push) Waiting to run
CI / ARM Build (push) Waiting to run
CI / Jetson Jetpack 6 (push) Waiting to run
CI / AMD64 Extra Build (push) Blocked by required conditions
CI / ARM Extra Build (push) Blocked by required conditions
CI / Synaptics Build (push) Blocked by required conditions
CI / Assemble and push default build (push) Blocked by required conditions
- Add _auth_headers() helper to pass Bearer token when api_key is set
- Wire headers into all Ollama client instantiations (sync + async)
- Update docs with Ollama Cloud direct connection example and yaml config
This commit is contained in:
parent 147cd5cc2b
commit b6fd86a066
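The gist of the change: a small helper builds a Bearer `Authorization` header from the configured `api_key`, and every Ollama client construction (sync and async) passes those headers through. When no key is set, the helper returns `None`, so local unauthenticated setups behave as before. A rough standalone sketch of that helper logic, using a stand-in dataclass rather than Frigate's actual config model:

```python
# Standalone illustration of the _auth_headers() pattern in the diff below.
# GenAIConfig here is a stand-in dataclass, not Frigate's real config class.
from dataclasses import dataclass


@dataclass
class GenAIConfig:
    api_key: str | None = None


def auth_headers(config: GenAIConfig) -> dict | None:
    """Build a Bearer Authorization header when an api_key is configured."""
    if config.api_key:
        return {"Authorization": "Bearer " + config.api_key}
    # No key configured: return None so the client sends no auth header.
    return None


assert auth_headers(GenAIConfig(api_key="abc123")) == {"Authorization": "Bearer abc123"}
assert auth_headers(GenAIConfig()) is None
```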
@@ -201,7 +201,7 @@ Cloud Generative AI providers require an active internet connection to send imag

 ### Ollama Cloud

-Ollama also supports [cloud models](https://ollama.com/cloud), where your local Ollama instance handles requests from Frigate, but model inference is performed in the cloud. Set up Ollama locally, sign in with your Ollama account, and specify the cloud model name in your Frigate config. For more details, see the Ollama cloud model [docs](https://docs.ollama.com/cloud).
+Ollama also supports [cloud models](https://ollama.com/cloud), where model inference is performed in the cloud. You can connect directly to Ollama Cloud by setting `base_url` to `https://ollama.com` and providing an API key. Alternatively, you can run Ollama locally and use a cloud model name so your local instance forwards requests to the cloud. For more details, see the Ollama cloud model [docs](https://docs.ollama.com/cloud).

 #### Configuration
@@ -210,7 +210,8 @@ Ollama also supports [cloud models](https://ollama.com/cloud), where your local

 1. Navigate to <NavPath path="Settings > Enrichments > Generative AI" />.
    - Set **Provider** to `ollama`
-   - Set **Base URL** to your local Ollama address (e.g., `http://localhost:11434`)
+   - Set **Base URL** to your local Ollama address (e.g., `http://localhost:11434`) or `https://ollama.com` for direct cloud inference
+   - Set **API key** if required by your endpoint (e.g., when using `https://ollama.com`)
    - Set **Model** to the cloud model name

 </TabItem>
@@ -223,6 +224,16 @@ genai:
   model: cloud-model-name
 ```

+or, when using Ollama Cloud directly:
+
+```yaml
+genai:
+  provider: ollama
+  base_url: https://ollama.com
+  model: cloud-model-name
+  api_key: your-api-key
+```
+
 </TabItem>
 </ConfigTabs>
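Outside Frigate, the same direct-cloud credentials can be sanity-checked with the `ollama` Python package, which accepts custom headers on its clients. A minimal sketch, assuming a valid Ollama Cloud key; the model name and key below are placeholders:

```python
# Minimal sketch: verify an Ollama Cloud API key outside of Frigate.
# "cloud-model-name" and "your-api-key" are placeholders.
from ollama import Client

client = Client(
    host="https://ollama.com",
    headers={"Authorization": "Bearer your-api-key"},
)

# A successful response confirms the Bearer token is accepted.
response = client.chat(
    model="cloud-model-name",
    messages=[{"role": "user", "content": "Reply with one word."}],
)
print(response["message"]["content"])
```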
@@ -31,6 +31,12 @@ class OllamaClient(GenAIClient):
     provider: ApiClient | None
     provider_options: dict[str, Any]

+    def _auth_headers(self) -> dict | None:
+        if self.genai_config.api_key:
+            return {"Authorization": "Bearer " + self.genai_config.api_key}
+
+        return None
+
     def _init_provider(self) -> ApiClient | None:
         """Initialize the client."""
         self.provider_options = {
@@ -39,7 +45,11 @@ class OllamaClient(GenAIClient):
         }

         try:
-            client = ApiClient(host=self.genai_config.base_url, timeout=self.timeout)
+            client = ApiClient(
+                host=self.genai_config.base_url,
+                timeout=self.timeout,
+                headers=self._auth_headers(),
+            )
             # ensure the model is available locally
             response = client.show(self.genai_config.model)
             if response.get("error"):
@@ -166,7 +176,9 @@ class OllamaClient(GenAIClient):
             return []
         try:
             client = ApiClient(
-                host=self.genai_config.base_url, timeout=self.timeout
+                host=self.genai_config.base_url,
+                timeout=self.timeout,
+                headers=self._auth_headers(),
             )
         except Exception:
             return []
@@ -344,6 +356,7 @@ class OllamaClient(GenAIClient):
             async_client = OllamaAsyncClient(
                 host=self.genai_config.base_url,
                 timeout=self.timeout,
+                headers=self._auth_headers(),
             )
             response = await async_client.chat(**request_params)
             result = self._message_from_response(response)
@@ -359,6 +372,7 @@ class OllamaClient(GenAIClient):
             async_client = OllamaAsyncClient(
                 host=self.genai_config.base_url,
                 timeout=self.timeout,
+                headers=self._auth_headers(),
             )
             content_parts: list[str] = []
             final_message: dict[str, Any] | None = None
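For the async streaming path, the wiring is identical. A hypothetical standalone sketch (not part of this diff) of how ollama's `AsyncClient` carries the same Bearer header through a streaming chat, with placeholder host, model, and key:

```python
# Hypothetical sketch: streaming chat through AsyncClient with a Bearer
# token, mirroring how the diff wires headers into OllamaAsyncClient.
import asyncio

from ollama import AsyncClient


async def main() -> None:
    client = AsyncClient(
        host="https://ollama.com",
        headers={"Authorization": "Bearer your-api-key"},  # placeholder key
    )
    # stream=True yields partial messages; accumulate them the same way the
    # diff collects content_parts.
    content_parts: list[str] = []
    async for part in await client.chat(
        model="cloud-model-name",  # placeholder model
        messages=[{"role": "user", "content": "Describe the scene."}],
        stream=True,
    ):
        content_parts.append(part["message"]["content"])
    print("".join(content_parts))


asyncio.run(main())
```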