diff --git a/docs/docs/configuration/genai/config.md b/docs/docs/configuration/genai/config.md
index a02a313ba..a512943c9 100644
--- a/docs/docs/configuration/genai/config.md
+++ b/docs/docs/configuration/genai/config.md
@@ -201,7 +201,7 @@ Cloud Generative AI providers require an active internet connection to send imag
 
 ### Ollama Cloud
 
-Ollama also supports [cloud models](https://ollama.com/cloud), where your local Ollama instance handles requests from Frigate, but model inference is performed in the cloud. Set up Ollama locally, sign in with your Ollama account, and specify the cloud model name in your Frigate config. For more details, see the Ollama cloud model [docs](https://docs.ollama.com/cloud).
+Ollama also supports [cloud models](https://ollama.com/cloud), where model inference is performed in the cloud. You can connect directly to Ollama Cloud by setting `base_url` to `https://ollama.com` and providing an API key. Alternatively, you can run Ollama locally, sign in with your Ollama account, and specify a cloud model name so that your local instance forwards requests from Frigate to the cloud. For more details, see the Ollama cloud model [docs](https://docs.ollama.com/cloud).
 
 #### Configuration
 
@@ -210,7 +210,8 @@ Ollama also supports [cloud models](https://ollama.com/cloud), where your local
 
 1. Navigate to <NavPath path="Settings > Enrichments > Generative AI" />.
    - Set **Provider** to `ollama`
-   - Set **Base URL** to your local Ollama address (e.g., `http://localhost:11434`)
+   - Set **Base URL** to your local Ollama address (e.g., `http://localhost:11434`) or `https://ollama.com` for direct cloud inference
+   - Set **API key** if required by your endpoint (e.g., when using `https://ollama.com`)
    - Set **Model** to the cloud model name
 
 </TabItem>
@@ -223,6 +224,16 @@ genai:
   model: cloud-model-name
 ```
 
+Or, when connecting to Ollama Cloud directly:
+
+```yaml
+genai:
+  provider: ollama
+  base_url: https://ollama.com
+  model: cloud-model-name
+  api_key: your-api-key
+```
+
 </TabItem>
 </ConfigTabs>
 
diff --git a/frigate/genai/ollama.py b/frigate/genai/ollama.py
index 1c0e222d9..6ba803bcc 100644
--- a/frigate/genai/ollama.py
+++ b/frigate/genai/ollama.py
@@ -31,6 +31,12 @@ class OllamaClient(GenAIClient):
     provider: ApiClient | None
     provider_options: dict[str, Any]
 
+    def _auth_headers(self) -> dict | None:  # Bearer-token header for the Ollama clients, or None
+        if self.genai_config.api_key:
+            return {"Authorization": "Bearer " + self.genai_config.api_key}
+        # No API key configured (falsy value): return None so no auth header is passed.
+        return None
+
     def _init_provider(self) -> ApiClient | None:
         """Initialize the client."""
         self.provider_options = {
@@ -39,7 +45,11 @@ class OllamaClient(GenAIClient):
         }
 
         try:
-            client = ApiClient(host=self.genai_config.base_url, timeout=self.timeout)
+            client = ApiClient(
+                host=self.genai_config.base_url,
+                timeout=self.timeout,
+                headers=self._auth_headers(),
+            )
             # ensure the model is available locally
             response = client.show(self.genai_config.model)
             if response.get("error"):
@@ -166,7 +176,9 @@ class OllamaClient(GenAIClient):
                 return []
             try:
                 client = ApiClient(
-                    host=self.genai_config.base_url, timeout=self.timeout
+                    host=self.genai_config.base_url,
+                    timeout=self.timeout,
+                    headers=self._auth_headers(),
                 )
             except Exception:
                 return []
@@ -344,6 +356,7 @@ class OllamaClient(GenAIClient):
                 async_client = OllamaAsyncClient(
                     host=self.genai_config.base_url,
                     timeout=self.timeout,
+                    headers=self._auth_headers(),
                 )
                 response = await async_client.chat(**request_params)
                 result = self._message_from_response(response)
@@ -359,6 +372,7 @@ class OllamaClient(GenAIClient):
             async_client = OllamaAsyncClient(
                 host=self.genai_config.base_url,
                 timeout=self.timeout,
+                headers=self._auth_headers(),
             )
             content_parts: list[str] = []
             final_message: dict[str, Any] | None = None