From 1ff6921d33b78f80a29a8271a172253626ded8d7 Mon Sep 17 00:00:00 2001
From: Nicolas Mowen
Date: Sat, 17 Jan 2026 07:19:13 -0700
Subject: [PATCH] Implement llama.cpp GenAI Provider

---
 frigate/config/camera/genai.py |   1 +
 frigate/genai/llama_cpp.py     | 101 +++++++++++++++++++++++++++++++++
 2 files changed, 102 insertions(+)
 create mode 100644 frigate/genai/llama_cpp.py

diff --git a/frigate/config/camera/genai.py b/frigate/config/camera/genai.py
index 3c6baeb15..09e3504a7 100644
--- a/frigate/config/camera/genai.py
+++ b/frigate/config/camera/genai.py
@@ -14,6 +14,7 @@ class GenAIProviderEnum(str, Enum):
     azure_openai = "azure_openai"
     gemini = "gemini"
     ollama = "ollama"
+    llamacpp = "llamacpp"
 
 
 class GenAIConfig(FrigateBaseModel):
diff --git a/frigate/genai/llama_cpp.py b/frigate/genai/llama_cpp.py
new file mode 100644
index 000000000..45e364bc0
--- /dev/null
+++ b/frigate/genai/llama_cpp.py
@@ -0,0 +1,101 @@
+"""llama.cpp Provider for Frigate AI."""
+
+import base64
+import logging
+from typing import Any, Optional
+
+import requests
+
+from frigate.config import GenAIProviderEnum
+from frigate.genai import GenAIClient, register_genai_provider
+
+logger = logging.getLogger(__name__)
+
+
+@register_genai_provider(GenAIProviderEnum.llamacpp)
+class LlamaCppClient(GenAIClient):
+    """Generative AI client for Frigate using llama.cpp server."""
+
+    LOCAL_OPTIMIZED_OPTIONS = {
+        "temperature": 0.7,
+        "repeat_penalty": 1.05,
+        "top_p": 0.8,
+    }
+
+    provider: str  # base_url
+    provider_options: dict[str, Any]
+
+    def _init_provider(self):
+        """Initialize the client."""
+        self.provider_options = {
+            **self.LOCAL_OPTIMIZED_OPTIONS,
+            **self.genai_config.provider_options,
+        }
+        return (
+            self.genai_config.base_url.rstrip("/")
+            if self.genai_config.base_url
+            else None
+        )
+
+    def _send(self, prompt: str, images: list[bytes]) -> Optional[str]:
+        """Submit a request to llama.cpp server."""
+        if self.provider is None:
+            logger.warning(
+                "llama.cpp provider has not been initialized, a description will not be generated. Check your llama.cpp configuration."
+            )
+            return None
+
+        try:
+            content = []
+            for image in images:
+                encoded_image = base64.b64encode(image).decode("utf-8")
+                content.append(
+                    {
+                        "type": "image_url",
+                        "image_url": {
+                            "url": f"data:image/jpeg;base64,{encoded_image}",
+                        },
+                    }
+                )
+            content.append(
+                {
+                    "type": "text",
+                    "text": prompt,
+                }
+            )
+
+            # Build request payload with llama.cpp native options
+            payload = {
+                "messages": [
+                    {
+                        "role": "user",
+                        "content": content,
+                    },
+                ],
+                **self.provider_options,
+            }
+
+            response = requests.post(
+                f"{self.provider}/v1/chat/completions",
+                json=payload,
+                timeout=self.timeout,
+            )
+            response.raise_for_status()
+            result = response.json()
+
+            if (
+                result is not None
+                and "choices" in result
+                and len(result["choices"]) > 0
+            ):
+                choice = result["choices"][0]
+                if "message" in choice and "content" in choice["message"]:
+                    return choice["message"]["content"].strip()
+            return None
+        except Exception as e:
+            logger.warning("llama.cpp returned an error: %s", str(e))
+            return None
+
+    def get_context_size(self) -> int:
+        """Get the context window size for llama.cpp."""
+        return self.genai_config.provider_options.get("context_size", 4096)
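
Example configuration: the new provider plugs into the existing genai config.
The sketch below is illustrative, not required for the patch; it assumes a
llama.cpp server already running a vision-capable model on its default port
8080. Any `provider_options` entries are merged over LOCAL_OPTIMIZED_OPTIONS
and forwarded in the /v1/chat/completions payload, and `context_size` is also
read by get_context_size():

    genai:
      provider: llamacpp
      base_url: http://localhost:8080
      provider_options:
        temperature: 0.4
        context_size: 8192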
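
For manual verification against a running server, the snippet below mirrors
the payload that _send builds: one base64 data URL per image, followed by the
text prompt, with the same default sampling options. The server address, image
path, and prompt are placeholders:

    import base64

    import requests

    BASE_URL = "http://localhost:8080"  # placeholder llama.cpp server address

    # Encode a test image the same way _send does
    with open("snapshot.jpg", "rb") as f:  # placeholder image file
        encoded_image = base64.b64encode(f.read()).decode("utf-8")

    payload = {
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{encoded_image}",
                        },
                    },
                    {"type": "text", "text": "Describe the activity in this image."},
                ],
            }
        ],
        # Same defaults the provider applies via LOCAL_OPTIMIZED_OPTIONS
        "temperature": 0.7,
        "repeat_penalty": 1.05,
        "top_p": 0.8,
    }

    response = requests.post(
        f"{BASE_URL}/v1/chat/completions", json=payload, timeout=60
    )
    response.raise_for_status()
    print(response.json()["choices"][0]["message"]["content"].strip())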