From 1ff6921d33b78f80a29a8271a172253626ded8d7 Mon Sep 17 00:00:00 2001
From: Nicolas Mowen
Date: Sat, 17 Jan 2026 07:19:13 -0700
Subject: [PATCH] Implement llama.cpp GenAI Provider

---
 frigate/config/camera/genai.py |   1 +
 frigate/genai/llama_cpp.py     | 101 +++++++++++++++++++++++++++++++++
 2 files changed, 102 insertions(+)
 create mode 100644 frigate/genai/llama_cpp.py

diff --git a/frigate/config/camera/genai.py b/frigate/config/camera/genai.py
index 3c6baeb15..09e3504a7 100644
--- a/frigate/config/camera/genai.py
+++ b/frigate/config/camera/genai.py
@@ -14,6 +14,7 @@ class GenAIProviderEnum(str, Enum):
     azure_openai = "azure_openai"
     gemini = "gemini"
     ollama = "ollama"
+    llamacpp = "llamacpp"
 
 
 class GenAIConfig(FrigateBaseModel):
diff --git a/frigate/genai/llama_cpp.py b/frigate/genai/llama_cpp.py
new file mode 100644
index 000000000..45e364bc0
--- /dev/null
+++ b/frigate/genai/llama_cpp.py
@@ -0,0 +1,101 @@
+"""llama.cpp Provider for Frigate AI."""
+
+import base64
+import logging
+from typing import Any, Optional
+
+import requests
+
+from frigate.config import GenAIProviderEnum
+from frigate.genai import GenAIClient, register_genai_provider
+
+logger = logging.getLogger(__name__)
+
+
+@register_genai_provider(GenAIProviderEnum.llamacpp)
+class LlamaCppClient(GenAIClient):
+    """Generative AI client for Frigate using llama.cpp server."""
+
+    LOCAL_OPTIMIZED_OPTIONS = {
+        "temperature": 0.7,
+        "repeat_penalty": 1.05,
+        "top_p": 0.8,
+    }
+
+    provider: str  # base_url
+    provider_options: dict[str, Any]
+
+    def _init_provider(self):
+        """Initialize the client."""
+        self.provider_options = {
+            **self.LOCAL_OPTIMIZED_OPTIONS,
+            **self.genai_config.provider_options,
+        }
+        return (
+            self.genai_config.base_url.rstrip("/")
+            if self.genai_config.base_url
+            else None
+        )
+
+    def _send(self, prompt: str, images: list[bytes]) -> Optional[str]:
+        """Submit a request to llama.cpp server."""
+        if self.provider is None:
+            logger.warning(
+                "llama.cpp provider has not been initialized, a description will not be generated. Check your llama.cpp configuration."
+            )
+            return None
+
+        try:
+            content = []
+            for image in images:
+                encoded_image = base64.b64encode(image).decode("utf-8")
+                content.append(
+                    {
+                        "type": "image_url",
+                        "image_url": {
+                            "url": f"data:image/jpeg;base64,{encoded_image}",
+                        },
+                    }
+                )
+            content.append(
+                {
+                    "type": "text",
+                    "text": prompt,
+                }
+            )
+
+            # Build request payload with llama.cpp native options
+            payload = {
+                "messages": [
+                    {
+                        "role": "user",
+                        "content": content,
+                    },
+                ],
+                **self.provider_options,
+            }
+
+            response = requests.post(
+                f"{self.provider}/v1/chat/completions",
+                json=payload,
+                timeout=self.timeout,
+            )
+            response.raise_for_status()
+            result = response.json()
+
+            if (
+                result is not None
+                and "choices" in result
+                and len(result["choices"]) > 0
+            ):
+                choice = result["choices"][0]
+                if "message" in choice and "content" in choice["message"]:
+                    return choice["message"]["content"].strip()
+            return None
+        except Exception as e:
+            logger.warning("llama.cpp returned an error: %s", str(e))
+            return None
+
+    def get_context_size(self) -> int:
+        """Get the context window size for llama.cpp."""
+        return self.genai_config.provider_options.get("context_size", 4096)
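
Example configuration: the new provider plugs into the existing genai config.
The sketch below is illustrative, not required for the patch; it assumes a
llama.cpp server already running a vision-capable model on its default port
8080. Any `provider_options` entries are merged over LOCAL_OPTIMIZED_OPTIONS
and forwarded in the /v1/chat/completions payload, and `context_size` is also
read by get_context_size():

    genai:
      provider: llamacpp
      base_url: http://localhost:8080
      provider_options:
        temperature: 0.4
        context_size: 8192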
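
For manual verification against a running server, the snippet below mirrors
the payload that _send builds: one base64 data URL per image, followed by the
text prompt, with the same default sampling options. The server address, image
path, and prompt are placeholders:

    import base64

    import requests

    BASE_URL = "http://localhost:8080"  # placeholder llama.cpp server address

    # Encode a test image the same way _send does
    with open("snapshot.jpg", "rb") as f:  # placeholder image file
        encoded_image = base64.b64encode(f.read()).decode("utf-8")

    payload = {
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{encoded_image}",
                        },
                    },
                    {"type": "text", "text": "Describe the activity in this image."},
                ],
            }
        ],
        # Same defaults the provider applies via LOCAL_OPTIMIZED_OPTIONS
        "temperature": 0.7,
        "repeat_penalty": 1.05,
        "top_p": 0.8,
    }

    response = requests.post(
        f"{BASE_URL}/v1/chat/completions", json=payload, timeout=60
    )
    response.raise_for_status()
    print(response.json()["choices"][0]["message"]["content"].strip())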