Fix sending images

This commit is contained in:
Nicolas Mowen 2026-02-19 08:24:19 -07:00
parent 4cd581fc43
commit 72c73b153c

View File

@ -1,12 +1,14 @@
"""llama.cpp Provider for Frigate AI.""" """llama.cpp Provider for Frigate AI."""
import base64 import base64
import io
import json import json
import logging import logging
from typing import Any, Optional from typing import Any, Optional
import numpy as np import numpy as np
import requests import requests
from PIL import Image
from frigate.config import GenAIProviderEnum from frigate.config import GenAIProviderEnum
from frigate.genai import GenAIClient, register_genai_provider from frigate.genai import GenAIClient, register_genai_provider
@ -14,6 +16,20 @@ from frigate.genai import GenAIClient, register_genai_provider
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def _to_jpeg(img_bytes: bytes) -> bytes | None:
    """Re-encode raw image bytes as a JPEG.

    llama.cpp decodes images through STB, which does not support WebP, so
    images are normalised to JPEG before being sent.

    Args:
        img_bytes: Encoded image data in any format Pillow can open.

    Returns:
        The JPEG-encoded bytes, or ``None`` if the image could not be
        decoded or re-encoded (a warning is logged in that case).
    """
    try:
        source = io.BytesIO(img_bytes)
        picture = Image.open(source)
        # Any non-RGB mode is converted to RGB before the JPEG save.
        rgb = picture if picture.mode == "RGB" else picture.convert("RGB")
        encoded = io.BytesIO()
        rgb.save(encoded, format="JPEG", quality=85)
        return encoded.getvalue()
    except Exception as e:
        # Best-effort conversion: report the failure and let the caller decide.
        logger.warning("Failed to convert image to JPEG: %s", e)
        return None
@register_genai_provider(GenAIProviderEnum.llamacpp) @register_genai_provider(GenAIProviderEnum.llamacpp)
class LlamaCppClient(GenAIClient): class LlamaCppClient(GenAIClient):
"""Generative AI client for Frigate using llama.cpp server.""" """Generative AI client for Frigate using llama.cpp server."""
@ -130,8 +146,15 @@ class LlamaCppClient(GenAIClient):
for text in texts: for text in texts:
content.append({"prompt_string": text}) content.append({"prompt_string": text})
for img in images: for img in images:
encoded = base64.b64encode(img).decode("utf-8") # llama.cpp uses STB which does not support WebP; convert to JPEG
content.append({"prompt_string": "", "multimodal_data": [encoded]}) jpeg_bytes = _to_jpeg(img)
to_encode = jpeg_bytes if jpeg_bytes is not None else img
encoded = base64.b64encode(to_encode).decode("utf-8")
# prompt_string must contain <__media__> placeholder for image tokenization
content.append({
"prompt_string": "<__media__>\n",
"multimodal_data": [encoded],
})
try: try:
response = requests.post( response = requests.post(