Refactor genai (#22752)
Some checks are pending
CI / AMD64 Build (push) Waiting to run
CI / ARM Build (push) Waiting to run
CI / Jetson Jetpack 6 (push) Waiting to run
CI / AMD64 Extra Build (push) Blocked by required conditions
CI / ARM Extra Build (push) Blocked by required conditions
CI / Synaptics Build (push) Blocked by required conditions
CI / Assemble and push default build (push) Blocked by required conditions

* Switch to feature-based roles so it is easier to choose models for different tasks

* Fall back and try llama-swap format

* List models supported by provider

* Cleanup

* Add frontend

* Improve model loading

* Make it possible to update genai without restarting

* Cleanup

* Cleanup

* Mypy
This commit is contained in:
Nicolas Mowen 2026-04-03 17:13:52 -06:00 committed by GitHub
parent bb77a01779
commit 9cb76d0bd9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
20 changed files with 363 additions and 140 deletions

View File

@ -125,6 +125,16 @@ def metrics(request: Request):
return Response(content=content, media_type=content_type) return Response(content=content, media_type=content_type)
@router.get(
"/genai/models",
dependencies=[Depends(allow_any_authenticated())],
summary="List available GenAI models",
description="Returns available models for each configured GenAI provider.",
)
def genai_models(request: Request):
return JSONResponse(content=request.app.genai_manager.list_models())
@router.get("/config", dependencies=[Depends(allow_any_authenticated())]) @router.get("/config", dependencies=[Depends(allow_any_authenticated())])
def config(request: Request): def config(request: Request):
config_obj: FrigateConfig = request.app.frigate_config config_obj: FrigateConfig = request.app.frigate_config

View File

@ -520,45 +520,14 @@ async def _execute_get_live_context(
"detections": list(tracked_objects_dict.values()), "detections": list(tracked_objects_dict.values()),
} }
# Grab live frame and handle based on provider configuration # Grab live frame when the chat model supports vision
image_url = await _get_live_frame_image_url(request, camera, allowed_cameras) image_url = await _get_live_frame_image_url(request, camera, allowed_cameras)
if image_url: if image_url:
genai_manager = request.app.genai_manager chat_client = request.app.genai_manager.chat_client
if genai_manager.tool_client is genai_manager.vision_client: if chat_client is not None and chat_client.supports_vision:
# Same provider handles both roles — pass image URL so it can # Pass image URL so it can be injected as a user message
# be injected as a user message (images can't be in tool results) # (images can't be in tool results)
result["_image_url"] = image_url result["_image_url"] = image_url
elif genai_manager.vision_client is not None:
# Separate vision provider — have it describe the image,
# providing detection context so it knows what to focus on
frame_bytes = _decode_data_url(image_url)
if frame_bytes:
detections = result.get("detections", [])
if detections:
detection_lines = []
for d in detections:
parts = [d.get("label", "unknown")]
if d.get("sub_label"):
parts.append(f"({d['sub_label']})")
if d.get("zones"):
parts.append(f"in {', '.join(d['zones'])}")
detection_lines.append(" ".join(parts))
context = (
"The following objects are currently being tracked: "
+ "; ".join(detection_lines)
+ "."
)
else:
context = "No objects are currently being tracked."
description = genai_manager.vision_client._send(
f"Describe what you see in this security camera image. "
f"{context} Focus on the scene, any visible activity, "
f"and details about the tracked objects.",
[frame_bytes],
)
if description:
result["image_description"] = description
return result return result
@ -609,17 +578,6 @@ async def _get_live_frame_image_url(
return None return None
def _decode_data_url(data_url: str) -> Optional[bytes]:
"""Decode a base64 data URL to raw bytes."""
try:
# Format: data:image/jpeg;base64,<data>
_, encoded = data_url.split(",", 1)
return base64.b64decode(encoded)
except (ValueError, Exception) as e:
logger.debug("Failed to decode data URL: %s", e)
return None
async def _execute_set_camera_state( async def _execute_set_camera_state(
request: Request, request: Request,
arguments: Dict[str, Any], arguments: Dict[str, Any],
@ -734,9 +692,9 @@ async def _execute_start_camera_watch(
await require_camera_access(camera, request=request) await require_camera_access(camera, request=request)
genai_manager = request.app.genai_manager genai_manager = request.app.genai_manager
vision_client = genai_manager.vision_client or genai_manager.tool_client chat_client = genai_manager.chat_client
if vision_client is None: if chat_client is None or not chat_client.supports_vision:
return {"error": "No vision/GenAI provider configured."} return {"error": "VLM watch requires a chat model with vision support."}
try: try:
job_id = start_vlm_watch_job( job_id = start_vlm_watch_job(
@ -1070,7 +1028,7 @@ async def chat_completion(
6. Repeats until final answer 6. Repeats until final answer
7. Returns response to user 7. Returns response to user
""" """
genai_client = request.app.genai_manager.tool_client genai_client = request.app.genai_manager.chat_client
if not genai_client: if not genai_client:
return JSONResponse( return JSONResponse(
content={ content={
@ -1381,12 +1339,12 @@ async def start_vlm_monitor(
await require_camera_access(body.camera, request=request) await require_camera_access(body.camera, request=request)
vision_client = genai_manager.vision_client or genai_manager.tool_client chat_client = genai_manager.chat_client
if vision_client is None: if chat_client is None or not chat_client.supports_vision:
return JSONResponse( return JSONResponse(
content={ content={
"success": False, "success": False,
"message": "No vision/GenAI provider configured.", "message": "VLM watch requires a chat model with vision support.",
}, },
status_code=400, status_code=400,
) )

View File

@ -746,7 +746,7 @@ async def set_not_reviewed(
description="Use GenAI to summarize review items over a period of time.", description="Use GenAI to summarize review items over a period of time.",
) )
def generate_review_summary(request: Request, start_ts: float, end_ts: float): def generate_review_summary(request: Request, start_ts: float, end_ts: float):
if not request.app.genai_manager.vision_client: if not request.app.genai_manager.description_client:
return JSONResponse( return JSONResponse(
content=( content=(
{ {

View File

@ -18,8 +18,8 @@ class GenAIProviderEnum(str, Enum):
class GenAIRoleEnum(str, Enum): class GenAIRoleEnum(str, Enum):
tools = "tools" chat = "chat"
vision = "vision" descriptions = "descriptions"
embeddings = "embeddings" embeddings = "embeddings"
@ -49,11 +49,11 @@ class GenAIConfig(FrigateBaseModel):
roles: list[GenAIRoleEnum] = Field( roles: list[GenAIRoleEnum] = Field(
default_factory=lambda: [ default_factory=lambda: [
GenAIRoleEnum.embeddings, GenAIRoleEnum.embeddings,
GenAIRoleEnum.vision, GenAIRoleEnum.descriptions,
GenAIRoleEnum.tools, GenAIRoleEnum.chat,
], ],
title="Roles", title="Roles",
description="GenAI roles (tools, vision, embeddings); one provider per role.", description="GenAI roles (chat, descriptions, embeddings); one provider per role.",
) )
provider_options: dict[str, Any] = Field( provider_options: dict[str, Any] = Field(
default={}, default={},

View File

@ -16,7 +16,7 @@ from frigate.config import CameraConfig, FrigateConfig
from frigate.const import CLIPS_DIR, UPDATE_EVENT_DESCRIPTION from frigate.const import CLIPS_DIR, UPDATE_EVENT_DESCRIPTION
from frigate.data_processing.post.semantic_trigger import SemanticTriggerProcessor from frigate.data_processing.post.semantic_trigger import SemanticTriggerProcessor
from frigate.data_processing.types import PostProcessDataEnum from frigate.data_processing.types import PostProcessDataEnum
from frigate.genai import GenAIClient from frigate.genai.manager import GenAIClientManager
from frigate.models import Event from frigate.models import Event
from frigate.types import TrackedObjectUpdateTypesEnum from frigate.types import TrackedObjectUpdateTypesEnum
from frigate.util.builtin import EventsPerSecond, InferenceSpeed from frigate.util.builtin import EventsPerSecond, InferenceSpeed
@ -41,7 +41,7 @@ class ObjectDescriptionProcessor(PostProcessorApi):
embeddings: "Embeddings", embeddings: "Embeddings",
requestor: InterProcessRequestor, requestor: InterProcessRequestor,
metrics: DataProcessorMetrics, metrics: DataProcessorMetrics,
client: GenAIClient, genai_manager: GenAIClientManager,
semantic_trigger_processor: SemanticTriggerProcessor | None, semantic_trigger_processor: SemanticTriggerProcessor | None,
): ):
super().__init__(config, metrics, None) super().__init__(config, metrics, None)
@ -49,7 +49,7 @@ class ObjectDescriptionProcessor(PostProcessorApi):
self.embeddings = embeddings self.embeddings = embeddings
self.requestor = requestor self.requestor = requestor
self.metrics = metrics self.metrics = metrics
self.genai_client = client self.genai_manager = genai_manager
self.semantic_trigger_processor = semantic_trigger_processor self.semantic_trigger_processor = semantic_trigger_processor
self.tracked_events: dict[str, list[Any]] = {} self.tracked_events: dict[str, list[Any]] = {}
self.early_request_sent: dict[str, bool] = {} self.early_request_sent: dict[str, bool] = {}
@ -198,6 +198,9 @@ class ObjectDescriptionProcessor(PostProcessorApi):
if data_type != PostProcessDataEnum.tracked_object: if data_type != PostProcessDataEnum.tracked_object:
return return
if self.genai_manager.description_client is None:
return
state: str | None = frame_data.get("state", None) state: str | None = frame_data.get("state", None)
if state is not None: if state is not None:
@ -329,7 +332,12 @@ class ObjectDescriptionProcessor(PostProcessorApi):
"""Embed the description for an event.""" """Embed the description for an event."""
start = datetime.datetime.now().timestamp() start = datetime.datetime.now().timestamp()
camera_config = self.config.cameras[str(event.camera)] camera_config = self.config.cameras[str(event.camera)]
description = self.genai_client.generate_object_description( client = self.genai_manager.description_client
if client is None:
return
description = client.generate_object_description(
camera_config, thumbnails, event camera_config, thumbnails, event
) )

View File

@ -22,6 +22,7 @@ from frigate.config.camera.review import GenAIReviewConfig, ImageSourceEnum
from frigate.const import CACHE_DIR, CLIPS_DIR, UPDATE_REVIEW_DESCRIPTION from frigate.const import CACHE_DIR, CLIPS_DIR, UPDATE_REVIEW_DESCRIPTION
from frigate.data_processing.types import PostProcessDataEnum from frigate.data_processing.types import PostProcessDataEnum
from frigate.genai import GenAIClient from frigate.genai import GenAIClient
from frigate.genai.manager import GenAIClientManager
from frigate.models import Recordings, ReviewSegment from frigate.models import Recordings, ReviewSegment
from frigate.util.builtin import EventsPerSecond, InferenceSpeed from frigate.util.builtin import EventsPerSecond, InferenceSpeed
from frigate.util.image import get_image_from_recording from frigate.util.image import get_image_from_recording
@ -41,12 +42,12 @@ class ReviewDescriptionProcessor(PostProcessorApi):
config: FrigateConfig, config: FrigateConfig,
requestor: InterProcessRequestor, requestor: InterProcessRequestor,
metrics: DataProcessorMetrics, metrics: DataProcessorMetrics,
client: GenAIClient, genai_manager: GenAIClientManager,
): ):
super().__init__(config, metrics, None) super().__init__(config, metrics, None)
self.requestor = requestor self.requestor = requestor
self.metrics = metrics self.metrics = metrics
self.genai_client = client self.genai_manager = genai_manager
self.review_desc_speed = InferenceSpeed(self.metrics.review_desc_speed) self.review_desc_speed = InferenceSpeed(self.metrics.review_desc_speed)
self.review_desc_dps = EventsPerSecond() self.review_desc_dps = EventsPerSecond()
self.review_desc_dps.start() self.review_desc_dps.start()
@ -63,7 +64,12 @@ class ReviewDescriptionProcessor(PostProcessorApi):
Estimates ~1 token per 1250 pixels. Targets 98% context utilization with safety margin. Estimates ~1 token per 1250 pixels. Targets 98% context utilization with safety margin.
Capped at 20 frames. Capped at 20 frames.
""" """
context_size = self.genai_client.get_context_size() client = self.genai_manager.description_client
if client is None:
return 3
context_size = client.get_context_size()
camera_config = self.config.cameras[camera] camera_config = self.config.cameras[camera]
detect_width = camera_config.detect.width detect_width = camera_config.detect.width
@ -111,6 +117,9 @@ class ReviewDescriptionProcessor(PostProcessorApi):
if data_type != PostProcessDataEnum.review: if data_type != PostProcessDataEnum.review:
return return
if self.genai_manager.description_client is None:
return
camera = data["after"]["camera"] camera = data["after"]["camera"]
camera_config = self.config.cameras[camera] camera_config = self.config.cameras[camera]
@ -200,7 +209,7 @@ class ReviewDescriptionProcessor(PostProcessorApi):
target=run_analysis, target=run_analysis,
args=( args=(
self.requestor, self.requestor,
self.genai_client, self.genai_manager.description_client,
self.review_desc_speed, self.review_desc_speed,
camera_config, camera_config,
final_data, final_data,
@ -316,7 +325,12 @@ class ReviewDescriptionProcessor(PostProcessorApi):
os.path.join(CLIPS_DIR, "genai-requests", f"{start_ts}-{end_ts}") os.path.join(CLIPS_DIR, "genai-requests", f"{start_ts}-{end_ts}")
).mkdir(parents=True, exist_ok=True) ).mkdir(parents=True, exist_ok=True)
return self.genai_client.generate_review_summary( client = self.genai_manager.description_client
if client is None:
return None
return client.generate_review_summary(
start_ts, start_ts,
end_ts, end_ts,
events_with_context, events_with_context,

View File

@ -202,15 +202,13 @@ class EmbeddingMaintainer(threading.Thread):
# post processors # post processors
self.post_processors: list[PostProcessorApi] = [] self.post_processors: list[PostProcessorApi] = []
if self.genai_manager.vision_client is not None and any( if any(c.review.genai.enabled_in_config for c in self.config.cameras.values()):
c.review.genai.enabled_in_config for c in self.config.cameras.values()
):
self.post_processors.append( self.post_processors.append(
ReviewDescriptionProcessor( ReviewDescriptionProcessor(
self.config, self.config,
self.requestor, self.requestor,
self.metrics, self.metrics,
self.genai_manager.vision_client, self.genai_manager,
) )
) )
@ -248,16 +246,14 @@ class EmbeddingMaintainer(threading.Thread):
) )
self.post_processors.append(semantic_trigger_processor) self.post_processors.append(semantic_trigger_processor)
if self.genai_manager.vision_client is not None and any( if any(c.objects.genai.enabled_in_config for c in self.config.cameras.values()):
c.objects.genai.enabled_in_config for c in self.config.cameras.values()
):
self.post_processors.append( self.post_processors.append(
ObjectDescriptionProcessor( ObjectDescriptionProcessor(
self.config, self.config,
self.embeddings, self.embeddings,
self.requestor, self.requestor,
self.metrics, self.metrics,
self.genai_manager.vision_client, self.genai_manager,
semantic_trigger_processor, semantic_trigger_processor,
) )
) )

View File

@ -320,6 +320,22 @@ Guidelines:
"""Submit a request to the provider.""" """Submit a request to the provider."""
return None return None
@property
def supports_vision(self) -> bool:
"""Whether the model supports vision/image input.
Defaults to True for cloud providers. Providers that can detect
capability at runtime (e.g. llama.cpp) should override this.
"""
return True
def list_models(self) -> list[str]:
"""Return the list of model names available from this provider.
Providers should override this to query their backend.
"""
return []
def get_context_size(self) -> int: def get_context_size(self) -> int:
"""Get the context window size for this provider in tokens.""" """Get the context window size for this provider in tokens."""
return 4096 return 4096

View File

@ -82,6 +82,14 @@ class OpenAIClient(GenAIClient):
return str(result.choices[0].message.content.strip()) return str(result.choices[0].message.content.strip())
return None return None
def list_models(self) -> list[str]:
"""Return available model IDs from Azure OpenAI."""
try:
return sorted(m.id for m in self.provider.models.list().data)
except Exception as e:
logger.warning("Failed to list Azure OpenAI models: %s", e)
return []
def get_context_size(self) -> int: def get_context_size(self) -> int:
"""Get the context window size for Azure OpenAI.""" """Get the context window size for Azure OpenAI."""
return 128000 return 128000

View File

@ -87,6 +87,14 @@ class GeminiClient(GenAIClient):
return None return None
return description return description
def list_models(self) -> list[str]:
"""Return available model names from Gemini."""
try:
return sorted(m.name or "" for m in self.provider.models.list())
except Exception as e:
logger.warning("Failed to list Gemini models: %s", e)
return []
def get_context_size(self) -> int: def get_context_size(self) -> int:
"""Get the context window size for Gemini.""" """Get the context window size for Gemini."""
# Gemini Pro Vision has a 1M token context window # Gemini Pro Vision has a 1M token context window

View File

@ -101,7 +101,11 @@ class LlamaCppClient(GenAIClient):
e, e,
) )
# Query /props for context size, modalities, and tool support # Query /props for context size, modalities, and tool support.
# The standard /props?model=<name> endpoint works with llama-server.
# If it fails, try the llama-swap per-model passthrough endpoint which
# returns props for a specific model without requiring it to be loaded.
try:
try: try:
response = requests.get( response = requests.get(
f"{base_url}/props", f"{base_url}/props",
@ -110,6 +114,13 @@ class LlamaCppClient(GenAIClient):
) )
response.raise_for_status() response.raise_for_status()
props = response.json() props = response.json()
except Exception:
response = requests.get(
f"{base_url}/upstream/{configured_model}/props",
timeout=10,
)
response.raise_for_status()
props = response.json()
# Context size from server runtime config # Context size from server runtime config
default_settings = props.get("default_generation_settings", {}) default_settings = props.get("default_generation_settings", {})
@ -126,7 +137,7 @@ class LlamaCppClient(GenAIClient):
chat_caps = props.get("chat_template_caps", {}) chat_caps = props.get("chat_template_caps", {})
self._supports_tools = chat_caps.get("supports_tools", False) self._supports_tools = chat_caps.get("supports_tools", False)
logger.debug( logger.info(
"llama.cpp model '%s' initialized — context: %s, vision: %s, audio: %s, tools: %s", "llama.cpp model '%s' initialized — context: %s, vision: %s, audio: %s, tools: %s",
configured_model, configured_model,
self._context_size or "unknown", self._context_size or "unknown",
@ -225,6 +236,23 @@ class LlamaCppClient(GenAIClient):
"""Whether the loaded model supports tool/function calling.""" """Whether the loaded model supports tool/function calling."""
return self._supports_tools return self._supports_tools
def list_models(self) -> list[str]:
"""Return available model IDs from the llama.cpp server."""
if self.provider is None:
return []
try:
response = requests.get(f"{self.provider}/v1/models", timeout=10)
response.raise_for_status()
models = []
for m in response.json().get("data", []):
models.append(m.get("id", "unknown"))
for alias in m.get("aliases", []):
models.append(alias)
return sorted(models)
except Exception as e:
logger.warning("Failed to list llama.cpp models: %s", e)
return []
def get_context_size(self) -> int: def get_context_size(self) -> int:
"""Get the context window size for llama.cpp. """Get the context window size for llama.cpp.

View File

@ -1,15 +1,15 @@
"""GenAI client manager for Frigate. """GenAI client manager for Frigate.
Manages GenAI provider clients from Frigate config. Configuration is read only Manages GenAI provider clients from Frigate config. Clients are created lazily
in _update_config(); no other code should read config.genai. Exposes clients on first access so that providers whose roles are never used (e.g. chat when
by role: tool_client, vision_client, embeddings_client. no chat feature is active) are never initialized.
""" """
import logging import logging
from typing import TYPE_CHECKING, Optional from typing import TYPE_CHECKING, Optional
from frigate.config import FrigateConfig from frigate.config import FrigateConfig
from frigate.config.camera.genai import GenAIRoleEnum from frigate.config.camera.genai import GenAIConfig, GenAIRoleEnum
if TYPE_CHECKING: if TYPE_CHECKING:
from frigate.genai import GenAIClient from frigate.genai import GenAIClient
@ -21,68 +21,98 @@ class GenAIClientManager:
"""Manages GenAI provider clients from Frigate config.""" """Manages GenAI provider clients from Frigate config."""
def __init__(self, config: FrigateConfig) -> None: def __init__(self, config: FrigateConfig) -> None:
self._tool_client: Optional[GenAIClient] = None self._configs: dict[str, GenAIConfig] = {}
self._vision_client: Optional[GenAIClient] = None self._role_map: dict[GenAIRoleEnum, str] = {}
self._embeddings_client: Optional[GenAIClient] = None self._clients: dict[str, "GenAIClient"] = {}
self.update_config(config) self.update_config(config)
def update_config(self, config: FrigateConfig) -> None: def update_config(self, config: FrigateConfig) -> None:
"""Build role clients from current Frigate config.genai. """Store provider configs and build the role→name mapping.
Called from __init__ and can be called again when config is reloaded. Called from __init__ and can be called again when config is reloaded.
Each role (tools, vision, embeddings) gets the client for the provider Clients are not created here; they are instantiated lazily on first
that has that role in its roles list. access via a role property or list_models().
""" """
from frigate.genai import PROVIDERS, load_providers from frigate.genai import PROVIDERS, load_providers
self._tool_client = None self._configs = {}
self._vision_client = None self._role_map = {}
self._embeddings_client = None self._clients = {}
if not config.genai: if not config.genai:
return return
load_providers() load_providers()
for _name, genai_cfg in config.genai.items(): for name, genai_cfg in config.genai.items():
if not genai_cfg.provider: if not genai_cfg.provider:
continue continue
provider_cls = PROVIDERS.get(genai_cfg.provider) if genai_cfg.provider not in PROVIDERS:
if not provider_cls:
logger.warning( logger.warning(
"Unknown GenAI provider %s in config, skipping.", "Unknown GenAI provider %s in config, skipping.",
genai_cfg.provider, genai_cfg.provider,
) )
continue continue
self._configs[name] = genai_cfg
for role in genai_cfg.roles:
self._role_map[role] = name
def _get_client(self, name: str) -> "Optional[GenAIClient]":
"""Return the client for *name*, creating it on first access."""
if name in self._clients:
return self._clients[name]
from frigate.genai import PROVIDERS
genai_cfg = self._configs.get(name)
if not genai_cfg:
return None
if not genai_cfg.provider:
return None
provider_cls = PROVIDERS.get(genai_cfg.provider)
if not provider_cls:
return None
try: try:
client = provider_cls(genai_cfg) client: "GenAIClient" = provider_cls(genai_cfg)
except Exception as e: except Exception as e:
logger.exception( logger.exception(
"Failed to create GenAI client for provider %s: %s", "Failed to create GenAI client for provider %s: %s",
genai_cfg.provider, genai_cfg.provider,
e, e,
) )
continue return None
for role in genai_cfg.roles: self._clients[name] = client
if role == GenAIRoleEnum.tools: return client
self._tool_client = client
elif role == GenAIRoleEnum.vision:
self._vision_client = client
elif role == GenAIRoleEnum.embeddings:
self._embeddings_client = client
@property @property
def tool_client(self) -> "Optional[GenAIClient]": def chat_client(self) -> "Optional[GenAIClient]":
"""Client configured for the tools role (e.g. chat with function calling).""" """Client configured for the chat role (e.g. chat with function calling)."""
return self._tool_client name = self._role_map.get(GenAIRoleEnum.chat)
return self._get_client(name) if name else None
@property @property
def vision_client(self) -> "Optional[GenAIClient]": def description_client(self) -> "Optional[GenAIClient]":
"""Client configured for the vision role (e.g. review descriptions, object descriptions).""" """Client configured for the descriptions role (e.g. review descriptions, object descriptions)."""
return self._vision_client name = self._role_map.get(GenAIRoleEnum.descriptions)
return self._get_client(name) if name else None
@property @property
def embeddings_client(self) -> "Optional[GenAIClient]": def embeddings_client(self) -> "Optional[GenAIClient]":
"""Client configured for the embeddings role.""" """Client configured for the embeddings role."""
return self._embeddings_client name = self._role_map.get(GenAIRoleEnum.embeddings)
return self._get_client(name) if name else None
def list_models(self) -> dict[str, list[str]]:
"""Return available models keyed by config entry name."""
result: dict[str, list[str]] = {}
for name in self._configs:
client = self._get_client(name)
if client:
result[name] = client.list_models()
return result

View File

@ -132,6 +132,19 @@ class OllamaClient(GenAIClient):
logger.warning("Ollama returned an error: %s", str(e)) logger.warning("Ollama returned an error: %s", str(e))
return None return None
def list_models(self) -> list[str]:
"""Return available model names from the Ollama server."""
if self.provider is None:
return []
try:
response = self.provider.list()
return sorted(
m.get("name", m.get("model", "")) for m in response.get("models", [])
)
except Exception as e:
logger.warning("Failed to list Ollama models: %s", e)
return []
def get_context_size(self) -> int: def get_context_size(self) -> int:
"""Get the context window size for Ollama.""" """Get the context window size for Ollama."""
return int( return int(

View File

@ -86,6 +86,14 @@ class OpenAIClient(GenAIClient):
logger.warning("OpenAI returned an error: %s", str(e)) logger.warning("OpenAI returned an error: %s", str(e))
return None return None
def list_models(self) -> list[str]:
"""Return available model IDs from the OpenAI-compatible API."""
try:
return sorted(m.id for m in self.provider.models.list().data)
except Exception as e:
logger.warning("Failed to list OpenAI models: %s", e)
return []
def get_context_size(self) -> int: def get_context_size(self) -> int:
"""Get the context window size for OpenAI.""" """Get the context window size for OpenAI."""
if self.context_size is not None: if self.context_size is not None:

View File

@ -121,11 +121,12 @@ class VLMWatchRunner(threading.Thread):
def _run_iteration(self) -> float: def _run_iteration(self) -> float:
"""Run one VLM analysis iteration. Returns seconds until next run.""" """Run one VLM analysis iteration. Returns seconds until next run."""
vision_client = ( chat_client = self.genai_manager.chat_client
self.genai_manager.vision_client or self.genai_manager.tool_client if chat_client is None or not chat_client.supports_vision:
logger.warning(
"VLM watch job %s: no chat client with vision support available",
self.job.id,
) )
if vision_client is None:
logger.warning("VLM watch job %s: no vision client available", self.job.id)
return 30 return 30
frame = self.frame_processor.get_current_frame(self.job.camera, {}) frame = self.frame_processor.get_current_frame(self.job.camera, {})
@ -163,7 +164,7 @@ class VLMWatchRunner(threading.Thread):
} }
) )
response = vision_client.chat_with_tools( response = chat_client.chat_with_tools(
messages=self.conversation, messages=self.conversation,
tools=None, tools=None,
tool_choice=None, tool_choice=None,

View File

@ -1485,7 +1485,12 @@
"title": "Timestamp Settings" "title": "Timestamp Settings"
}, },
"searchPlaceholder": "Search...", "searchPlaceholder": "Search...",
"addCustomLabel": "Add custom label..." "addCustomLabel": "Add custom label...",
"genaiModel": {
"placeholder": "Select model…",
"search": "Search models…",
"noModels": "No models available"
}
}, },
"globalConfig": { "globalConfig": {
"title": "Global Configuration", "title": "Global Configuration",

View File

@ -3,14 +3,6 @@ import type { SectionConfigOverrides } from "./types";
const genai: SectionConfigOverrides = { const genai: SectionConfigOverrides = {
base: { base: {
sectionDocs: "/configuration/genai/config", sectionDocs: "/configuration/genai/config",
restartRequired: [
"*.provider",
"*.api_key",
"*.base_url",
"*.model",
"*.provider_options",
"*.runtime_options",
],
advancedFields: ["*.base_url", "*.provider_options", "*.runtime_options"], advancedFields: ["*.base_url", "*.provider_options", "*.runtime_options"],
hiddenFields: ["genai.enabled_in_config"], hiddenFields: ["genai.enabled_in_config"],
uiSchema: { uiSchema: {
@ -37,6 +29,7 @@ const genai: SectionConfigOverrides = {
"ui:options": { size: "lg" }, "ui:options": { size: "lg" },
}, },
"*.model": { "*.model": {
"ui:widget": "genaiModel",
"ui:options": { size: "xs" }, "ui:options": { size: "xs" },
}, },
"*.provider": { "*.provider": {

View File

@ -24,6 +24,7 @@ import { ReviewLabelSwitchesWidget } from "./widgets/ReviewLabelSwitchesWidget";
import { ZoneSwitchesWidget } from "./widgets/ZoneSwitchesWidget"; import { ZoneSwitchesWidget } from "./widgets/ZoneSwitchesWidget";
import { ArrayAsTextWidget } from "./widgets/ArrayAsTextWidget"; import { ArrayAsTextWidget } from "./widgets/ArrayAsTextWidget";
import { FfmpegArgsWidget } from "./widgets/FfmpegArgsWidget"; import { FfmpegArgsWidget } from "./widgets/FfmpegArgsWidget";
import { GenAIModelWidget } from "./widgets/GenAIModelWidget";
import { GenAIRolesWidget } from "./widgets/GenAIRolesWidget"; import { GenAIRolesWidget } from "./widgets/GenAIRolesWidget";
import { InputRolesWidget } from "./widgets/InputRolesWidget"; import { InputRolesWidget } from "./widgets/InputRolesWidget";
import { TimezoneSelectWidget } from "./widgets/TimezoneSelectWidget"; import { TimezoneSelectWidget } from "./widgets/TimezoneSelectWidget";
@ -64,6 +65,7 @@ export const frigateTheme: FrigateTheme = {
ArrayAsTextWidget: ArrayAsTextWidget, ArrayAsTextWidget: ArrayAsTextWidget,
FfmpegArgsWidget: FfmpegArgsWidget, FfmpegArgsWidget: FfmpegArgsWidget,
CameraPathWidget: CameraPathWidget, CameraPathWidget: CameraPathWidget,
genaiModel: GenAIModelWidget,
genaiRoles: GenAIRolesWidget, genaiRoles: GenAIRolesWidget,
inputRoles: InputRolesWidget, inputRoles: InputRolesWidget,
// Custom widgets // Custom widgets

View File

@ -0,0 +1,125 @@
// Combobox widget for genai *.model fields.
// Fetches available models from the provider's backend and shows them in a dropdown.
import { useState, useMemo } from "react";
import type { WidgetProps } from "@rjsf/utils";
import { useTranslation } from "react-i18next";
import useSWR from "swr";
import { Check, ChevronsUpDown } from "lucide-react";
import { cn } from "@/lib/utils";
import { Button } from "@/components/ui/button";
import {
Command,
CommandGroup,
CommandInput,
CommandItem,
CommandList,
} from "@/components/ui/command";
import {
Popover,
PopoverContent,
PopoverTrigger,
} from "@/components/ui/popover";
import { getSizedFieldClassName } from "../utils";
/**
 * Extract the provider config entry name from the RJSF widget id.
 *
 * RJSF builds ids by joining the schema path with "_", so a model field
 * under config entry "myProvider" gets the id "root_myProvider_model".
 * Returns undefined when the id does not match that shape or the key
 * between prefix and suffix is empty.
 */
function getProviderKey(widgetId: string): string | undefined {
  const PREFIX = "root_";
  const SUFFIX = "_model";
  const hasExpectedShape =
    widgetId.startsWith(PREFIX) && widgetId.endsWith(SUFFIX);
  if (!hasExpectedShape) {
    return undefined;
  }
  // slice (not substring) so an overlapping prefix/suffix yields "".
  const key = widgetId.slice(PREFIX.length, widgetId.length - SUFFIX.length);
  return key.length > 0 ? key : undefined;
}
/**
 * RJSF widget for genai `*.model` fields: a searchable combobox populated
 * with the models reported by the selected provider's backend.
 *
 * Model lists for all configured providers are fetched once from the
 * "genai/models" endpoint (keyed by config entry name); this widget picks
 * out the list matching the entry it belongs to, derived from its own
 * RJSF id via getProviderKey.
 */
export function GenAIModelWidget(props: WidgetProps) {
  const { id, value, disabled, readonly, onChange, options } = props;
  const { t } = useTranslation(["views/settings"]);
  // Controls the open/closed state of the dropdown popover.
  const [open, setOpen] = useState(false);
  const fieldClassName = getSizedFieldClassName(options, "sm");
  // Config entry name this field belongs to (e.g. "myProvider"), or
  // undefined when the id doesn't match the expected "root_<name>_model"
  // shape — in that case no models are shown.
  const providerKey = useMemo(() => getProviderKey(id), [id]);
  // Map of config entry name -> model names, fetched via the app's global
  // SWR fetcher. revalidateOnFocus is disabled since the list only changes
  // when provider config changes.
  const { data: allModels } = useSWR<Record<string, string[]>>("genai/models", {
    revalidateOnFocus: false,
  });
  // Models offered for this specific provider entry; empty until the
  // fetch resolves or when the provider reported none.
  const models = useMemo(() => {
    if (!allModels || !providerKey) return [];
    return allModels[providerKey] ?? [];
  }, [allModels, providerKey]);
  // Display the current value on the trigger button; falls back to the
  // translated placeholder when unset/empty.
  const currentLabel = typeof value === "string" && value ? value : undefined;
  return (
    <Popover open={open} onOpenChange={setOpen}>
      <PopoverTrigger asChild>
        <Button
          id={id}
          type="button"
          variant="outline"
          role="combobox"
          aria-expanded={open}
          disabled={disabled || readonly}
          className={cn(
            "justify-between font-normal",
            !currentLabel && "text-muted-foreground",
            fieldClassName,
          )}
        >
          {currentLabel ??
            t("configForm.genaiModel.placeholder", {
              ns: "views/settings",
              defaultValue: "Select model…",
            })}
          <ChevronsUpDown className="ml-2 h-4 w-4 shrink-0 opacity-50" />
        </Button>
      </PopoverTrigger>
      <PopoverContent className="w-[--radix-popover-trigger-width] p-0">
        <Command>
          <CommandInput
            placeholder={t("configForm.genaiModel.search", {
              ns: "views/settings",
              defaultValue: "Search models…",
            })}
          />
          <CommandList>
            {models.length > 0 ? (
              <CommandGroup>
                {models.map((model) => (
                  <CommandItem
                    key={model}
                    value={model}
                    onSelect={() => {
                      onChange(model);
                      setOpen(false);
                    }}
                  >
                    <Check
                      className={cn(
                        "mr-2 h-4 w-4",
                        value === model ? "opacity-100" : "opacity-0",
                      )}
                    />
                    {model}
                  </CommandItem>
                ))}
              </CommandGroup>
            ) : (
              <div className="p-4 text-center text-sm text-muted-foreground">
                {t("configForm.genaiModel.noModels", {
                  ns: "views/settings",
                  defaultValue: "No models available",
                })}
              </div>
            )}
          </CommandList>
        </Command>
      </PopoverContent>
    </Popover>
  );
}

View File

@ -4,7 +4,7 @@ import { useTranslation } from "react-i18next";
import { Switch } from "@/components/ui/switch"; import { Switch } from "@/components/ui/switch";
import type { ConfigFormContext } from "@/types/configForm"; import type { ConfigFormContext } from "@/types/configForm";
const GENAI_ROLES = ["embeddings", "vision", "tools"] as const; const GENAI_ROLES = ["embeddings", "descriptions", "chat"] as const;
function normalizeValue(value: unknown): string[] { function normalizeValue(value: unknown): string[] {
if (Array.isArray(value)) { if (Array.isArray(value)) {