This commit is contained in:
Josh Hawkins 2026-05-21 21:23:15 -05:00 committed by GitHub
commit 3df59a7847
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 285 additions and 51 deletions

View File

@ -49,15 +49,14 @@ You should have at least 8 GB of RAM available (or VRAM if running on GPU) to ru
### Model Types: Instruct vs Thinking
Most vision-language models are available as **instruct** models, which are fine-tuned to follow instructions and respond concisely to prompts. However, some models (such as certain Qwen-VL or minigpt variants) offer both **instruct** and **thinking** versions.
Vision-language models come in **instruct** variants (fine-tuned to follow instructions and respond concisely), **thinking** variants (fine-tuned for free-form, speculative reasoning), and **hybrid** variants that support both modes per request. Most modern vision-language models are hybrid.
- **Instruct models** are always recommended for use with Frigate. These models generate direct, relevant, actionable descriptions that best fit Frigate's object and event summary use case.
- **Reasoning / Thinking models** are fine-tuned for more free-form, open-ended, and speculative outputs, which are typically not concise and may not provide the practical summaries Frigate expects. For this reason, Frigate does **not** recommend or support using thinking models.
Frigate manages reasoning per task automatically:
Some models are labeled as **hybrid** (capable of both thinking and instruct tasks). In these cases, it is recommended to disable reasoning / thinking, which is generally model specific (see your model's documentation).
- **Description tasks** (object descriptions, review descriptions, review summaries) are synthesis-only and benefit from concise, direct output, so Frigate disables thinking for these calls when the model exposes a per-request toggle.
- **Chat** lets you toggle thinking on or off from the composer when the configured model supports it.
**Recommendation:**
Always select the `-instruct` or documented instruct/tagged variant of any model you use in your Frigate configuration. If in doubt, refer to your model provider's documentation or model library for guidance on the correct model variant to use.
You can use a pure instruct, hybrid, or thinking-capable model with Frigate — no extra configuration is required to disable thinking for descriptions.
### llama.cpp

View File

@ -238,6 +238,10 @@ class DebugReplayManager:
zone_dump.setdefault("coordinates", zone_config.coordinates)
zones_dict[zone_name] = zone_dump
# Extract LPR and face recognition configs
lpr_dict = source_config.lpr.model_dump()
face_recognition_dict = source_config.face_recognition.model_dump()
# Extract motion config (exclude runtime fields)
motion_dict = {}
if source_config.motion is not None:
@ -287,8 +291,8 @@ class DebugReplayManager:
},
"birdseye": {"enabled": False},
"audio": {"enabled": False},
"lpr": {"enabled": False},
"face_recognition": {"enabled": False},
"lpr": lpr_dict,
"face_recognition": face_recognition_dict,
}
def _cleanup_db(self, camera_name: str) -> None:

View File

@ -1,5 +1,7 @@
"""Gemini Provider for Frigate AI."""
import base64
import binascii
import json
import logging
from typing import Any, AsyncGenerator, Optional
@ -14,6 +16,27 @@ from frigate.genai import GenAIClient, register_genai_provider
logger = logging.getLogger(__name__)
def _decode_thought_signature(value: Any) -> Optional[bytes]:
"""Decode a base64-encoded thought_signature carried across conversation turns."""
if not value:
return None
if isinstance(value, bytes):
return value
if isinstance(value, str):
try:
return base64.b64decode(value)
except (binascii.Error, ValueError):
return None
return None
def _encode_thought_signature(signature: Optional[bytes]) -> Optional[str]:
"""Encode bytes thought_signature as base64 so it survives JSON-friendly transport."""
if not signature:
return None
return base64.b64encode(signature).decode("ascii")
def _stats_from_gemini_usage(usage: Any) -> Optional[dict[str, Any]]:
"""Build a stats dict from a Gemini usage_metadata object."""
prompt_tokens = getattr(usage, "prompt_token_count", None)
@ -169,11 +192,17 @@ class GeminiClient(GenAIClient):
if not isinstance(tc_args, dict):
tc_args = {}
if tc_name:
parts.append(
types.Part.from_function_call(
name=tc_name, args=tc_args
)
fc_part = types.Part.from_function_call(
name=tc_name, args=tc_args
)
# Thinking-capable Gemini models require the original
# thought_signature to be echoed back on functionCall
# parts after a tool response, or the next request
# fails with INVALID_ARGUMENT.
sig = _decode_thought_signature(tc.get("thought_signature"))
if sig:
fc_part.thought_signature = sig
parts.append(fc_part)
if not parts:
parts.append(types.Part.from_text(text=" "))
gemini_messages.append(types.Content(role="model", parts=parts))
@ -310,6 +339,9 @@ class GeminiClient(GenAIClient):
"id": part.function_call.name or "",
"name": part.function_call.name or "",
"arguments": arguments,
"thought_signature": _encode_thought_signature(
getattr(part, "thought_signature", None)
),
}
)
@ -418,11 +450,17 @@ class GeminiClient(GenAIClient):
if not isinstance(tc_args, dict):
tc_args = {}
if tc_name:
parts.append(
types.Part.from_function_call(
name=tc_name, args=tc_args
)
fc_part = types.Part.from_function_call(
name=tc_name, args=tc_args
)
# Thinking-capable Gemini models require the original
# thought_signature to be echoed back on functionCall
# parts after a tool response, or the next request
# fails with INVALID_ARGUMENT.
sig = _decode_thought_signature(tc.get("thought_signature"))
if sig:
fc_part.thought_signature = sig
parts.append(fc_part)
if not parts:
parts.append(types.Part.from_text(text=" "))
gemini_messages.append(types.Content(role="model", parts=parts))
@ -588,6 +626,7 @@ class GeminiClient(GenAIClient):
"id": tool_call_id,
"name": tool_call_name,
"arguments": "",
"thought_signature": None,
}
# Accumulate arguments
@ -598,6 +637,13 @@ class GeminiClient(GenAIClient):
else str(arguments)
)
# Capture latest thought_signature for this call
chunk_sig = getattr(part, "thought_signature", None)
if chunk_sig:
tool_calls_by_index[found_index][
"thought_signature"
] = chunk_sig
# Build final message
full_content = "".join(content_parts).strip() or None
full_reasoning = "".join(reasoning_parts).strip() or None
@ -618,6 +664,9 @@ class GeminiClient(GenAIClient):
"id": tc["id"],
"name": tc["name"],
"arguments": parsed_args,
"thought_signature": _encode_thought_signature(
tc.get("thought_signature")
),
}
)
finish_reason = "tool_calls"

View File

@ -69,6 +69,14 @@ def build_assistant_message_for_conversation(
"name": tc["name"],
"arguments": json.dumps(tc.get("arguments") or {}),
},
# Gemini-only: opaque signature that must be echoed back on
# the same functionCall part in the next turn. Other providers
# do not set or read this.
**(
{"thought_signature": tc["thought_signature"]}
if tc.get("thought_signature")
else {}
),
}
for tc in tool_calls_raw
]

View File

@ -167,8 +167,9 @@ class DetectorRunner(FrigateProcess):
# detect and send the output
self.start_time.value = datetime.datetime.now().timestamp()
mono_start = time.monotonic()
detections = object_detector.detect_raw(input_frame)
duration = datetime.datetime.now().timestamp() - self.start_time.value
duration = time.monotonic() - mono_start
frame_manager.close(connection_id)
if connection_id not in self.outputs:

View File

@ -1331,6 +1331,8 @@ class PtzAutoTracker:
return self.tracked_object[camera]["region"]
def autotrack_object(self, camera: str, obj: TrackedObject):
if camera not in self.config.cameras:
return
camera_config = self.config.cameras[camera]
if camera_config.onvif.autotracking.enabled:

View File

@ -484,11 +484,15 @@
"reorderHandle": "Drag to reorder",
"saving": "Saving…",
"saved": "Saved",
"friendlyName": {
"edit": "Edit camera display name",
"title": "Edit Display Name",
"description": "Set the friendly name shown for this camera throughout the Frigate UI. Leave blank to use the camera ID.",
"rename": "Rename"
"details": {
"edit": "Edit camera details",
"title": "Edit Camera Details",
"description": "Update the display name and external URL used for this camera throughout the Frigate UI.",
"friendlyNameLabel": "Display Name",
"friendlyNameHelp": "Friendly name shown for this camera throughout the Frigate UI. Leave blank to use the camera ID.",
"webuiUrlLabel": "Camera Web UI URL",
"webuiUrlHelp": "URL to visit the camera's web UI directly from the Debug view. Leave blank to disable the link.",
"webuiUrlInvalid": "Must be a valid URL (e.g., https://example.com)."
}
},
"cameraConfig": {

View File

@ -36,7 +36,15 @@ import axios from "axios";
import ActivityIndicator from "@/components/indicators/activity-indicator";
import RestartDialog from "@/components/overlay/dialog/RestartDialog";
import RestartRequiredIndicator from "@/components/indicators/RestartRequiredIndicator";
import TextEntryDialog from "@/components/overlay/dialog/TextEntryDialog";
import {
Dialog,
DialogContent,
DialogDescription,
DialogFooter,
DialogHeader,
DialogTitle,
} from "@/components/ui/dialog";
import { Input } from "@/components/ui/input";
import {
Tooltip,
TooltipContent,
@ -53,6 +61,17 @@ import {
SelectTrigger,
SelectValue,
} from "@/components/ui/select";
import {
Form,
FormControl,
FormField,
FormItem,
FormLabel,
FormMessage,
} from "@/components/ui/form";
import { useForm } from "react-hook-form";
import { zodResolver } from "@hookform/resolvers/zod";
import { z } from "zod";
const REORDER_SAVED_INDICATOR_MS = 1500;
@ -482,7 +501,7 @@ function EnabledCameraRow({
<LuGripVertical className="size-4" />
</button>
<CameraNameLabel camera={camera} />
<CameraFriendlyNameEditor
<CameraDetailsEditor
cameraName={camera}
onConfigChanged={onConfigChanged}
/>
@ -519,25 +538,91 @@ function CameraEnableSwitch({ cameraName }: CameraEnableSwitchProps) {
);
}
type CameraFriendlyNameEditorProps = {
type CameraDetailsEditorProps = {
cameraName: string;
onConfigChanged: () => Promise<unknown>;
};
function CameraFriendlyNameEditor({
type CameraDetailsFormValues = {
friendlyName: string;
webuiUrl: string;
};
function CameraDetailsEditor({
cameraName,
onConfigChanged,
}: CameraFriendlyNameEditorProps) {
}: CameraDetailsEditorProps) {
const { t } = useTranslation(["views/settings", "common"]);
const { data: config } = useSWR<FrigateConfig>("config");
const [open, setOpen] = useState(false);
const [isSaving, setIsSaving] = useState(false);
const currentFriendlyName = config?.cameras?.[cameraName]?.friendly_name;
const currentWebuiUrl = config?.cameras?.[cameraName]?.webui_url;
const onSave = useCallback(
async (text: string) => {
const formSchema = useMemo(
() =>
z.object({
friendlyName: z.string(),
webuiUrl: z.string().refine(
(val) => {
const trimmed = val.trim();
if (!trimmed) return true;
try {
new URL(trimmed);
return true;
} catch {
return false;
}
},
{
message: t("cameraManagement.streams.details.webuiUrlInvalid", {
ns: "views/settings",
}),
},
),
}),
[t],
);
const form = useForm<CameraDetailsFormValues>({
resolver: zodResolver(formSchema),
defaultValues: {
friendlyName: currentFriendlyName ?? "",
webuiUrl: currentWebuiUrl ?? "",
},
});
// Reset form values from config whenever the dialog is opened.
useEffect(() => {
if (open) {
form.reset({
friendlyName: currentFriendlyName ?? "",
webuiUrl: currentWebuiUrl ?? "",
});
}
}, [open, currentFriendlyName, currentWebuiUrl, form]);
const onSubmit = useCallback(
async (values: CameraDetailsFormValues) => {
if (isSaving) return;
// only send fields the user actually changed
const newFriendly = values.friendlyName.trim() || null;
const newWebui = values.webuiUrl.trim() || null;
const cameraUpdate: Record<string, string | null> = {};
if (newFriendly !== (currentFriendlyName ?? null)) {
cameraUpdate.friendly_name = newFriendly;
}
if (newWebui !== (currentWebuiUrl ?? null)) {
cameraUpdate.webui_url = newWebui;
}
if (Object.keys(cameraUpdate).length === 0) {
setOpen(false);
return;
}
setIsSaving(true);
try {
@ -545,9 +630,7 @@ function CameraFriendlyNameEditor({
requires_restart: 0,
config_data: {
cameras: {
[cameraName]: {
friendly_name: text.trim() || null,
},
[cameraName]: cameraUpdate,
},
},
});
@ -573,10 +656,17 @@ function CameraFriendlyNameEditor({
setIsSaving(false);
}
},
[cameraName, isSaving, onConfigChanged, t],
[
cameraName,
currentFriendlyName,
currentWebuiUrl,
isSaving,
onConfigChanged,
t,
],
);
const renameLabel = t("cameraManagement.streams.friendlyName.rename", {
const editLabel = t("cameraManagement.streams.details.edit", {
ns: "views/settings",
});
@ -588,30 +678,107 @@ function CameraFriendlyNameEditor({
variant="ghost"
size="icon"
className="size-7"
aria-label={renameLabel}
aria-label={editLabel}
onClick={() => setOpen(true)}
disabled={isSaving}
>
<LuPencil className="size-3.5" />
</Button>
</TooltipTrigger>
<TooltipContent>{renameLabel}</TooltipContent>
<TooltipContent>{editLabel}</TooltipContent>
</Tooltip>
<TextEntryDialog
open={open}
setOpen={setOpen}
title={t("cameraManagement.streams.friendlyName.title", {
ns: "views/settings",
})}
description={t("cameraManagement.streams.friendlyName.description", {
ns: "views/settings",
})}
defaultValue={currentFriendlyName ?? ""}
placeholder={currentFriendlyName ? undefined : cameraName}
allowEmpty
isSaving={isSaving}
onSave={onSave}
/>
<Dialog open={open} onOpenChange={setOpen}>
<DialogContent>
<DialogHeader>
<DialogTitle>
{t("cameraManagement.streams.details.title", {
ns: "views/settings",
})}
</DialogTitle>
<DialogDescription>
{t("cameraManagement.streams.details.description", {
ns: "views/settings",
})}
</DialogDescription>
</DialogHeader>
<Form {...form}>
<form onSubmit={form.handleSubmit(onSubmit)} className="space-y-4">
<FormField
control={form.control}
name="friendlyName"
render={({ field }) => (
<FormItem>
<FormLabel>
{t("cameraManagement.streams.details.friendlyNameLabel", {
ns: "views/settings",
})}
</FormLabel>
<FormControl>
<Input
{...field}
placeholder={cameraName}
disabled={isSaving}
autoFocus
/>
</FormControl>
<p className="text-xs text-muted-foreground">
{t("cameraManagement.streams.details.friendlyNameHelp", {
ns: "views/settings",
})}
</p>
<FormMessage />
</FormItem>
)}
/>
<FormField
control={form.control}
name="webuiUrl"
render={({ field }) => (
<FormItem>
<FormLabel>
{t("cameraManagement.streams.details.webuiUrlLabel", {
ns: "views/settings",
})}
</FormLabel>
<FormControl>
<Input
{...field}
placeholder="https://"
disabled={isSaving}
/>
</FormControl>
<p className="text-xs text-muted-foreground">
{t("cameraManagement.streams.details.webuiUrlHelp", {
ns: "views/settings",
})}
</p>
<FormMessage />
</FormItem>
)}
/>
<DialogFooter className="pt-2">
<Button
type="button"
disabled={isSaving}
onClick={() => setOpen(false)}
>
{t("button.cancel", { ns: "common" })}
</Button>
<Button variant="select" type="submit" disabled={isSaving}>
{isSaving ? (
<div className="flex flex-row items-center gap-2">
<ActivityIndicator className="size-4" />
<span>{t("button.saving", { ns: "common" })}</span>
</div>
) : (
t("button.save", { ns: "common" })
)}
</Button>
</DialogFooter>
</form>
</Form>
</DialogContent>
</Dialog>
</>
);
}