Compare commits

...

6 Commits

| Author | SHA1 | Message | Date |
| --- | --- | --- | --- |
| Nicolas Mowen | 4bf1492e2d | Merge fb4fe8c430 into 1b57fb15a7 | 2025-11-28 21:50:32 +00:00 |
| Nicolas Mowen | fb4fe8c430 | Fine tune behavior | 2025-11-28 14:50:26 -07:00 |
| Josh Hawkins | ecb59ff943 | prevent crash when a camera doesn't support onvif imaging service required for focus support | 2025-11-28 15:39:42 -06:00 |
| Nicolas Mowen | 4dfea29f28 | Don't save redundant state changes | 2025-11-28 14:31:16 -07:00 |
| Josh Hawkins | 6c172ed095 | remove incorrect constraint for viewer as username (should be able to change anyone's role other than admin) | 2025-11-28 14:46:21 -06:00 |
| Josh Hawkins | cc76a57ce5 | add transcription faq | 2025-11-28 12:50:25 -06:00 |
4 changed files with 110 additions and 46 deletions

View File

@@ -157,3 +157,19 @@ Only one `speech` event may be transcribed at a time. Frigate does not automatic
 :::
 
 Recorded `speech` events will always use a `whisper` model, regardless of the `model_size` config setting. Without a supported Nvidia GPU, generating transcriptions for longer `speech` events may take a fair amount of time, so be patient.
+
+#### FAQ
+
+1. Why doesn't Frigate automatically transcribe all `speech` events?
+
+Frigate does not implement a queue mechanism for speech transcription, and adding one is not trivial. A proper queue would need backpressure, prioritization, memory/disk buffering, retry logic, crash recovery, and safeguards to prevent unbounded growth when events outpace processing. That's a significant amount of complexity for a feature that, in most real-world environments, would mostly just churn through low-value noise.
+
+Because transcription is **serialized (one event at a time)** and speech events can be generated far faster than they can be processed, an auto-transcribe toggle would very quickly create an ever-growing backlog and degrade core functionality. For the amount of engineering and risk involved, it adds **very little practical value** for the majority of deployments, which are often on low-powered, edge hardware.
+
+If you hear speech that's actually important and worth saving/indexing for the future, **just press the transcribe button in Explore** on that specific `speech` event - that keeps things explicit, reliable, and under your control.
+
+2. Why don't you save live transcription text and use that for `speech` events?
+
+There's no guarantee that a `speech` event is even created from the exact audio that went through the transcription model. Live transcription and `speech` event creation are **separate, asynchronous processes**. Even when both are correctly configured, trying to align the **precise start and end time of a speech event** with whatever audio the model happened to be processing at that moment is unreliable.
+
+Automatically persisting that data would often result in **misaligned, partial, or irrelevant transcripts**, while still incurring all of the CPU, storage, and privacy costs of transcription. That's why Frigate treats transcription as an **explicit, user-initiated action** rather than an automatic side-effect of every `speech` event.
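To make the backlog concern in the first FAQ answer concrete, here is a minimal, self-contained simulation (not Frigate code; the arrival rate and per-event transcription time are invented numbers) of a serialized transcriber falling behind a faster producer:

```python
# Illustrative only: a single serialized transcriber (one event at a time)
# paired with speech events that arrive faster than they can be processed.
from collections import deque

ARRIVALS_PER_MINUTE = 6          # invented: new speech events per minute
SECONDS_PER_TRANSCRIPTION = 20   # invented: time to transcribe one event

queue: deque[str] = deque()
backlog = []

for minute in range(60):
    # events generated this minute
    for i in range(ARRIVALS_PER_MINUTE):
        queue.append(f"speech-{minute}-{i}")
    # one worker can clear at most 60 // SECONDS_PER_TRANSCRIPTION events per minute
    for _ in range(60 // SECONDS_PER_TRANSCRIPTION):
        if queue:
            queue.popleft()
    backlog.append(len(queue))

# The backlog grows by 3 events every minute and never drains: [3, 6, 9, ...] -> 180
print(backlog[:3], "...", backlog[-1])
```

Any per-event cost above the arrival interval produces the same unbounded growth, which is exactly the backlog the FAQ describes.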

View File

@ -99,6 +99,42 @@ class CustomStateClassificationProcessor(RealTimeProcessorApi):
if self.inference_speed:
self.inference_speed.update(duration)
def _should_save_image(
self, camera: str, detected_state: str, score: float = 1.0
) -> bool:
"""
Determine if we should save the image for training.
Save when:
- State is changing or being verified (regardless of score)
- Score is less than 100% (even if state matches, useful for training)
Don't save when:
- State is stable (matches current_state) AND score is 100%
"""
if camera not in self.state_history:
# First detection for this camera, save it
return True
verification = self.state_history[camera]
current_state = verification.get("current_state")
pending_state = verification.get("pending_state")
# Save if there's a pending state change being verified
if pending_state is not None:
return True
# Save if the detected state differs from the current verified state
# (state is changing)
if current_state is not None and detected_state != current_state:
return True
# If score is less than 100%, save even if state matches
# (useful for training to improve confidence)
if score < 1.0:
return True
# Don't save if state is stable (detected_state == current_state) AND score is 100%
return False
def verify_state_change(self, camera: str, detected_state: str) -> str | None:
"""
Verify state change requires 3 consecutive identical states before publishing.
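As a quick sanity check of the rules in the docstring above, here is a standalone restatement of the same decision logic (not the class method itself; `state_history` is stubbed as a plain dict and the camera names are made up):

```python
# Standalone restatement of the _should_save_image rules, for illustration only.
# `state_history` stands in for the processor's per-camera verification dict.
def should_save_image(state_history: dict, camera: str,
                      detected_state: str, score: float = 1.0) -> bool:
    if camera not in state_history:
        return True  # first detection for this camera
    verification = state_history[camera]
    if verification.get("pending_state") is not None:
        return True  # a state change is currently being verified
    current_state = verification.get("current_state")
    if current_state is not None and detected_state != current_state:
        return True  # state is changing
    if score < 1.0:
        return True  # uncertain result is still useful for training
    return False  # stable state at 100% confidence: skip the redundant image


history = {"driveway": {"current_state": "empty", "pending_state": None}}
assert should_save_image(history, "porch", "empty")              # unknown camera
assert should_save_image(history, "driveway", "occupied")        # state change
assert should_save_image(history, "driveway", "empty", 0.87)     # low confidence
assert not should_save_image(history, "driveway", "empty", 1.0)  # redundant
```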
@@ -212,14 +248,16 @@ class CustomStateClassificationProcessor(RealTimeProcessorApi):
             return
 
         if self.interpreter is None:
-            write_classification_attempt(
-                self.train_dir,
-                cv2.cvtColor(frame, cv2.COLOR_RGB2BGR),
-                "none-none",
-                now,
-                "unknown",
-                0.0,
-            )
+            # When interpreter is None, always save (score is 0.0, which is < 1.0)
+            if self._should_save_image(camera, "unknown", 0.0):
+                write_classification_attempt(
+                    self.train_dir,
+                    cv2.cvtColor(frame, cv2.COLOR_RGB2BGR),
+                    "none-none",
+                    now,
+                    "unknown",
+                    0.0,
+                )
             return
 
         input = np.expand_dims(resized_frame, axis=0)
@@ -236,14 +274,17 @@ class CustomStateClassificationProcessor(RealTimeProcessorApi):
         score = round(probs[best_id], 2)
         self.__update_metrics(datetime.datetime.now().timestamp() - now)
 
-        write_classification_attempt(
-            self.train_dir,
-            cv2.cvtColor(frame, cv2.COLOR_RGB2BGR),
-            "none-none",
-            now,
-            self.labelmap[best_id],
-            score,
-        )
+        detected_state = self.labelmap[best_id]
+
+        if self._should_save_image(camera, detected_state, score):
+            write_classification_attempt(
+                self.train_dir,
+                cv2.cvtColor(frame, cv2.COLOR_RGB2BGR),
+                "none-none",
+                now,
+                detected_state,
+                score,
+            )
 
         if score < self.model_config.threshold:
             logger.debug(
@@ -251,7 +292,6 @@ class CustomStateClassificationProcessor(RealTimeProcessorApi):
             )
             return
 
-        detected_state = self.labelmap[best_id]
         verified_state = self.verify_state_change(camera, detected_state)
         if verified_state is not None:
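The surrounding context notes that `verify_state_change` publishes a change only after three consecutive identical detections. Its body is not part of this diff, so the following is only a hypothetical sketch of that kind of debounce, with guessed field names rather than the actual implementation:

```python
# Hypothetical 3-consecutive-readings debounce; illustrates the idea only.
REQUIRED_CONSECUTIVE = 3

def verify_state_change(state_history: dict, camera: str, detected_state: str) -> str | None:
    entry = state_history.setdefault(
        camera, {"current_state": None, "pending_state": None, "count": 0}
    )
    if detected_state == entry["current_state"]:
        # No change; drop any half-verified pending state.
        entry["pending_state"] = None
        entry["count"] = 0
        return None
    if detected_state == entry["pending_state"]:
        entry["count"] += 1
    else:
        entry["pending_state"] = detected_state
        entry["count"] = 1
    if entry["count"] >= REQUIRED_CONSECUTIVE:
        entry["current_state"] = detected_state
        entry["pending_state"] = None
        entry["count"] = 0
        return detected_state  # verified: safe to publish
    return None  # still waiting for more consecutive confirmations
```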

View File

@@ -190,7 +190,11 @@ class OnvifController:
         ptz: ONVIFService = await onvif.create_ptz_service()
         self.cams[camera_name]["ptz"] = ptz
-        imaging: ONVIFService = await onvif.create_imaging_service()
+        try:
+            imaging: ONVIFService = await onvif.create_imaging_service()
+        except (Fault, ONVIFError, TransportError, Exception) as e:
+            logger.debug(f"Imaging service not supported for {camera_name}: {e}")
+            imaging = None
         self.cams[camera_name]["imaging"] = imaging
 
         try:
             video_sources = await media.GetVideoSources()
@@ -381,7 +385,10 @@
                     f"Disabling autotracking zooming for {camera_name}: Absolute zoom not supported. Exception: {e}"
                 )
 
-        if self.cams[camera_name]["video_source_token"] is not None:
+        if (
+            self.cams[camera_name]["video_source_token"] is not None
+            and imaging is not None
+        ):
             try:
                 imaging_capabilities = await imaging.GetImagingSettings(
                     {"VideoSourceToken": self.cams[camera_name]["video_source_token"]}
@@ -421,6 +428,7 @@
         if (
             "focus" in self.cams[camera_name]["features"]
             and self.cams[camera_name]["video_source_token"]
+            and self.cams[camera_name]["imaging"] is not None
         ):
             try:
                 stop_request = self.cams[camera_name]["imaging"].create_type("Stop")
@@ -648,6 +656,7 @@
         if (
             "focus" not in self.cams[camera_name]["features"]
             or not self.cams[camera_name]["video_source_token"]
+            or self.cams[camera_name]["imaging"] is None
        ):
            logger.error(f"{camera_name} does not support ONVIF continuous focus.")
            return
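The ONVIF changes above all follow one defensive pattern: treat the imaging service as optional, fall back to `None` when it can't be created, and guard every later use. Here is a generic sketch of that pattern in isolation (the service factory, camera name, and dict layout are stand-ins, not the onvif library's API):

```python
import asyncio
import logging

logger = logging.getLogger(__name__)


async def create_imaging_service(camera_name: str):
    """Stand-in for the real service factory; raises for cameras without imaging support."""
    raise RuntimeError("imaging service not available on this camera")


async def setup_camera(camera_name: str) -> dict:
    cam: dict = {"features": ["focus"], "video_source_token": "token0"}
    try:
        cam["imaging"] = await create_imaging_service(camera_name)
    except Exception as e:
        # Camera stays usable; focus-related features are simply unavailable.
        logger.debug(f"Imaging service not supported for {camera_name}: {e}")
        cam["imaging"] = None
    return cam


async def continuous_focus(cam: dict, camera_name: str) -> None:
    # Guard every use of the optional service, mirroring the checks in the diff.
    if (
        "focus" not in cam["features"]
        or not cam["video_source_token"]
        or cam["imaging"] is None
    ):
        logger.error(f"{camera_name} does not support ONVIF continuous focus.")
        return
    # ... issue the focus move using cam["imaging"] here ...


async def main() -> None:
    cam = await setup_camera("side_yard")
    await continuous_focus(cam, "side_yard")


asyncio.run(main())
```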

View File

@@ -478,33 +478,32 @@ export default function AuthenticationView({
                     <TableCell className="text-right">
                       <TooltipProvider>
                         <div className="flex items-center justify-end gap-2">
-                          {user.username !== "admin" &&
-                            user.username !== "viewer" && (
-                              <Tooltip>
-                                <TooltipTrigger asChild>
-                                  <Button
-                                    size="sm"
-                                    variant="outline"
-                                    className="h-8 px-2"
-                                    onClick={() => {
-                                      setSelectedUser(user.username);
-                                      setSelectedUserRole(
-                                        user.role || "viewer",
-                                      );
-                                      setShowRoleChange(true);
-                                    }}
-                                  >
-                                    <LuUserCog className="size-3.5" />
-                                    <span className="ml-1.5 hidden sm:inline-block">
-                                      {t("role.title", { ns: "common" })}
-                                    </span>
-                                  </Button>
-                                </TooltipTrigger>
-                                <TooltipContent>
-                                  <p>{t("users.table.changeRole")}</p>
-                                </TooltipContent>
-                              </Tooltip>
-                            )}
+                          {user.username !== "admin" && (
+                            <Tooltip>
+                              <TooltipTrigger asChild>
+                                <Button
+                                  size="sm"
+                                  variant="outline"
+                                  className="h-8 px-2"
+                                  onClick={() => {
+                                    setSelectedUser(user.username);
+                                    setSelectedUserRole(
+                                      user.role || "viewer",
+                                    );
+                                    setShowRoleChange(true);
+                                  }}
+                                >
+                                  <LuUserCog className="size-3.5" />
+                                  <span className="ml-1.5 hidden sm:inline-block">
+                                    {t("role.title", { ns: "common" })}
+                                  </span>
+                                </Button>
+                              </TooltipTrigger>
+                              <TooltipContent>
+                                <p>{t("users.table.changeRole")}</p>
+                              </TooltipContent>
+                            </Tooltip>
+                          )}
                           <Tooltip>
                             <TooltipTrigger asChild>