Mirror of https://github.com/blakeblackshear/frigate.git (synced 2025-12-19 03:26:41 +03:00)

Compare commits

No commits in common. "fb4fe8c4302602a7db742d86408fc988e0ecb5c2" and "62e5546c577a67fcb1f89e9a424afa1b9d1f71fc" have entirely different histories.

fb4fe8c430 ... 62e5546c57

@@ -157,19 +157,3 @@ Only one `speech` event may be transcribed at a time. Frigate does not automatic
 :::

 Recorded `speech` events will always use a `whisper` model, regardless of the `model_size` config setting. Without a supported Nvidia GPU, generating transcriptions for longer `speech` events may take a fair amount of time, so be patient.
-
-#### FAQ
-
-1. Why doesn't Frigate automatically transcribe all `speech` events?
-
-Frigate does not implement a queue mechanism for speech transcription, and adding one is not trivial. A proper queue would need backpressure, prioritization, memory/disk buffering, retry logic, crash recovery, and safeguards to prevent unbounded growth when events outpace processing. That’s a significant amount of complexity for a feature that, in most real-world environments, would mostly just churn through low-value noise.
-
-Because transcription is **serialized (one event at a time)** and speech events can be generated far faster than they can be processed, an auto-transcribe toggle would very quickly create an ever-growing backlog and degrade core functionality. For the amount of engineering and risk involved, it adds **very little practical value** for the majority of deployments, which are often on low-powered, edge hardware.
-
-If you hear speech that’s actually important and worth saving/indexing for the future, **just press the transcribe button in Explore** on that specific `speech` event - that keeps things explicit, reliable, and under your control.
-
-2. Why don't you save live transcription text and use that for `speech` events?
-
-There’s no guarantee that a `speech` event is even created from the exact audio that went through the transcription model. Live transcription and `speech` event creation are **separate, asynchronous processes**. Even when both are correctly configured, trying to align the **precise start and end time of a speech event** with whatever audio the model happened to be processing at that moment is unreliable.
-
-Automatically persisting that data would often result in **misaligned, partial, or irrelevant transcripts**, while still incurring all of the CPU, storage, and privacy costs of transcription. That’s why Frigate treats transcription as an **explicit, user-initiated action** rather than an automatic side-effect of every `speech` event.
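
The first FAQ answer above argues that safe auto-transcription would need a bounded queue with backpressure, and that speech events can arrive faster than a serialized transcriber can drain them. A minimal sketch of that idea, with a hypothetical `transcribe()` standing in for the whisper call (illustrative only, not Frigate's implementation):

```python
# Hypothetical sketch of a bounded transcription queue with backpressure.
# Events that arrive while the backlog is full are refused rather than
# letting the queue grow without limit.
import queue
import threading


def transcribe(event_id: str) -> str:
    # Stand-in for a slow, serialized whisper call.
    return f"transcript for {event_id}"


class TranscriptionQueue:
    def __init__(self, max_pending: int = 8) -> None:
        self._pending = queue.Queue(maxsize=max_pending)
        self._worker = threading.Thread(target=self._run, daemon=True)
        self._worker.start()

    def submit(self, event_id: str) -> bool:
        """Return False (drop the event) when the backlog is full."""
        try:
            self._pending.put_nowait(event_id)
            return True
        except queue.Full:
            return False

    def _run(self) -> None:
        # Transcription is serialized: one event at a time.
        while True:
            event_id = self._pending.get()
            transcribe(event_id)
            self._pending.task_done()
```

Even this toy version has to decide what happens when the backlog fills: dropping silently loses events, while blocking stalls the producer. That trade-off, plus prioritization, buffering, and recovery, is the complexity the answer above is pointing at.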

@@ -99,42 +99,6 @@ class CustomStateClassificationProcessor(RealTimeProcessorApi):
         if self.inference_speed:
             self.inference_speed.update(duration)

-    def _should_save_image(
-        self, camera: str, detected_state: str, score: float = 1.0
-    ) -> bool:
-        """
-        Determine if we should save the image for training.
-        Save when:
-        - State is changing or being verified (regardless of score)
-        - Score is less than 100% (even if state matches, useful for training)
-        Don't save when:
-        - State is stable (matches current_state) AND score is 100%
-        """
-        if camera not in self.state_history:
-            # First detection for this camera, save it
-            return True
-
-        verification = self.state_history[camera]
-        current_state = verification.get("current_state")
-        pending_state = verification.get("pending_state")
-
-        # Save if there's a pending state change being verified
-        if pending_state is not None:
-            return True
-
-        # Save if the detected state differs from the current verified state
-        # (state is changing)
-        if current_state is not None and detected_state != current_state:
-            return True
-
-        # If score is less than 100%, save even if state matches
-        # (useful for training to improve confidence)
-        if score < 1.0:
-            return True
-
-        # Don't save if state is stable (detected_state == current_state) AND score is 100%
-        return False
-
     def verify_state_change(self, camera: str, detected_state: str) -> str | None:
         """
         Verify state change requires 3 consecutive identical states before publishing.
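
As a reading aid for the `_should_save_image` logic on the fb4fe8c430 side above, here is a standalone sketch of the same decision rules with a few example calls (simplified and hypothetical: `history` is a plain dict rather than the processor's `state_history` attribute):

```python
# Simplified, standalone version of the save-for-training decision described above.
def should_save_image(history: dict, camera: str, detected_state: str, score: float = 1.0) -> bool:
    if camera not in history:
        return True  # first detection for this camera
    verification = history[camera]
    if verification.get("pending_state") is not None:
        return True  # a state change is being verified
    current_state = verification.get("current_state")
    if current_state is not None and detected_state != current_state:
        return True  # state is changing
    if score < 1.0:
        return True  # low-confidence match is still useful for training
    return False  # stable state at 100% confidence


history = {"door": {"current_state": "closed", "pending_state": None}}
print(should_save_image(history, "door", "closed", 1.0))  # False: stable and fully confident
print(should_save_image(history, "door", "closed", 0.8))  # True: same state, but score < 1.0
print(should_save_image(history, "door", "open", 1.0))    # True: state is changing
```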

@@ -248,16 +212,14 @@ class CustomStateClassificationProcessor(RealTimeProcessorApi):
             return

         if self.interpreter is None:
-            # When interpreter is None, always save (score is 0.0, which is < 1.0)
-            if self._should_save_image(camera, "unknown", 0.0):
-                write_classification_attempt(
-                    self.train_dir,
-                    cv2.cvtColor(frame, cv2.COLOR_RGB2BGR),
-                    "none-none",
-                    now,
-                    "unknown",
-                    0.0,
-                )
+            write_classification_attempt(
+                self.train_dir,
+                cv2.cvtColor(frame, cv2.COLOR_RGB2BGR),
+                "none-none",
+                now,
+                "unknown",
+                0.0,
+            )
             return

         input = np.expand_dims(resized_frame, axis=0)

@@ -274,17 +236,14 @@ class CustomStateClassificationProcessor(RealTimeProcessorApi):
         score = round(probs[best_id], 2)
         self.__update_metrics(datetime.datetime.now().timestamp() - now)

-        detected_state = self.labelmap[best_id]
-
-        if self._should_save_image(camera, detected_state, score):
-            write_classification_attempt(
-                self.train_dir,
-                cv2.cvtColor(frame, cv2.COLOR_RGB2BGR),
-                "none-none",
-                now,
-                detected_state,
-                score,
-            )
+        write_classification_attempt(
+            self.train_dir,
+            cv2.cvtColor(frame, cv2.COLOR_RGB2BGR),
+            "none-none",
+            now,
+            self.labelmap[best_id],
+            score,
+        )

         if score < self.model_config.threshold:
             logger.debug(

@@ -292,6 +251,7 @@ class CustomStateClassificationProcessor(RealTimeProcessorApi):
             )
             return

+        detected_state = self.labelmap[best_id]
         verified_state = self.verify_state_change(camera, detected_state)

         if verified_state is not None:

@@ -190,11 +190,7 @@ class OnvifController:
         ptz: ONVIFService = await onvif.create_ptz_service()
         self.cams[camera_name]["ptz"] = ptz

-        try:
-            imaging: ONVIFService = await onvif.create_imaging_service()
-        except (Fault, ONVIFError, TransportError, Exception) as e:
-            logger.debug(f"Imaging service not supported for {camera_name}: {e}")
-            imaging = None
+        imaging: ONVIFService = await onvif.create_imaging_service()
         self.cams[camera_name]["imaging"] = imaging
         try:
             video_sources = await media.GetVideoSources()

@@ -385,10 +381,7 @@ class OnvifController:
                 f"Disabling autotracking zooming for {camera_name}: Absolute zoom not supported. Exception: {e}"
             )

-        if (
-            self.cams[camera_name]["video_source_token"] is not None
-            and imaging is not None
-        ):
+        if self.cams[camera_name]["video_source_token"] is not None:
            try:
                imaging_capabilities = await imaging.GetImagingSettings(
                    {"VideoSourceToken": self.cams[camera_name]["video_source_token"]}

@@ -428,7 +421,6 @@ class OnvifController:
         if (
             "focus" in self.cams[camera_name]["features"]
             and self.cams[camera_name]["video_source_token"]
-            and self.cams[camera_name]["imaging"] is not None
         ):
             try:
                 stop_request = self.cams[camera_name]["imaging"].create_type("Stop")

@@ -656,7 +648,6 @@ class OnvifController:
         if (
             "focus" not in self.cams[camera_name]["features"]
             or not self.cams[camera_name]["video_source_token"]
-            or self.cams[camera_name]["imaging"] is None
         ):
             logger.error(f"{camera_name} does not support ONVIF continuous focus.")
             return
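
The fb4fe8c430 side of the ONVIF hunks above follows a probe-and-guard pattern: attempt to create the optional imaging service, fall back to `None` when the camera does not support it, and check for `None` before every later use. A generic sketch of that pattern, using hypothetical names rather than the real onvif/zeep API:

```python
# Generic sketch of the optional-service pattern: probe once, store None on
# failure, and guard every downstream use.
import logging

logger = logging.getLogger(__name__)


class ImagingStub:
    """Hypothetical stand-in for an ONVIF imaging service object."""

    def Stop(self) -> None:
        logger.debug("imaging.Stop() called")


def create_imaging_service(supported: bool):
    # Placeholder probe; the real code awaits onvif.create_imaging_service()
    # and may raise Fault / ONVIFError / TransportError on unsupported cameras.
    if not supported:
        raise RuntimeError("imaging not supported")
    return ImagingStub()


def setup_camera(camera_name: str, supported: bool) -> dict:
    try:
        imaging = create_imaging_service(supported)
    except Exception as e:
        logger.debug("Imaging service not supported for %s: %s", camera_name, e)
        imaging = None
    return {"imaging": imaging}


def continuous_focus(cam: dict, camera_name: str) -> None:
    # Downstream uses check the stored service first, mirroring the
    # `self.cams[camera_name]["imaging"] is not None` guards in the diff.
    if cam["imaging"] is None:
        logger.error("%s does not support ONVIF continuous focus.", camera_name)
        return
    cam["imaging"].Stop()
```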

@@ -478,32 +478,33 @@ export default function AuthenticationView({
 <TableCell className="text-right">
   <TooltipProvider>
     <div className="flex items-center justify-end gap-2">
-      {user.username !== "admin" && (
-        <Tooltip>
-          <TooltipTrigger asChild>
-            <Button
-              size="sm"
-              variant="outline"
-              className="h-8 px-2"
-              onClick={() => {
-                setSelectedUser(user.username);
-                setSelectedUserRole(
-                  user.role || "viewer",
-                );
-                setShowRoleChange(true);
-              }}
-            >
-              <LuUserCog className="size-3.5" />
-              <span className="ml-1.5 hidden sm:inline-block">
-                {t("role.title", { ns: "common" })}
-              </span>
-            </Button>
-          </TooltipTrigger>
-          <TooltipContent>
-            <p>{t("users.table.changeRole")}</p>
-          </TooltipContent>
-        </Tooltip>
-      )}
+      {user.username !== "admin" &&
+        user.username !== "viewer" && (
+          <Tooltip>
+            <TooltipTrigger asChild>
+              <Button
+                size="sm"
+                variant="outline"
+                className="h-8 px-2"
+                onClick={() => {
+                  setSelectedUser(user.username);
+                  setSelectedUserRole(
+                    user.role || "viewer",
+                  );
+                  setShowRoleChange(true);
+                }}
+              >
+                <LuUserCog className="size-3.5" />
+                <span className="ml-1.5 hidden sm:inline-block">
+                  {t("role.title", { ns: "common" })}
+                </span>
+              </Button>
+            </TooltipTrigger>
+            <TooltipContent>
+              <p>{t("users.table.changeRole")}</p>
+            </TooltipContent>
+          </Tooltip>
+        )}

       <Tooltip>
         <TooltipTrigger asChild>