mirror of
https://github.com/blakeblackshear/frigate.git
synced 2025-12-06 21:44:13 +03:00
Merge fb4fe8c430 into 1a75251ffb
This commit is contained in:
commit
481469bde2
@ -157,3 +157,19 @@ Only one `speech` event may be transcribed at a time. Frigate does not automatic
|
||||
:::
|
||||
|
||||
Recorded `speech` events will always use a `whisper` model, regardless of the `model_size` config setting. Without a supported Nvidia GPU, generating transcriptions for longer `speech` events may take a fair amount of time, so be patient.
|
||||
|
||||
#### FAQ
|
||||
|
||||
1. Why doesn't Frigate automatically transcribe all `speech` events?
|
||||
|
||||
Frigate does not implement a queue mechanism for speech transcription, and adding one is not trivial. A proper queue would need backpressure, prioritization, memory/disk buffering, retry logic, crash recovery, and safeguards to prevent unbounded growth when events outpace processing. That’s a significant amount of complexity for a feature that, in most real-world environments, would mostly just churn through low-value noise.
|
||||
|
||||
Because transcription is **serialized (one event at a time)** and speech events can be generated far faster than they can be processed, an auto-transcribe toggle would very quickly create an ever-growing backlog and degrade core functionality. For the amount of engineering and risk involved, it adds **very little practical value** for the majority of deployments, which often run on low-powered edge hardware.
|
||||
|
||||
If you hear speech that’s actually important and worth saving/indexing for the future, **just press the transcribe button in Explore** on that specific `speech` event. That keeps things explicit, reliable, and under your control.
|
||||
|
||||
2. Why don't you save live transcription text and use that for `speech` events?
|
||||
|
||||
There’s no guarantee that a `speech` event is even created from the exact audio that went through the transcription model. Live transcription and `speech` event creation are **separate, asynchronous processes**. Even when both are correctly configured, trying to align the **precise start and end time of a speech event** with whatever audio the model happened to be processing at that moment is unreliable.
|
||||
|
||||
Automatically persisting that data would often result in **misaligned, partial, or irrelevant transcripts**, while still incurring all of the CPU, storage, and privacy costs of transcription. That’s why Frigate treats transcription as an **explicit, user-initiated action** rather than an automatic side-effect of every `speech` event.
|
||||
|
||||
@ -99,6 +99,42 @@ class CustomStateClassificationProcessor(RealTimeProcessorApi):
|
||||
if self.inference_speed:
|
||||
self.inference_speed.update(duration)
|
||||
|
||||
def _should_save_image(
    self, camera: str, detected_state: str, score: float = 1.0
) -> bool:
    """
    Decide whether a classified frame should be kept as a training sample.

    The frame is kept when any of the following holds:
    - the camera has no entry in ``self.state_history`` yet
    - a pending state is recorded for the camera
    - a current state is recorded and it differs from ``detected_state``
    - ``score`` is below 1.0

    In every other case (no pending state, no differing current state,
    and a score that is not below 1.0) the frame is skipped.
    """
    history = self.state_history

    # Nothing recorded for this camera so far: keep the very first sample.
    if camera not in history:
        return True

    entry = history[camera]
    verified_state = entry.get("current_state")

    # A pending state is recorded for this camera, so every frame is kept.
    if entry.get("pending_state") is not None:
        return True

    # Keep the frame if it disagrees with the recorded current state, or if
    # the score is below 1.0 even though the state agrees.
    state_changed = verified_state is not None and detected_state != verified_state
    return state_changed or score < 1.0
|
||||
|
||||
def verify_state_change(self, camera: str, detected_state: str) -> str | None:
|
||||
"""
|
||||
Verify state change requires 3 consecutive identical states before publishing.
|
||||
@ -212,6 +248,8 @@ class CustomStateClassificationProcessor(RealTimeProcessorApi):
|
||||
return
|
||||
|
||||
if self.interpreter is None:
|
||||
# When interpreter is None, always save (score is 0.0, which is < 1.0)
|
||||
if self._should_save_image(camera, "unknown", 0.0):
|
||||
write_classification_attempt(
|
||||
self.train_dir,
|
||||
cv2.cvtColor(frame, cv2.COLOR_RGB2BGR),
|
||||
@ -236,12 +274,15 @@ class CustomStateClassificationProcessor(RealTimeProcessorApi):
|
||||
score = round(probs[best_id], 2)
|
||||
self.__update_metrics(datetime.datetime.now().timestamp() - now)
|
||||
|
||||
detected_state = self.labelmap[best_id]
|
||||
|
||||
if self._should_save_image(camera, detected_state, score):
|
||||
write_classification_attempt(
|
||||
self.train_dir,
|
||||
cv2.cvtColor(frame, cv2.COLOR_RGB2BGR),
|
||||
"none-none",
|
||||
now,
|
||||
self.labelmap[best_id],
|
||||
detected_state,
|
||||
score,
|
||||
)
|
||||
|
||||
@ -251,7 +292,6 @@ class CustomStateClassificationProcessor(RealTimeProcessorApi):
|
||||
)
|
||||
return
|
||||
|
||||
detected_state = self.labelmap[best_id]
|
||||
verified_state = self.verify_state_change(camera, detected_state)
|
||||
|
||||
if verified_state is not None:
|
||||
|
||||
@ -190,7 +190,11 @@ class OnvifController:
|
||||
ptz: ONVIFService = await onvif.create_ptz_service()
|
||||
self.cams[camera_name]["ptz"] = ptz
|
||||
|
||||
try:
|
||||
imaging: ONVIFService = await onvif.create_imaging_service()
|
||||
except (Fault, ONVIFError, TransportError, Exception) as e:
|
||||
logger.debug(f"Imaging service not supported for {camera_name}: {e}")
|
||||
imaging = None
|
||||
self.cams[camera_name]["imaging"] = imaging
|
||||
try:
|
||||
video_sources = await media.GetVideoSources()
|
||||
@ -381,7 +385,10 @@ class OnvifController:
|
||||
f"Disabling autotracking zooming for {camera_name}: Absolute zoom not supported. Exception: {e}"
|
||||
)
|
||||
|
||||
if self.cams[camera_name]["video_source_token"] is not None:
|
||||
if (
|
||||
self.cams[camera_name]["video_source_token"] is not None
|
||||
and imaging is not None
|
||||
):
|
||||
try:
|
||||
imaging_capabilities = await imaging.GetImagingSettings(
|
||||
{"VideoSourceToken": self.cams[camera_name]["video_source_token"]}
|
||||
@ -421,6 +428,7 @@ class OnvifController:
|
||||
if (
|
||||
"focus" in self.cams[camera_name]["features"]
|
||||
and self.cams[camera_name]["video_source_token"]
|
||||
and self.cams[camera_name]["imaging"] is not None
|
||||
):
|
||||
try:
|
||||
stop_request = self.cams[camera_name]["imaging"].create_type("Stop")
|
||||
@ -648,6 +656,7 @@ class OnvifController:
|
||||
if (
|
||||
"focus" not in self.cams[camera_name]["features"]
|
||||
or not self.cams[camera_name]["video_source_token"]
|
||||
or self.cams[camera_name]["imaging"] is None
|
||||
):
|
||||
logger.error(f"{camera_name} does not support ONVIF continuous focus.")
|
||||
return
|
||||
|
||||
@ -37,7 +37,7 @@ import { useForm } from "react-hook-form";
|
||||
import { useTranslation } from "react-i18next";
|
||||
import { LuPlus, LuX } from "react-icons/lu";
|
||||
import { toast } from "sonner";
|
||||
import useSWR from "swr";
|
||||
import useSWR, { mutate } from "swr";
|
||||
import { z } from "zod";
|
||||
|
||||
type ClassificationModelEditDialogProps = {
|
||||
@ -240,24 +240,72 @@ export default function ClassificationModelEditDialog({
|
||||
position: "top-center",
|
||||
});
|
||||
} else {
|
||||
// State model - update classes
|
||||
// Note: For state models, updating classes requires renaming categories
|
||||
// which is handled through the dataset API, not the config API
|
||||
// We'll need to implement this by calling the rename endpoint for each class
|
||||
// For now, we just show a message that this requires retraining
|
||||
const stateData = data as StateFormData;
|
||||
const newClasses = stateData.classes.filter(
|
||||
(c) => c.trim().length > 0,
|
||||
);
|
||||
const oldClasses = dataset?.categories
|
||||
? Object.keys(dataset.categories).filter((key) => key !== "none")
|
||||
: [];
|
||||
|
||||
const renameMap = new Map<string, string>();
|
||||
const maxLength = Math.max(oldClasses.length, newClasses.length);
|
||||
|
||||
for (let i = 0; i < maxLength; i++) {
|
||||
const oldClass = oldClasses[i];
|
||||
const newClass = newClasses[i];
|
||||
|
||||
if (oldClass && newClass && oldClass !== newClass) {
|
||||
renameMap.set(oldClass, newClass);
|
||||
}
|
||||
}
|
||||
|
||||
const renamePromises = Array.from(renameMap.entries()).map(
|
||||
async ([oldName, newName]) => {
|
||||
try {
|
||||
await axios.put(
|
||||
`/classification/${model.name}/dataset/${oldName}/rename`,
|
||||
{
|
||||
new_category: newName,
|
||||
},
|
||||
);
|
||||
} catch (err) {
|
||||
const error = err as {
|
||||
response?: { data?: { message?: string; detail?: string } };
|
||||
};
|
||||
const errorMessage =
|
||||
error.response?.data?.message ||
|
||||
error.response?.data?.detail ||
|
||||
"Unknown error";
|
||||
throw new Error(
|
||||
`Failed to rename ${oldName} to ${newName}: ${errorMessage}`,
|
||||
);
|
||||
}
|
||||
},
|
||||
);
|
||||
|
||||
if (renamePromises.length > 0) {
|
||||
await Promise.all(renamePromises);
|
||||
await mutate(`classification/${model.name}/dataset`);
|
||||
toast.success(t("toast.success.updatedModel"), {
|
||||
position: "top-center",
|
||||
});
|
||||
} else {
|
||||
toast.info(t("edit.stateClassesInfo"), {
|
||||
position: "top-center",
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
onSuccess();
|
||||
onClose();
|
||||
} catch (err) {
|
||||
const error = err as {
|
||||
response?: { data?: { message?: string; detail?: string } };
|
||||
message?: string;
|
||||
};
|
||||
const errorMessage =
|
||||
error.message ||
|
||||
error.response?.data?.message ||
|
||||
error.response?.data?.detail ||
|
||||
"Unknown error";
|
||||
@ -268,7 +316,7 @@ export default function ClassificationModelEditDialog({
|
||||
setIsSaving(false);
|
||||
}
|
||||
},
|
||||
[isObjectModel, model, t, onSuccess, onClose],
|
||||
[isObjectModel, model, dataset, t, onSuccess, onClose],
|
||||
);
|
||||
|
||||
const handleCancel = useCallback(() => {
|
||||
|
||||
@ -478,8 +478,7 @@ export default function AuthenticationView({
|
||||
<TableCell className="text-right">
|
||||
<TooltipProvider>
|
||||
<div className="flex items-center justify-end gap-2">
|
||||
{user.username !== "admin" &&
|
||||
user.username !== "viewer" && (
|
||||
{user.username !== "admin" && (
|
||||
<Tooltip>
|
||||
<TooltipTrigger asChild>
|
||||
<Button
|
||||
|
||||
Loading…
Reference in New Issue
Block a user