diff --git a/docs/docs/configuration/audio_detectors.md b/docs/docs/configuration/audio_detectors.md
index 245ce703c..80b0727a5 100644
--- a/docs/docs/configuration/audio_detectors.md
+++ b/docs/docs/configuration/audio_detectors.md
@@ -157,3 +157,19 @@ Only one `speech` event may be transcribed at a time. Frigate does not automatic
 :::
 
 Recorded `speech` events will always use a `whisper` model, regardless of the `model_size` config setting. Without a supported Nvidia GPU, generating transcriptions for longer `speech` events may take a fair amount of time, so be patient.
+
+#### FAQ
+
+1. Why doesn't Frigate automatically transcribe all `speech` events?
+
+   Frigate does not implement a queue mechanism for speech transcription, and adding one is not trivial. A proper queue would need backpressure, prioritization, memory/disk buffering, retry logic, crash recovery, and safeguards to prevent unbounded growth when events outpace processing. That is a significant amount of complexity for a feature that, in most real-world environments, would mostly churn through low-value noise.
+
+   Because transcription is **serialized (one event at a time)** and speech events can be generated far faster than they can be processed, an auto-transcribe toggle would quickly create an ever-growing backlog and degrade core functionality. For the amount of engineering and risk involved, it adds **very little practical value** for the majority of deployments, which often run on low-powered edge hardware.
+
+   If you hear speech that is actually important and worth saving or indexing, **press the transcribe button in Explore** on that specific `speech` event. That keeps things explicit, reliable, and under your control.
+
+2. Why don't you save live transcription text and use that for `speech` events?
+
+   There is no guarantee that a `speech` event is even created from the exact audio that went through the transcription model. Live transcription and `speech` event creation are **separate, asynchronous processes**. Even when both are correctly configured, aligning the **precise start and end time of a speech event** with whatever audio the model happened to be processing at that moment is unreliable.
+
+   Automatically persisting that data would often produce **misaligned, partial, or irrelevant transcripts**, while still incurring all of the CPU, storage, and privacy costs of transcription. That is why Frigate treats transcription as an **explicit, user-initiated action** rather than an automatic side effect of every `speech` event.
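To make the FAQ's backlog argument concrete: with serialized transcription, any sustained gap between how fast `speech` events arrive and how fast one worker can transcribe them grows without bound. A minimal, hypothetical sketch (plain Python; the rates are invented for illustration and are not Frigate measurements):

```python
def backlog_after(hours: float, events_per_hour: float, seconds_per_event: float) -> float:
    """Events still waiting after `hours` of strictly serialized transcription."""
    processed_per_hour = 3600 / seconds_per_event
    growth_per_hour = events_per_hour - processed_per_hour
    return max(0.0, growth_per_hour * hours)


# A camera producing 120 speech events/hour against a CPU-bound whisper model
# that needs 90s per event falls behind by 80 events every hour:
print(backlog_after(hours=24, events_per_hour=120, seconds_per_event=90))  # 1920.0
```

A day of that workload leaves nearly two thousand events queued, which is exactly the unbounded-growth failure mode the FAQ describes.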
diff --git a/frigate/data_processing/real_time/custom_classification.py b/frigate/data_processing/real_time/custom_classification.py
index cae3087ff..179d2f43f 100644
--- a/frigate/data_processing/real_time/custom_classification.py
+++ b/frigate/data_processing/real_time/custom_classification.py
@@ -99,6 +99,42 @@ class CustomStateClassificationProcessor(RealTimeProcessorApi):
         if self.inference_speed:
             self.inference_speed.update(duration)
 
+    def _should_save_image(
+        self, camera: str, detected_state: str, score: float = 1.0
+    ) -> bool:
+        """
+        Determine if we should save the image for training.
+        Save when:
+        - State is changing or being verified (regardless of score)
+        - Score is less than 100% (even if state matches, useful for training)
+        Don't save when:
+        - State is stable (matches current_state) AND score is 100%
+        """
+        if camera not in self.state_history:
+            # First detection for this camera, save it
+            return True
+
+        verification = self.state_history[camera]
+        current_state = verification.get("current_state")
+        pending_state = verification.get("pending_state")
+
+        # Save if there's a pending state change being verified
+        if pending_state is not None:
+            return True
+
+        # Save if the detected state differs from the current verified state
+        # (state is changing)
+        if current_state is not None and detected_state != current_state:
+            return True
+
+        # If score is less than 100%, save even if state matches
+        # (useful for training to improve confidence)
+        if score < 1.0:
+            return True
+
+        # Don't save if state is stable (detected_state == current_state) AND score is 100%
+        return False
+
     def verify_state_change(self, camera: str, detected_state: str) -> str | None:
         """
         Verify state change requires 3 consecutive identical states before publishing.
@@ -212,14 +248,16 @@ class CustomStateClassificationProcessor(RealTimeProcessorApi):
             return
 
         if self.interpreter is None:
-            write_classification_attempt(
-                self.train_dir,
-                cv2.cvtColor(frame, cv2.COLOR_RGB2BGR),
-                "none-none",
-                now,
-                "unknown",
-                0.0,
-            )
+            # When interpreter is None, always save (score is 0.0, which is < 1.0)
+            if self._should_save_image(camera, "unknown", 0.0):
+                write_classification_attempt(
+                    self.train_dir,
+                    cv2.cvtColor(frame, cv2.COLOR_RGB2BGR),
+                    "none-none",
+                    now,
+                    "unknown",
+                    0.0,
+                )
             return
 
         input = np.expand_dims(resized_frame, axis=0)
@@ -236,14 +274,17 @@ class CustomStateClassificationProcessor(RealTimeProcessorApi):
         score = round(probs[best_id], 2)
         self.__update_metrics(datetime.datetime.now().timestamp() - now)
 
-        write_classification_attempt(
-            self.train_dir,
-            cv2.cvtColor(frame, cv2.COLOR_RGB2BGR),
-            "none-none",
-            now,
-            self.labelmap[best_id],
-            score,
-        )
+        detected_state = self.labelmap[best_id]
+
+        if self._should_save_image(camera, detected_state, score):
+            write_classification_attempt(
+                self.train_dir,
+                cv2.cvtColor(frame, cv2.COLOR_RGB2BGR),
+                "none-none",
+                now,
+                detected_state,
+                score,
+            )
 
         if score < self.model_config.threshold:
             logger.debug(
@@ -251,7 +292,6 @@ class CustomStateClassificationProcessor(RealTimeProcessorApi):
             )
             return
 
-        detected_state = self.labelmap[best_id]
         verified_state = self.verify_state_change(camera, detected_state)
 
         if verified_state is not None:
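The decision table implemented by `_should_save_image` is easiest to see with a small, self-contained illustration. The `state_history` shape (`current_state`/`pending_state` keys) mirrors the diff, but the stub class and camera names are invented for the example:

```python
class StateSaverStub:
    """Stub carrying the same state_history shape used by _should_save_image."""

    def __init__(self, state_history: dict):
        self.state_history = state_history

    def should_save_image(self, camera: str, detected_state: str, score: float = 1.0) -> bool:
        # Same ordering as the diff: unseen camera, pending change,
        # differing state, then confidence.
        if camera not in self.state_history:
            return True
        verification = self.state_history[camera]
        if verification.get("pending_state") is not None:
            return True
        current_state = verification.get("current_state")
        if current_state is not None and detected_state != current_state:
            return True
        return score < 1.0


stub = StateSaverStub({"yard": {"current_state": "closed", "pending_state": None}})
assert stub.should_save_image("porch", "open")                  # unseen camera: save
assert stub.should_save_image("yard", "open", score=1.0)        # state changing: save
assert stub.should_save_image("yard", "closed", score=0.87)     # low confidence: save
assert not stub.should_save_image("yard", "closed", score=1.0)  # stable and certain: skip
```

The interpreter-is-None branch passes `score=0.0`, so even when no earlier rule fires, the final confidence check still returns True, which is what the "always save" comment in the diff relies on.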
Exception: {e}" ) - if self.cams[camera_name]["video_source_token"] is not None: + if ( + self.cams[camera_name]["video_source_token"] is not None + and imaging is not None + ): try: imaging_capabilities = await imaging.GetImagingSettings( {"VideoSourceToken": self.cams[camera_name]["video_source_token"]} @@ -421,6 +428,7 @@ class OnvifController: if ( "focus" in self.cams[camera_name]["features"] and self.cams[camera_name]["video_source_token"] + and self.cams[camera_name]["imaging"] is not None ): try: stop_request = self.cams[camera_name]["imaging"].create_type("Stop") @@ -648,6 +656,7 @@ class OnvifController: if ( "focus" not in self.cams[camera_name]["features"] or not self.cams[camera_name]["video_source_token"] + or self.cams[camera_name]["imaging"] is None ): logger.error(f"{camera_name} does not support ONVIF continuous focus.") return diff --git a/web/src/components/classification/ClassificationModelEditDialog.tsx b/web/src/components/classification/ClassificationModelEditDialog.tsx index c47765d76..a3ff2df8a 100644 --- a/web/src/components/classification/ClassificationModelEditDialog.tsx +++ b/web/src/components/classification/ClassificationModelEditDialog.tsx @@ -37,7 +37,7 @@ import { useForm } from "react-hook-form"; import { useTranslation } from "react-i18next"; import { LuPlus, LuX } from "react-icons/lu"; import { toast } from "sonner"; -import useSWR from "swr"; +import useSWR, { mutate } from "swr"; import { z } from "zod"; type ClassificationModelEditDialogProps = { @@ -240,15 +240,61 @@ export default function ClassificationModelEditDialog({ position: "top-center", }); } else { - // State model - update classes - // Note: For state models, updating classes requires renaming categories - // which is handled through the dataset API, not the config API - // We'll need to implement this by calling the rename endpoint for each class - // For now, we just show a message that this requires retraining + const stateData = data as StateFormData; + const newClasses = stateData.classes.filter( + (c) => c.trim().length > 0, + ); + const oldClasses = dataset?.categories + ? 
diff --git a/web/src/components/classification/ClassificationModelEditDialog.tsx b/web/src/components/classification/ClassificationModelEditDialog.tsx
index c47765d76..a3ff2df8a 100644
--- a/web/src/components/classification/ClassificationModelEditDialog.tsx
+++ b/web/src/components/classification/ClassificationModelEditDialog.tsx
@@ -37,7 +37,7 @@ import { useForm } from "react-hook-form";
 import { useTranslation } from "react-i18next";
 import { LuPlus, LuX } from "react-icons/lu";
 import { toast } from "sonner";
-import useSWR from "swr";
+import useSWR, { mutate } from "swr";
 import { z } from "zod";
 
 type ClassificationModelEditDialogProps = {
@@ -240,15 +240,61 @@ export default function ClassificationModelEditDialog({
           position: "top-center",
         });
       } else {
-        // State model - update classes
-        // Note: For state models, updating classes requires renaming categories
-        // which is handled through the dataset API, not the config API
-        // We'll need to implement this by calling the rename endpoint for each class
-        // For now, we just show a message that this requires retraining
+        const stateData = data as StateFormData;
+        const newClasses = stateData.classes.filter(
+          (c) => c.trim().length > 0,
+        );
+        const oldClasses = dataset?.categories
+          ? Object.keys(dataset.categories).filter((key) => key !== "none")
+          : [];
 
-        toast.info(t("edit.stateClassesInfo"), {
-          position: "top-center",
-        });
+        const renameMap = new Map<string, string>();
+        const maxLength = Math.max(oldClasses.length, newClasses.length);
+
+        for (let i = 0; i < maxLength; i++) {
+          const oldClass = oldClasses[i];
+          const newClass = newClasses[i];
+
+          if (oldClass && newClass && oldClass !== newClass) {
+            renameMap.set(oldClass, newClass);
+          }
+        }
+
+        const renamePromises = Array.from(renameMap.entries()).map(
+          async ([oldName, newName]) => {
+            try {
+              await axios.put(
+                `/classification/${model.name}/dataset/${oldName}/rename`,
+                {
+                  new_category: newName,
+                },
+              );
+            } catch (err) {
+              const error = err as {
+                response?: { data?: { message?: string; detail?: string } };
+              };
+              const errorMessage =
+                error.response?.data?.message ||
+                error.response?.data?.detail ||
+                "Unknown error";
+              throw new Error(
+                `Failed to rename ${oldName} to ${newName}: ${errorMessage}`,
+              );
+            }
+          },
+        );
+
+        if (renamePromises.length > 0) {
+          await Promise.all(renamePromises);
+          await mutate(`classification/${model.name}/dataset`);
+          toast.success(t("toast.success.updatedModel"), {
+            position: "top-center",
+          });
+        } else {
+          toast.info(t("edit.stateClassesInfo"), {
+            position: "top-center",
+          });
+        }
       }
 
       onSuccess();
@@ -256,8 +302,10 @@ export default function ClassificationModelEditDialog({
     } catch (err) {
       const error = err as {
         response?: { data?: { message?: string; detail?: string } };
+        message?: string;
       };
       const errorMessage =
+        error.message ||
         error.response?.data?.message ||
         error.response?.data?.detail ||
         "Unknown error";
@@ -268,7 +316,7 @@ export default function ClassificationModelEditDialog({
       setIsSaving(false);
     }
   },
-    [isObjectModel, model, t, onSuccess, onClose],
+    [isObjectModel, model, dataset, t, onSuccess, onClose],
   );
 
   const handleCancel = useCallback(() => {
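The dialog pairs old and new class names strictly by position before issuing renames, so only positions where the name actually changed produce a rename request, and reordering classes in the form is interpreted as renames. A language-neutral sketch of that pairing (Python for consistency with the other examples; `build_rename_map` is an invented name):

```python
def build_rename_map(old_classes: list[str], new_classes: list[str]) -> dict[str, str]:
    """Pair class names by position; only changed, non-empty pairs become renames."""
    return {
        old: new
        # zip() drops unpaired extras, matching the dialog's oldClass/newClass guard
        for old, new in zip(old_classes, new_classes)
        if old and new and old != new
    }


print(build_rename_map(["open", "closed"], ["open", "shut"]))  # {'closed': 'shut'}
```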
diff --git a/web/src/views/settings/AuthenticationView.tsx b/web/src/views/settings/AuthenticationView.tsx
index 19f157b46..124348813 100644
--- a/web/src/views/settings/AuthenticationView.tsx
+++ b/web/src/views/settings/AuthenticationView.tsx
@@ -478,33 +478,32 @@ export default function AuthenticationView({
-                      {user.username !== "admin" &&
-                        user.username !== "viewer" && (
-                          <TooltipProvider>
-                            <Tooltip>
-                              <TooltipTrigger asChild>
-                                {/* role-change button (unchanged by this diff) */}
-                              </TooltipTrigger>
-                              <TooltipContent>
-                                {t("users.table.changeRole")}
-                              </TooltipContent>
-                            </Tooltip>
-                          </TooltipProvider>
-                        )}
+                      {user.username !== "admin" && (
+                        <TooltipProvider>
+                          <Tooltip>
+                            <TooltipTrigger asChild>
+                              {/* role-change button (unchanged by this diff) */}
+                            </TooltipTrigger>
+                            <TooltipContent>
+                              {t("users.table.changeRole")}
+                            </TooltipContent>
+                          </Tooltip>
+                        </TooltipProvider>
+                      )}