Merge fb4fe8c430 into 1a75251ffb

2025-12-06 21:44:13 +03:00 · 2025-11-29 10:08:03 -05:00 · 2025-11-29 10:08:03 -05:00 · 481469bde2
commit 481469bde2
parent 1a75251ffb fb4fe8c430
5 changed files with 168 additions and 56 deletions
--- a/docs/docs/configuration/audio_detectors.md
+++ b/docs/docs/configuration/audio_detectors.md
@ -157,3 +157,19 @@ Only one `speech` event may be transcribed at a time. Frigate does not automatic
 :::

 Recorded `speech` events will always use a `whisper` model, regardless of the `model_size` config setting. Without a supported Nvidia GPU, generating transcriptions for longer `speech` events may take a fair amount of time, so be patient.
+
+#### FAQ
+
+1. Why doesn't Frigate automatically transcribe all `speech` events?
+
+   Frigate does not implement a queue mechanism for speech transcription, and adding one is not trivial. A proper queue would need backpressure, prioritization, memory/disk buffering, retry logic, crash recovery, and safeguards to prevent unbounded growth when events outpace processing. That’s a significant amount of complexity for a feature that, in most real-world environments, would mostly just churn through low-value noise.
+
+   Because transcription is **serialized (one event at a time)** and speech events can be generated far faster than they can be processed, an auto-transcribe toggle would very quickly create an ever-growing backlog and degrade core functionality. For the amount of engineering and risk involved, it adds **very little practical value** for the majority of deployments, which often run on low-powered edge hardware.
+
+   If you hear speech that’s actually important and worth saving/indexing for the future, **just press the transcribe button in Explore** on that specific `speech` event. That keeps things explicit, reliable, and under your control.
+
+2. Why don't you save live transcription text and use that for `speech` events?
+
+   There’s no guarantee that a `speech` event is even created from the exact audio that went through the transcription model. Live transcription and `speech` event creation are **separate, asynchronous processes**. Even when both are correctly configured, trying to align the **precise start and end time of a speech event** with whatever audio the model happened to be processing at that moment is unreliable.
+
+   Automatically persisting that data would often result in **misaligned, partial, or irrelevant transcripts**, while still incurring all of the CPU, storage, and privacy costs of transcription. That’s why Frigate treats transcription as an **explicit, user-initiated action** rather than an automatic side effect of every `speech` event.
--- a/frigate/data_processing/real_time/custom_classification.py
+++ b/frigate/data_processing/real_time/custom_classification.py
@ -99,6 +99,42 @@ class CustomStateClassificationProcessor(RealTimeProcessorApi):
        if self.inference_speed:
            self.inference_speed.update(duration)

+    def _should_save_image(
+        self, camera: str, detected_state: str, score: float = 1.0
+    ) -> bool:
+        """
+        Decide whether the current frame should be saved for training.
+
+        Returns True when the camera has no state_history entry yet, when a
+        pending_state is set, when detected_state differs from a non-None
+        current_state, or when score is below 1.0 (100%).
+        Returns False otherwise.
+        """
+        if camera not in self.state_history:
+            # No history recorded for this camera yet, so always save
+            return True
+
+        verification = self.state_history[camera]
+        current_state = verification.get("current_state")
+        pending_state = verification.get("pending_state")
+
+        # A pending state is recorded for this camera: save regardless of score
+        if pending_state is not None:
+            return True
+
+        # A verified state exists and this detection disagrees with it
+        # (state appears to be changing): save regardless of score
+        if current_state is not None and detected_state != current_state:
+            return True
+
+        # Score below 100%: save even though no state difference was found
+        # (useful for training to improve confidence)
+        if score < 1.0:
+            return True
+
+        # Nothing pending, no differing verified state, and score is 100%: skip
+        return False
+
    def verify_state_change(self, camera: str, detected_state: str) -> str | None:
        """
        Verify state change requires 3 consecutive identical states before publishing.
@ -212,6 +248,8 @@ class CustomStateClassificationProcessor(RealTimeProcessorApi):
                return

        if self.interpreter is None:
+            # When interpreter is None, always save (score is 0.0, which is < 1.0)
+            if self._should_save_image(camera, "unknown", 0.0):
                write_classification_attempt(
                    self.train_dir,
                    cv2.cvtColor(frame, cv2.COLOR_RGB2BGR),
@ -236,12 +274,15 @@ class CustomStateClassificationProcessor(RealTimeProcessorApi):
        score = round(probs[best_id], 2)
        self.__update_metrics(datetime.datetime.now().timestamp() - now)

+        detected_state = self.labelmap[best_id]
+
+        if self._should_save_image(camera, detected_state, score):
            write_classification_attempt(
                self.train_dir,
                cv2.cvtColor(frame, cv2.COLOR_RGB2BGR),
                "none-none",
                now,
-            self.labelmap[best_id],
+                detected_state,
                score,
            )

@ -251,7 +292,6 @@ class CustomStateClassificationProcessor(RealTimeProcessorApi):
            )
            return

-        detected_state = self.labelmap[best_id]
        verified_state = self.verify_state_change(camera, detected_state)

        if verified_state is not None:
--- a/frigate/ptz/onvif.py
+++ b/frigate/ptz/onvif.py
@ -190,7 +190,11 @@ class OnvifController:
        ptz: ONVIFService = await onvif.create_ptz_service()
        self.cams[camera_name]["ptz"] = ptz

+        try:
            imaging: ONVIFService = await onvif.create_imaging_service()
+        except (Fault, ONVIFError, TransportError, Exception) as e:
+            logger.debug(f"Imaging service not supported for {camera_name}: {e}")
+            imaging = None
        self.cams[camera_name]["imaging"] = imaging
        try:
            video_sources = await media.GetVideoSources()
@ -381,7 +385,10 @@ class OnvifController:
                            f"Disabling autotracking zooming for {camera_name}: Absolute zoom not supported. Exception: {e}"
                        )

-        if self.cams[camera_name]["video_source_token"] is not None:
+        if (
+            self.cams[camera_name]["video_source_token"] is not None
+            and imaging is not None
+        ):
            try:
                imaging_capabilities = await imaging.GetImagingSettings(
                    {"VideoSourceToken": self.cams[camera_name]["video_source_token"]}
@ -421,6 +428,7 @@ class OnvifController:
        if (
            "focus" in self.cams[camera_name]["features"]
            and self.cams[camera_name]["video_source_token"]
+            and self.cams[camera_name]["imaging"] is not None
        ):
            try:
                stop_request = self.cams[camera_name]["imaging"].create_type("Stop")
@ -648,6 +656,7 @@ class OnvifController:
        if (
            "focus" not in self.cams[camera_name]["features"]
            or not self.cams[camera_name]["video_source_token"]
+            or self.cams[camera_name]["imaging"] is None
        ):
            logger.error(f"{camera_name} does not support ONVIF continuous focus.")
            return
--- a/web/src/components/classification/ClassificationModelEditDialog.tsx
+++ b/web/src/components/classification/ClassificationModelEditDialog.tsx
@ -37,7 +37,7 @@ import { useForm } from "react-hook-form";
 import { useTranslation } from "react-i18next";
 import { LuPlus, LuX } from "react-icons/lu";
 import { toast } from "sonner";
-import useSWR from "swr";
+import useSWR, { mutate } from "swr";
 import { z } from "zod";

 type ClassificationModelEditDialogProps = {
@ -240,24 +240,72 @@ export default function ClassificationModelEditDialog({
            position: "top-center",
          });
        } else {
-          // State model - update classes
-          // Note: For state models, updating classes requires renaming categories
-          // which is handled through the dataset API, not the config API
-          // We'll need to implement this by calling the rename endpoint for each class
-          // For now, we just show a message that this requires retraining
+          const stateData = data as StateFormData;
+          const newClasses = stateData.classes.filter(
+            (c) => c.trim().length > 0,
+          );
+          const oldClasses = dataset?.categories
+            ? Object.keys(dataset.categories).filter((key) => key !== "none")
+            : [];

+          const renameMap = new Map<string, string>();
+          const maxLength = Math.max(oldClasses.length, newClasses.length);
+
+          for (let i = 0; i < maxLength; i++) {
+            const oldClass = oldClasses[i];
+            const newClass = newClasses[i];
+
+            if (oldClass && newClass && oldClass !== newClass) {
+              renameMap.set(oldClass, newClass);
+            }
+          }
+
+          const renamePromises = Array.from(renameMap.entries()).map(
+            async ([oldName, newName]) => {
+              try {
+                await axios.put(
+                  `/classification/${model.name}/dataset/${oldName}/rename`,
+                  {
+                    new_category: newName,
+                  },
+                );
+              } catch (err) {
+                const error = err as {
+                  response?: { data?: { message?: string; detail?: string } };
+                };
+                const errorMessage =
+                  error.response?.data?.message ||
+                  error.response?.data?.detail ||
+                  "Unknown error";
+                throw new Error(
+                  `Failed to rename ${oldName} to ${newName}: ${errorMessage}`,
+                );
+              }
+            },
+          );
+
+          if (renamePromises.length > 0) {
+            await Promise.all(renamePromises);
+            await mutate(`classification/${model.name}/dataset`);
+            toast.success(t("toast.success.updatedModel"), {
+              position: "top-center",
+            });
+          } else {
            toast.info(t("edit.stateClassesInfo"), {
              position: "top-center",
            });
          }
+        }

        onSuccess();
        onClose();
      } catch (err) {
        const error = err as {
          response?: { data?: { message?: string; detail?: string } };
+          message?: string;
        };
        const errorMessage =
+          error.message ||
          error.response?.data?.message ||
          error.response?.data?.detail ||
          "Unknown error";
@ -268,7 +316,7 @@ export default function ClassificationModelEditDialog({
        setIsSaving(false);
      }
    },
-    [isObjectModel, model, t, onSuccess, onClose],
+    [isObjectModel, model, dataset, t, onSuccess, onClose],
  );

  const handleCancel = useCallback(() => {
--- a/web/src/views/settings/AuthenticationView.tsx
+++ b/web/src/views/settings/AuthenticationView.tsx
@ -478,8 +478,7 @@ export default function AuthenticationView({
                      <TableCell className="text-right">
                        <TooltipProvider>
                          <div className="flex items-center justify-end gap-2">
-                            {user.username !== "admin" &&
-                              user.username !== "viewer" && (
+                            {user.username !== "admin" && (
                              <Tooltip>
                                <TooltipTrigger asChild>
                                  <Button