Merge fb4fe8c430 into 1a75251ffb

2025-12-07 14:04:10 +03:00 · 2025-11-29 10:08:03 -05:00 · 2025-11-29 10:08:03 -05:00 · 481469bde2
commit 481469bde2
parent 1a75251ffb fb4fe8c430
5 changed files with 168 additions and 56 deletions
--- a/docs/docs/configuration/audio_detectors.md
+++ b/docs/docs/configuration/audio_detectors.md
@ -157,3 +157,19 @@ Only one `speech` event may be transcribed at a time. Frigate does not automatic
 :::
 Recorded `speech` events will always use a `whisper` model, regardless of the `model_size` config setting. Without a supported Nvidia GPU, generating transcriptions for longer `speech` events may take a fair amount of time, so be patient.
 #### FAQ
 1. Why doesn't Frigate automatically transcribe all `speech` events?
   Frigate does not implement a queue mechanism for speech transcription, and adding one is not trivial. A proper queue would need backpressure, prioritization, memory/disk buffering, retry logic, crash recovery, and safeguards to prevent unbounded growth when events outpace processing. That’s a significant amount of complexity for a feature that, in most real-world environments, would mostly just churn through low-value noise.
   Because transcription is **serialized (one event at a time)** and speech events can be generated far faster than they can be processed, an auto-transcribe toggle would very quickly create an ever-growing backlog and degrade core functionality. For the amount of engineering and risk involved, it adds **very little practical value** for the majority of deployments, which often run on low-powered edge hardware.
   If you hear speech that’s actually important and worth saving/indexing for the future, **just press the transcribe button in Explore** on that specific `speech` event - that keeps things explicit, reliable, and under your control.
 2. Why don't you save live transcription text and use that for `speech` events?
   There’s no guarantee that a `speech` event is even created from the exact audio that went through the transcription model. Live transcription and `speech` event creation are **separate, asynchronous processes**. Even when both are correctly configured, trying to align the **precise start and end time of a speech event** with whatever audio the model happened to be processing at that moment is unreliable.
   Automatically persisting that data would often result in **misaligned, partial, or irrelevant transcripts**, while still incurring all of the CPU, storage, and privacy costs of transcription. That’s why Frigate treats transcription as an **explicit, user-initiated action** rather than an automatic side-effect of every `speech` event.
--- a/frigate/data_processing/real_time/custom_classification.py
+++ b/frigate/data_processing/real_time/custom_classification.py
@ -99,6 +99,42 @@ class CustomStateClassificationProcessor(RealTimeProcessorApi):
        if self.inference_speed:
            self.inference_speed.update(duration)
    def _should_save_image(
        self, camera: str, detected_state: str, score: float = 1.0
    ) -> bool:
        """
        Decide whether a classified frame should be kept as a training sample.

        The frame is kept when any of these holds:
        - the camera has no entry in ``self.state_history`` yet
        - a pending state is recorded for the camera
        - a current state is recorded and ``detected_state`` differs from it
        - ``score`` is below 1.0

        In every other case the frame is skipped.
        """
        # No history for this camera yet, so keep the very first frame
        if camera not in self.state_history:
            return True

        history = self.state_history[camera]
        confirmed = history.get("current_state")

        # Checked in order and short-circuited: pending verification,
        # divergence from the confirmed state, then an imperfect score.
        return bool(
            history.get("pending_state") is not None
            or (confirmed is not None and detected_state != confirmed)
            or score < 1.0
        )
    def verify_state_change(self, camera: str, detected_state: str) -> str | None:
        """
        Verify state change requires 3 consecutive identical states before publishing.
@ -212,14 +248,16 @@ class CustomStateClassificationProcessor(RealTimeProcessorApi):
                return
        if self.interpreter is None:
-            write_classification_attempt(
+            # When interpreter is None, always save (score is 0.0, which is < 1.0)
-                self.train_dir,
+            if self._should_save_image(camera, "unknown", 0.0):
-                cv2.cvtColor(frame, cv2.COLOR_RGB2BGR),
+                write_classification_attempt(
-                "none-none",
+                    self.train_dir,
-                now,
+                    cv2.cvtColor(frame, cv2.COLOR_RGB2BGR),
-                "unknown",
+                    "none-none",
-                0.0,
+                    now,
-            )
+                    "unknown",
                    0.0,
                )
            return
        input = np.expand_dims(resized_frame, axis=0)
@ -236,14 +274,17 @@ class CustomStateClassificationProcessor(RealTimeProcessorApi):
        score = round(probs[best_id], 2)
        self.__update_metrics(datetime.datetime.now().timestamp() - now)
-        write_classification_attempt(
+        detected_state = self.labelmap[best_id]
-            self.train_dir,
+
-            cv2.cvtColor(frame, cv2.COLOR_RGB2BGR),
+        if self._should_save_image(camera, detected_state, score):
-            "none-none",
+            write_classification_attempt(
-            now,
+                self.train_dir,
-            self.labelmap[best_id],
+                cv2.cvtColor(frame, cv2.COLOR_RGB2BGR),
-            score,
+                "none-none",
-        )
+                now,
                detected_state,
                score,
            )
        if score < self.model_config.threshold:
            logger.debug(
@ -251,7 +292,6 @@ class CustomStateClassificationProcessor(RealTimeProcessorApi):
            )
            return
        detected_state = self.labelmap[best_id]
        verified_state = self.verify_state_change(camera, detected_state)
        if verified_state is not None:
--- a/frigate/ptz/onvif.py
+++ b/frigate/ptz/onvif.py
@ -190,7 +190,11 @@ class OnvifController:
        ptz: ONVIFService = await onvif.create_ptz_service()
        self.cams[camera_name]["ptz"] = ptz
-        imaging: ONVIFService = await onvif.create_imaging_service()
+        try:
            imaging: ONVIFService = await onvif.create_imaging_service()
        except (Fault, ONVIFError, TransportError, Exception) as e:
            logger.debug(f"Imaging service not supported for {camera_name}: {e}")
            imaging = None
        self.cams[camera_name]["imaging"] = imaging
        try:
            video_sources = await media.GetVideoSources()
@ -381,7 +385,10 @@ class OnvifController:
                            f"Disabling autotracking zooming for {camera_name}: Absolute zoom not supported. Exception: {e}"
                        )
-        if self.cams[camera_name]["video_source_token"] is not None:
+        if (
            self.cams[camera_name]["video_source_token"] is not None
            and imaging is not None
        ):
            try:
                imaging_capabilities = await imaging.GetImagingSettings(
                    {"VideoSourceToken": self.cams[camera_name]["video_source_token"]}
@ -421,6 +428,7 @@ class OnvifController:
        if (
            "focus" in self.cams[camera_name]["features"]
            and self.cams[camera_name]["video_source_token"]
            and self.cams[camera_name]["imaging"] is not None
        ):
            try:
                stop_request = self.cams[camera_name]["imaging"].create_type("Stop")
@ -648,6 +656,7 @@ class OnvifController:
        if (
            "focus" not in self.cams[camera_name]["features"]
            or not self.cams[camera_name]["video_source_token"]
            or self.cams[camera_name]["imaging"] is None
        ):
            logger.error(f"{camera_name} does not support ONVIF continuous focus.")
            return
--- a/web/src/components/classification/ClassificationModelEditDialog.tsx
+++ b/web/src/components/classification/ClassificationModelEditDialog.tsx
@ -37,7 +37,7 @@ import { useForm } from "react-hook-form";
 import { useTranslation } from "react-i18next";
 import { LuPlus, LuX } from "react-icons/lu";
 import { toast } from "sonner";
-import useSWR from "swr";
+import useSWR, { mutate } from "swr";
 import { z } from "zod";
 type ClassificationModelEditDialogProps = {
@ -240,15 +240,61 @@ export default function ClassificationModelEditDialog({
            position: "top-center",
          });
        } else {
-          // State model - update classes
+          const stateData = data as StateFormData;
-          // Note: For state models, updating classes requires renaming categories
+          const newClasses = stateData.classes.filter(
-          // which is handled through the dataset API, not the config API
+            (c) => c.trim().length > 0,
-          // We'll need to implement this by calling the rename endpoint for each class
+          );
-          // For now, we just show a message that this requires retraining
+          const oldClasses = dataset?.categories
            ? Object.keys(dataset.categories).filter((key) => key !== "none")
            : [];
-          toast.info(t("edit.stateClassesInfo"), {
+          const renameMap = new Map<string, string>();
-            position: "top-center",
+          const maxLength = Math.max(oldClasses.length, newClasses.length);
-          });
+
          for (let i = 0; i < maxLength; i++) {
            const oldClass = oldClasses[i];
            const newClass = newClasses[i];
            if (oldClass && newClass && oldClass !== newClass) {
              renameMap.set(oldClass, newClass);
            }
          }
          const renamePromises = Array.from(renameMap.entries()).map(
            async ([oldName, newName]) => {
              try {
                await axios.put(
                  `/classification/${model.name}/dataset/${oldName}/rename`,
                  {
                    new_category: newName,
                  },
                );
              } catch (err) {
                const error = err as {
                  response?: { data?: { message?: string; detail?: string } };
                };
                const errorMessage =
                  error.response?.data?.message ||
                  error.response?.data?.detail ||
                  "Unknown error";
                throw new Error(
                  `Failed to rename ${oldName} to ${newName}: ${errorMessage}`,
                );
              }
            },
          );
          if (renamePromises.length > 0) {
            await Promise.all(renamePromises);
            await mutate(`classification/${model.name}/dataset`);
            toast.success(t("toast.success.updatedModel"), {
              position: "top-center",
            });
          } else {
            toast.info(t("edit.stateClassesInfo"), {
              position: "top-center",
            });
          }
        }
        onSuccess();
@ -256,8 +302,10 @@ export default function ClassificationModelEditDialog({
      } catch (err) {
        const error = err as {
          response?: { data?: { message?: string; detail?: string } };
          message?: string;
        };
        const errorMessage =
          error.message ||
          error.response?.data?.message ||
          error.response?.data?.detail ||
          "Unknown error";
@ -268,7 +316,7 @@ export default function ClassificationModelEditDialog({
        setIsSaving(false);
      }
    },
-    [isObjectModel, model, t, onSuccess, onClose],
+    [isObjectModel, model, dataset, t, onSuccess, onClose],
  );
  const handleCancel = useCallback(() => {
--- a/web/src/views/settings/AuthenticationView.tsx
+++ b/web/src/views/settings/AuthenticationView.tsx
@ -478,33 +478,32 @@ export default function AuthenticationView({
                      <TableCell className="text-right">
                        <TooltipProvider>
                          <div className="flex items-center justify-end gap-2">
-                            {user.username !== "admin" &&
+                            {user.username !== "admin" && (
-                              user.username !== "viewer" && (
+                              <Tooltip>
-                                <Tooltip>
+                                <TooltipTrigger asChild>
-                                  <TooltipTrigger asChild>
+                                  <Button
-                                    <Button
+                                    size="sm"
-                                      size="sm"
+                                    variant="outline"
-                                      variant="outline"
+                                    className="h-8 px-2"
-                                      className="h-8 px-2"
+                                    onClick={() => {
-                                      onClick={() => {
+                                      setSelectedUser(user.username);
-                                        setSelectedUser(user.username);
+                                      setSelectedUserRole(
-                                        setSelectedUserRole(
+                                        user.role || "viewer",
-                                          user.role || "viewer",
+                                      );
-                                        );
+                                      setShowRoleChange(true);
-                                        setShowRoleChange(true);
+                                    }}
-                                      }}
+                                  >
-                                    >
+                                    <LuUserCog className="size-3.5" />
-                                      <LuUserCog className="size-3.5" />
+                                    <span className="ml-1.5 hidden sm:inline-block">
-                                      <span className="ml-1.5 hidden sm:inline-block">
+                                      {t("role.title", { ns: "common" })}
-                                        {t("role.title", { ns: "common" })}
+                                    </span>
-                                      </span>
+                                  </Button>
-                                    </Button>
+                                </TooltipTrigger>
-                                  </TooltipTrigger>
+                                <TooltipContent>
-                                  <TooltipContent>
+                                  <p>{t("users.table.changeRole")}</p>
-                                    <p>{t("users.table.changeRole")}</p>
+                                </TooltipContent>
-                                  </TooltipContent>
+                              </Tooltip>
-                                </Tooltip>
+                            )}
                              )}
                            <Tooltip>
                              <TooltipTrigger asChild>