From c5e08c7bcf8e739c7e8c04b01cf2c435e8e8d5b2 Mon Sep 17 00:00:00 2001 From: Josh Hawkins <32435876+hawkeye217@users.noreply.github.com> Date: Sun, 23 Nov 2025 12:22:27 -0600 Subject: [PATCH 1/5] fix wording in reference config --- docs/docs/configuration/reference.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/docs/configuration/reference.md b/docs/docs/configuration/reference.md index 907bda21e..10a480393 100644 --- a/docs/docs/configuration/reference.md +++ b/docs/docs/configuration/reference.md @@ -700,9 +700,9 @@ genai: # Optional: Configuration for audio transcription # NOTE: only the enabled option can be overridden at the camera level audio_transcription: - # Optional: Enable license plate recognition (default: shown below) + # Optional: Enable audio transcription (default: shown below) enabled: False - # Optional: The device to run the models on (default: shown below) + # Optional: The device to run the models on. (default: shown below) device: CPU # Optional: Set the model size used for transcription. (default: shown below) model_size: small From 9242997079ff9c4fd8e2482b0877312d5a7846f3 Mon Sep 17 00:00:00 2001 From: Josh Hawkins <32435876+hawkeye217@users.noreply.github.com> Date: Sun, 23 Nov 2025 12:25:00 -0600 Subject: [PATCH 2/5] spacing tweaks --- web/src/views/settings/AuthenticationView.tsx | 2 +- web/src/views/settings/CameraManagementView.tsx | 2 +- web/src/views/settings/CameraReviewSettingsView.tsx | 2 +- web/src/views/settings/EnrichmentsSettingsView.tsx | 2 +- web/src/views/settings/FrigatePlusSettingsView.tsx | 2 +- web/src/views/settings/MasksAndZonesView.tsx | 2 +- web/src/views/settings/MotionTunerView.tsx | 2 +- web/src/views/settings/NotificationsSettingsView.tsx | 4 ++-- web/src/views/settings/ObjectSettingsView.tsx | 6 +++--- web/src/views/settings/TriggerView.tsx | 2 +- web/src/views/settings/UiSettingsView.tsx | 2 +- 11 files changed, 14 insertions(+), 14 deletions(-) diff --git a/web/src/views/settings/AuthenticationView.tsx b/web/src/views/settings/AuthenticationView.tsx index 5c11d8914..19f157b46 100644 --- a/web/src/views/settings/AuthenticationView.tsx +++ b/web/src/views/settings/AuthenticationView.tsx @@ -784,7 +784,7 @@ export default function AuthenticationView({ return (
-
+
{section === "users" && UsersSection} {section === "roles" && RolesSection} {!section && ( diff --git a/web/src/views/settings/CameraManagementView.tsx b/web/src/views/settings/CameraManagementView.tsx index 1a626fa02..8f1b5eae5 100644 --- a/web/src/views/settings/CameraManagementView.tsx +++ b/web/src/views/settings/CameraManagementView.tsx @@ -65,7 +65,7 @@ export default function CameraManagementView({ closeButton />
-
+
{viewMode === "settings" ? ( <> diff --git a/web/src/views/settings/CameraReviewSettingsView.tsx b/web/src/views/settings/CameraReviewSettingsView.tsx index 47ea5c22a..7a7b92e4e 100644 --- a/web/src/views/settings/CameraReviewSettingsView.tsx +++ b/web/src/views/settings/CameraReviewSettingsView.tsx @@ -298,7 +298,7 @@ export default function CameraReviewSettingsView({ <>
-
+
{t("cameraReview.title")} diff --git a/web/src/views/settings/EnrichmentsSettingsView.tsx b/web/src/views/settings/EnrichmentsSettingsView.tsx index e3b0626b9..6aba50dd3 100644 --- a/web/src/views/settings/EnrichmentsSettingsView.tsx +++ b/web/src/views/settings/EnrichmentsSettingsView.tsx @@ -244,7 +244,7 @@ export default function EnrichmentsSettingsView({ return (
-
+
{t("enrichments.title")} diff --git a/web/src/views/settings/FrigatePlusSettingsView.tsx b/web/src/views/settings/FrigatePlusSettingsView.tsx index 52af94354..80d98b197 100644 --- a/web/src/views/settings/FrigatePlusSettingsView.tsx +++ b/web/src/views/settings/FrigatePlusSettingsView.tsx @@ -211,7 +211,7 @@ export default function FrigatePlusSettingsView({ <>
-
+
{t("frigatePlus.title")} diff --git a/web/src/views/settings/MasksAndZonesView.tsx b/web/src/views/settings/MasksAndZonesView.tsx index 27c542e87..efeaa9be0 100644 --- a/web/src/views/settings/MasksAndZonesView.tsx +++ b/web/src/views/settings/MasksAndZonesView.tsx @@ -434,7 +434,7 @@ export default function MasksAndZonesView({ {cameraConfig && editingPolygons && (
-
+
{editPane == "zone" && ( -
+
{t("motionDetectionTuner.title")} diff --git a/web/src/views/settings/NotificationsSettingsView.tsx b/web/src/views/settings/NotificationsSettingsView.tsx index 6280ca6a8..77da16386 100644 --- a/web/src/views/settings/NotificationsSettingsView.tsx +++ b/web/src/views/settings/NotificationsSettingsView.tsx @@ -331,7 +331,7 @@ export default function NotificationView({ if (!("Notification" in window) || !window.isSecureContext) { return ( -
+
@@ -385,7 +385,7 @@ export default function NotificationView({ <>
-
+
-
+
{t("debug.title")} @@ -434,7 +434,7 @@ function ObjectList({ cameraConfig, objects }: ObjectListProps) { {t("debug.objectShapeFilterDrawing.area")}

{obj.area ? ( - <> +
px: {obj.area.toString()}
@@ -448,7 +448,7 @@ function ObjectList({ cameraConfig, objects }: ObjectListProps) { .toFixed(4) .toString()}
- +
) : ( "-" )} diff --git a/web/src/views/settings/TriggerView.tsx b/web/src/views/settings/TriggerView.tsx index 0b004fd82..a0e19f5b2 100644 --- a/web/src/views/settings/TriggerView.tsx +++ b/web/src/views/settings/TriggerView.tsx @@ -440,7 +440,7 @@ export default function TriggerView({ return (
-
+
{!isSemanticSearchEnabled ? (
diff --git a/web/src/views/settings/UiSettingsView.tsx b/web/src/views/settings/UiSettingsView.tsx index 8ec484aa3..34df0ddc8 100644 --- a/web/src/views/settings/UiSettingsView.tsx +++ b/web/src/views/settings/UiSettingsView.tsx @@ -108,7 +108,7 @@ export default function UiSettingsView() { <>
-
+
{t("general.title")} From bf4f63e50e71c0f7403e01f12ddbda140c02a0d4 Mon Sep 17 00:00:00 2001 From: Josh Hawkins <32435876+hawkeye217@users.noreply.github.com> Date: Sun, 23 Nov 2025 12:25:23 -0600 Subject: [PATCH 3/5] make live view settings drawer scrollable --- web/src/views/live/LiveCameraView.tsx | 608 +++++++++++++------------- 1 file changed, 311 insertions(+), 297 deletions(-) diff --git a/web/src/views/live/LiveCameraView.tsx b/web/src/views/live/LiveCameraView.tsx index 65257326f..ada72bee3 100644 --- a/web/src/views/live/LiveCameraView.tsx +++ b/web/src/views/live/LiveCameraView.tsx @@ -1376,329 +1376,343 @@ function FrigateCameraFeatures({ title={t("cameraSettings.title", { camera })} /> - -
- {isAdmin && ( - <> - - sendEnabled(enabledState == "ON" ? "OFF" : "ON") - } - /> - - sendDetect(detectState == "ON" ? "OFF" : "ON") - } - /> - {recordingEnabled && ( + +
+ <> + {isAdmin && ( + <> - sendRecord(recordState == "ON" ? "OFF" : "ON") + sendEnabled(enabledState == "ON" ? "OFF" : "ON") } /> - )} - - sendSnapshot(snapshotState == "ON" ? "OFF" : "ON") - } - /> - {audioDetectEnabled && ( - sendAudio(audioState == "ON" ? "OFF" : "ON") + sendDetect(detectState == "ON" ? "OFF" : "ON") } /> - )} - {audioDetectEnabled && transcriptionEnabled && ( + {recordingEnabled && ( + + sendRecord(recordState == "ON" ? "OFF" : "ON") + } + /> + )} - sendTranscription(transcriptionState == "ON" ? "OFF" : "ON") + sendSnapshot(snapshotState == "ON" ? "OFF" : "ON") } /> - )} - {autotrackingEnabled && ( - - sendAutotracking(autotrackingState == "ON" ? "OFF" : "ON") - } - /> - )} - - )} -
+ {audioDetectEnabled && ( + + sendAudio(audioState == "ON" ? "OFF" : "ON") + } + /> + )} + {audioDetectEnabled && transcriptionEnabled && ( + + sendTranscription( + transcriptionState == "ON" ? "OFF" : "ON", + ) + } + /> + )} + {autotrackingEnabled && ( + + sendAutotracking(autotrackingState == "ON" ? "OFF" : "ON") + } + /> + )} + + )} -
- {!isRestreamed && ( -
- -
- -
- {t("streaming.restreaming.disabled", { - ns: "components/dialog", - })} -
- - -
- - - {t("button.info", { ns: "common" })} - -
-
- - {t("streaming.restreaming.desc.title", { - ns: "components/dialog", - })} -
- - {t("readTheDocumentation", { ns: "common" })} - - -
-
-
-
-
- )} - {isRestreamed && Object.values(camera.live.streams).length > 0 && ( -
-
{t("stream.title")}
- - - {debug && ( -
- <> - -
{t("stream.debug.picker")}
- -
- )} - - {preferredLiveMode != "jsmpeg" && !debug && isRestreamed && ( -
- {supportsAudioOutput ? ( - <> - -
{t("stream.audio.available")}
- - ) : ( - <> - -
{t("stream.audio.unavailable")}
- - -
- - - {t("button.info", { ns: "common" })} - -
-
- - {t("stream.audio.tips.title")} -
- - {t("readTheDocumentation", { ns: "common" })} - - -
-
-
- - )} -
- )} - {preferredLiveMode != "jsmpeg" && - !debug && - isRestreamed && - supportsAudioOutput && ( +
+ {!isRestreamed && ( +
+
- {supports2WayTalk ? ( - <> - -
{t("stream.twoWayTalk.available")}
- - ) : ( - <> - -
{t("stream.twoWayTalk.unavailable")}
- - -
- - - {t("button.info", { ns: "common" })} - -
-
- - {t("stream.twoWayTalk.tips")} -
- +
+ {t("streaming.restreaming.disabled", { + ns: "components/dialog", + })} +
+ + +
+ + + {t("button.info", { ns: "common" })} + +
+
+ + {t("streaming.restreaming.desc.title", { + ns: "components/dialog", + })} +
+ + {t("readTheDocumentation", { ns: "common" })} + + +
+
+
+
+
+ )} + {isRestreamed && + Object.values(camera.live.streams).length > 0 && ( +
+
{t("stream.title")}
+ + + {debug && ( +
+ <> + +
{t("stream.debug.picker")}
+ +
+ )} + + {preferredLiveMode != "jsmpeg" && + !debug && + isRestreamed && ( +
+ {supportsAudioOutput ? ( + <> + +
{t("stream.audio.available")}
+ + ) : ( + <> + +
{t("stream.audio.unavailable")}
+ + +
+ + + {t("button.info", { ns: "common" })} + +
+
+ + {t("stream.audio.tips.title")} +
+ + {t("readTheDocumentation", { + ns: "common", + })} + + +
+
+
+ + )} +
+ )} + {preferredLiveMode != "jsmpeg" && + !debug && + isRestreamed && + supportsAudioOutput && ( +
+ {supports2WayTalk ? ( + <> + +
{t("stream.twoWayTalk.available")}
+ + ) : ( + <> + +
{t("stream.twoWayTalk.unavailable")}
+ + +
+ + + {t("button.info", { ns: "common" })} + +
+
+ + {t("stream.twoWayTalk.tips")} +
+ + {t("readTheDocumentation", { + ns: "common", + })} + + +
+
+
+ + )} +
+ )} + {preferredLiveMode == "jsmpeg" && isRestreamed && ( +
+
+ +

+ {t("stream.lowBandwidth.tips")} +

+
+ +
)}
)} - {preferredLiveMode == "jsmpeg" && isRestreamed && ( -
-
- -

{t("stream.lowBandwidth.tips")}

-
+
+
+ {t("manualRecording.title")} +
+
+
+

+ {t("manualRecording.tips")} +

+
+ {isRestreamed && ( + <> +
+ { + setPlayInBackground(checked); + }} + disabled={debug} + /> +

+ {t("manualRecording.playInBackground.desc")} +

+
+
+ { + setShowStats(checked); + }} + disabled={debug} + /> +

+ {t("manualRecording.showStats.desc")} +

+
+ )} -
- )} -
-
- {t("manualRecording.title")} -
-
- - -
-

- {t("manualRecording.tips")} -

-
- {isRestreamed && ( - <> -
+
{ - setPlayInBackground(checked); - }} - disabled={debug} + label={t("streaming.debugView", { ns: "components/dialog" })} + isChecked={debug} + onCheckedChange={(checked) => setDebug(checked)} /> -

- {t("manualRecording.playInBackground.desc")} -

-
- { - setShowStats(checked); - }} - disabled={debug} - /> -

- {t("manualRecording.showStats.desc")} -

-
- - )} -
- setDebug(checked)} - /> -
+
+
From 42fdadecd943228c2186614f2852eae999822f46 Mon Sep 17 00:00:00 2001 From: Josh Hawkins <32435876+hawkeye217@users.noreply.github.com> Date: Sun, 23 Nov 2025 12:35:35 -0600 Subject: [PATCH 4/5] clarify audio transcription docs --- docs/docs/configuration/audio_detectors.md | 8 +++++++- docs/docs/configuration/reference.md | 6 +++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/docs/docs/configuration/audio_detectors.md b/docs/docs/configuration/audio_detectors.md index bf71f8d81..3bf57b1a7 100644 --- a/docs/docs/configuration/audio_detectors.md +++ b/docs/docs/configuration/audio_detectors.md @@ -144,4 +144,10 @@ In order to use transcription and translation for past events, you must enable a The transcribed/translated speech will appear in the description box in the Tracked Object Details pane. If Semantic Search is enabled, embeddings are generated for the transcription text and are fully searchable using the description search type. -Recorded `speech` events will always use a `whisper` model, regardless of the `model_size` config setting. Without a GPU, generating transcriptions for longer `speech` events may take a fair amount of time, so be patient. +:::note + +Only one `speech` event may be transcribed at a time. Frigate does not automatically transcribe `speech` events or implement a queue for long-running transcription model inference. + +::: + +Recorded `speech` events will always use a `whisper` model, regardless of the `model_size` config setting. Without a supported Nvidia GPU, generating transcriptions for longer `speech` events may take a fair amount of time, so be patient. diff --git a/docs/docs/configuration/reference.md b/docs/docs/configuration/reference.md index 10a480393..f8b49303f 100644 --- a/docs/docs/configuration/reference.md +++ b/docs/docs/configuration/reference.md @@ -700,11 +700,11 @@ genai: # Optional: Configuration for audio transcription # NOTE: only the enabled option can be overridden at the camera level audio_transcription: - # Optional: Enable audio transcription (default: shown below) + # Optional: Enable live and speech event audio transcription (default: shown below) enabled: False - # Optional: The device to run the models on. (default: shown below) + # Optional: The device to run the models on for live transcription. (default: shown below) device: CPU - # Optional: Set the model size used for transcription. (default: shown below) + # Optional: Set the model size used for live transcription. (default: shown below) model_size: small # Optional: Set the language used for transcription translation. (default: shown below) # List of language codes: https://github.com/openai/whisper/blob/main/whisper/tokenizer.py#L10 From 2c893aa1256660e2fe7c44fd0ba921aa7a6bcc08 Mon Sep 17 00:00:00 2001 From: Josh Hawkins <32435876+hawkeye217@users.noreply.github.com> Date: Mon, 24 Nov 2025 06:45:09 -0600 Subject: [PATCH 5/5] change audio transcription icon to activity indicator when transcription is in progress the backend doesn't implement any kind of queueing for speech event transcription --- frigate/api/classification.py | 1 + frigate/comms/dispatcher.py | 17 ++++++++++ frigate/const.py | 1 + .../post/audio_transcription.py | 5 +++ web/src/api/ws.tsx | 34 +++++++++++++++++++ .../overlay/detail/SearchDetailDialog.tsx | 20 +++++++++-- 6 files changed, 76 insertions(+), 2 deletions(-) diff --git a/frigate/api/classification.py b/frigate/api/classification.py index a2aec6898..9b116be10 100644 --- a/frigate/api/classification.py +++ b/frigate/api/classification.py @@ -542,6 +542,7 @@ def transcribe_audio(request: Request, body: AudioTranscriptionBody): status_code=409, # 409 Conflict ) else: + logger.debug(f"Failed to transcribe audio, response: {response}") return JSONResponse( content={ "success": False, diff --git a/frigate/comms/dispatcher.py b/frigate/comms/dispatcher.py index 235693c8c..0c2ba5a89 100644 --- a/frigate/comms/dispatcher.py +++ b/frigate/comms/dispatcher.py @@ -23,6 +23,7 @@ from frigate.const import ( NOTIFICATION_TEST, REQUEST_REGION_GRID, UPDATE_AUDIO_ACTIVITY, + UPDATE_AUDIO_TRANSCRIPTION_STATE, UPDATE_BIRDSEYE_LAYOUT, UPDATE_CAMERA_ACTIVITY, UPDATE_EMBEDDINGS_REINDEX_PROGRESS, @@ -61,6 +62,7 @@ class Dispatcher: self.model_state: dict[str, ModelStatusTypesEnum] = {} self.embeddings_reindex: dict[str, Any] = {} self.birdseye_layout: dict[str, Any] = {} + self.audio_transcription_state: str = "idle" self._camera_settings_handlers: dict[str, Callable] = { "audio": self._on_audio_command, "audio_transcription": self._on_audio_transcription_command, @@ -178,6 +180,19 @@ class Dispatcher: def handle_model_state() -> None: self.publish("model_state", json.dumps(self.model_state.copy())) + def handle_update_audio_transcription_state() -> None: + if payload: + self.audio_transcription_state = payload + self.publish( + "audio_transcription_state", + json.dumps(self.audio_transcription_state), + ) + + def handle_audio_transcription_state() -> None: + self.publish( + "audio_transcription_state", json.dumps(self.audio_transcription_state) + ) + def handle_update_embeddings_reindex_progress() -> None: self.embeddings_reindex = payload self.publish( @@ -264,10 +279,12 @@ class Dispatcher: UPDATE_MODEL_STATE: handle_update_model_state, UPDATE_EMBEDDINGS_REINDEX_PROGRESS: handle_update_embeddings_reindex_progress, UPDATE_BIRDSEYE_LAYOUT: handle_update_birdseye_layout, + UPDATE_AUDIO_TRANSCRIPTION_STATE: handle_update_audio_transcription_state, NOTIFICATION_TEST: handle_notification_test, "restart": handle_restart, "embeddingsReindexProgress": handle_embeddings_reindex_progress, "modelState": handle_model_state, + "audioTranscriptionState": handle_audio_transcription_state, "birdseyeLayout": handle_birdseye_layout, "onConnect": handle_on_connect, } diff --git a/frigate/const.py b/frigate/const.py index 5710966bf..11e89886f 100644 --- a/frigate/const.py +++ b/frigate/const.py @@ -113,6 +113,7 @@ CLEAR_ONGOING_REVIEW_SEGMENTS = "clear_ongoing_review_segments" UPDATE_CAMERA_ACTIVITY = "update_camera_activity" UPDATE_AUDIO_ACTIVITY = "update_audio_activity" EXPIRE_AUDIO_ACTIVITY = "expire_audio_activity" +UPDATE_AUDIO_TRANSCRIPTION_STATE = "update_audio_transcription_state" UPDATE_EVENT_DESCRIPTION = "update_event_description" UPDATE_REVIEW_DESCRIPTION = "update_review_description" UPDATE_MODEL_STATE = "update_model_state" diff --git a/frigate/data_processing/post/audio_transcription.py b/frigate/data_processing/post/audio_transcription.py index 870c34068..b7b6cb021 100644 --- a/frigate/data_processing/post/audio_transcription.py +++ b/frigate/data_processing/post/audio_transcription.py @@ -13,6 +13,7 @@ from frigate.config import FrigateConfig from frigate.const import ( CACHE_DIR, MODEL_CACHE_DIR, + UPDATE_AUDIO_TRANSCRIPTION_STATE, UPDATE_EVENT_DESCRIPTION, ) from frigate.data_processing.types import PostProcessDataEnum @@ -190,6 +191,8 @@ class AudioTranscriptionPostProcessor(PostProcessorApi): self.transcription_running = False self.transcription_thread = None + self.requestor.send_data(UPDATE_AUDIO_TRANSCRIPTION_STATE, "idle") + def handle_request(self, topic: str, request_data: dict[str, any]) -> str | None: if topic == "transcribe_audio": event = request_data["event"] @@ -203,6 +206,8 @@ class AudioTranscriptionPostProcessor(PostProcessorApi): # Mark as running and start the thread self.transcription_running = True + self.requestor.send_data(UPDATE_AUDIO_TRANSCRIPTION_STATE, "processing") + self.transcription_thread = threading.Thread( target=self._transcription_wrapper, args=(event,), daemon=True ) diff --git a/web/src/api/ws.tsx b/web/src/api/ws.tsx index 302f3f263..44d45ea2f 100644 --- a/web/src/api/ws.tsx +++ b/web/src/api/ws.tsx @@ -461,6 +461,40 @@ export function useEmbeddingsReindexProgress( return { payload: data }; } +export function useAudioTranscriptionProcessState( + revalidateOnFocus: boolean = true, +): { payload: string } { + const { + value: { payload }, + send: sendCommand, + } = useWs("audio_transcription_state", "audioTranscriptionState"); + + const data = useDeepMemo( + payload ? (JSON.parse(payload as string) as string) : "idle", + ); + + useEffect(() => { + let listener = undefined; + if (revalidateOnFocus) { + sendCommand("audioTranscriptionState"); + listener = () => { + if (document.visibilityState == "visible") { + sendCommand("audioTranscriptionState"); + } + }; + addEventListener("visibilitychange", listener); + } + return () => { + if (listener) { + removeEventListener("visibilitychange", listener); + } + }; + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [revalidateOnFocus]); + + return { payload: data || "idle" }; +} + export function useBirdseyeLayout(revalidateOnFocus: boolean = true): { payload: string; } { diff --git a/web/src/components/overlay/detail/SearchDetailDialog.tsx b/web/src/components/overlay/detail/SearchDetailDialog.tsx index 6b716a563..467008e92 100644 --- a/web/src/components/overlay/detail/SearchDetailDialog.tsx +++ b/web/src/components/overlay/detail/SearchDetailDialog.tsx @@ -92,6 +92,7 @@ import { DialogPortal } from "@radix-ui/react-dialog"; import { useDetailStream } from "@/context/detail-stream-context"; import { PiSlidersHorizontalBold } from "react-icons/pi"; import { HiSparkles } from "react-icons/hi"; +import { useAudioTranscriptionProcessState } from "@/api/ws"; const SEARCH_TABS = ["snapshot", "tracking_details"] as const; export type SearchTab = (typeof SEARCH_TABS)[number]; @@ -1076,6 +1077,11 @@ function ObjectDetailsTab({ }); }, [search, t]); + // audio transcription processing state + + const { payload: audioTranscriptionProcessState } = + useAudioTranscriptionProcessState(); + // frigate+ submission type SubmissionState = "reviewing" | "uploading" | "submitted"; @@ -1431,10 +1437,20 @@ function ObjectDetailsTab({