change audio transcription icon to activity indicator when transcription is in progress

the backend doesn't implement any kind of queueing for speech event transcription, so a request made while another transcription is running is rejected with 409 Conflict
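
Because there is no server-side queue, callers have to handle the busy case themselves. A minimal client-side sketch of that handling — the endpoint URL and request body below are illustrative assumptions, not the actual route touched by this commit:

// Hedged sketch: request a transcription and surface the no-queue behavior.
// The URL and body shape are assumptions for illustration only.
async function requestTranscription(
  eventId: string,
): Promise<"started" | "busy" | "failed"> {
  const resp = await fetch("/api/audio/transcribe", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ event_id: eventId }), // hypothetical body
  });

  if (resp.status === 409) {
    // Another transcription is already running; there is no queue,
    // so retry later or watch the audio_transcription_state topic.
    return "busy";
  }

  const data = (await resp.json()) as { success: boolean; message?: string };
  return data.success ? "started" : "failed";
}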
Josh Hawkins 2025-11-24 06:45:09 -06:00
parent 42fdadecd9
commit 2c893aa125
6 changed files with 76 additions and 2 deletions

View File

@@ -542,6 +542,7 @@ def transcribe_audio(request: Request, body: AudioTranscriptionBody):
                 status_code=409,  # 409 Conflict
             )
         else:
+            logger.debug(f"Failed to transcribe audio, response: {response}")
             return JSONResponse(
                 content={
                     "success": False,

View File

@@ -23,6 +23,7 @@ from frigate.const import (
     NOTIFICATION_TEST,
     REQUEST_REGION_GRID,
     UPDATE_AUDIO_ACTIVITY,
+    UPDATE_AUDIO_TRANSCRIPTION_STATE,
     UPDATE_BIRDSEYE_LAYOUT,
     UPDATE_CAMERA_ACTIVITY,
     UPDATE_EMBEDDINGS_REINDEX_PROGRESS,
@@ -61,6 +62,7 @@ class Dispatcher:
         self.model_state: dict[str, ModelStatusTypesEnum] = {}
         self.embeddings_reindex: dict[str, Any] = {}
         self.birdseye_layout: dict[str, Any] = {}
+        self.audio_transcription_state: str = "idle"
         self._camera_settings_handlers: dict[str, Callable] = {
             "audio": self._on_audio_command,
             "audio_transcription": self._on_audio_transcription_command,
@@ -178,6 +180,19 @@ class Dispatcher:
         def handle_model_state() -> None:
             self.publish("model_state", json.dumps(self.model_state.copy()))

+        def handle_update_audio_transcription_state() -> None:
+            if payload:
+                self.audio_transcription_state = payload
+
+            self.publish(
+                "audio_transcription_state",
+                json.dumps(self.audio_transcription_state),
+            )
+
+        def handle_audio_transcription_state() -> None:
+            self.publish(
+                "audio_transcription_state", json.dumps(self.audio_transcription_state)
+            )
+
         def handle_update_embeddings_reindex_progress() -> None:
             self.embeddings_reindex = payload
             self.publish(
@@ -264,10 +279,12 @@ class Dispatcher:
             UPDATE_MODEL_STATE: handle_update_model_state,
             UPDATE_EMBEDDINGS_REINDEX_PROGRESS: handle_update_embeddings_reindex_progress,
             UPDATE_BIRDSEYE_LAYOUT: handle_update_birdseye_layout,
+            UPDATE_AUDIO_TRANSCRIPTION_STATE: handle_update_audio_transcription_state,
             NOTIFICATION_TEST: handle_notification_test,
             "restart": handle_restart,
             "embeddingsReindexProgress": handle_embeddings_reindex_progress,
             "modelState": handle_model_state,
+            "audioTranscriptionState": handle_audio_transcription_state,
             "birdseyeLayout": handle_birdseye_layout,
             "onConnect": handle_on_connect,
         }
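
The dispatcher follows its existing two-handler pattern here: the UPDATE_* topic mutates the cached state and rebroadcasts it, while the camelCase command replays the cached value for late subscribers (it sits alongside modelState and birdseyeLayout in the onConnect path). A standalone consumer might look like the sketch below; the { topic, payload } envelope is an assumption inferred from the useWs hook further down, not a confirmed wire format.

// Hedged sketch of a raw WebSocket subscriber for the two new topics.
const ws = new WebSocket("ws://frigate.local:5000/ws"); // hypothetical URL

ws.onopen = () => {
  // Ask the dispatcher to replay the cached state, mirroring the
  // "audioTranscriptionState" command handled above.
  ws.send(JSON.stringify({ topic: "audioTranscriptionState", payload: "" }));
};

ws.onmessage = (event) => {
  const msg = JSON.parse(event.data) as { topic: string; payload: string };
  if (msg.topic === "audio_transcription_state") {
    // The payload is a JSON-encoded string: "idle" or "processing".
    console.log("transcription state:", JSON.parse(msg.payload));
  }
};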

View File

@@ -113,6 +113,7 @@ CLEAR_ONGOING_REVIEW_SEGMENTS = "clear_ongoing_review_segments"
 UPDATE_CAMERA_ACTIVITY = "update_camera_activity"
 UPDATE_AUDIO_ACTIVITY = "update_audio_activity"
 EXPIRE_AUDIO_ACTIVITY = "expire_audio_activity"
+UPDATE_AUDIO_TRANSCRIPTION_STATE = "update_audio_transcription_state"
 UPDATE_EVENT_DESCRIPTION = "update_event_description"
 UPDATE_REVIEW_DESCRIPTION = "update_review_description"
 UPDATE_MODEL_STATE = "update_model_state"

View File

@@ -13,6 +13,7 @@ from frigate.config import FrigateConfig
 from frigate.const import (
     CACHE_DIR,
     MODEL_CACHE_DIR,
+    UPDATE_AUDIO_TRANSCRIPTION_STATE,
     UPDATE_EVENT_DESCRIPTION,
 )
 from frigate.data_processing.types import PostProcessDataEnum
@@ -190,6 +191,8 @@ class AudioTranscriptionPostProcessor(PostProcessorApi):
         self.transcription_running = False
         self.transcription_thread = None
+        self.requestor.send_data(UPDATE_AUDIO_TRANSCRIPTION_STATE, "idle")
+
     def handle_request(self, topic: str, request_data: dict[str, any]) -> str | None:
         if topic == "transcribe_audio":
             event = request_data["event"]
@@ -203,6 +206,8 @@ class AudioTranscriptionPostProcessor(PostProcessorApi):
             # Mark as running and start the thread
             self.transcription_running = True
+            self.requestor.send_data(UPDATE_AUDIO_TRANSCRIPTION_STATE, "processing")
+
             self.transcription_thread = threading.Thread(
                 target=self._transcription_wrapper, args=(event,), daemon=True
             )
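
Taken together, the post-processor only ever publishes two values: "processing" when the worker thread starts and "idle" when it finishes. A client can treat this as a two-state machine; a minimal sketch of the guard the UI change below relies on:

// The only states this diff publishes.
type AudioTranscriptionState = "idle" | "processing";

// Mirror of the backend's single-slot behavior: a new request is only
// worthwhile while the published state is "idle".
function canRequestTranscription(state: AudioTranscriptionState): boolean {
  return state === "idle";
}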

View File

@@ -461,6 +461,40 @@ export function useEmbeddingsReindexProgress(
   return { payload: data };
 }

+export function useAudioTranscriptionProcessState(
+  revalidateOnFocus: boolean = true,
+): { payload: string } {
+  const {
+    value: { payload },
+    send: sendCommand,
+  } = useWs("audio_transcription_state", "audioTranscriptionState");
+
+  const data = useDeepMemo(
+    payload ? (JSON.parse(payload as string) as string) : "idle",
+  );
+
+  useEffect(() => {
+    let listener = undefined;
+
+    if (revalidateOnFocus) {
+      sendCommand("audioTranscriptionState");
+      listener = () => {
+        if (document.visibilityState == "visible") {
+          sendCommand("audioTranscriptionState");
+        }
+      };
+      addEventListener("visibilitychange", listener);
+    }
+
+    return () => {
+      if (listener) {
+        removeEventListener("visibilitychange", listener);
+      }
+    };
+    // eslint-disable-next-line react-hooks/exhaustive-deps
+  }, [revalidateOnFocus]);
+
+  return { payload: data || "idle" };
+}
+
 export function useBirdseyeLayout(revalidateOnFocus: boolean = true): {
   payload: string;
 } {
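
Any component can consume the new hook the same way the explorer dialog below does. A hedged usage sketch — the ActivityIndicator import path is assumed, not taken from this diff:

// Hypothetical status badge driven by the new hook.
import { useAudioTranscriptionProcessState } from "@/api/ws";
import ActivityIndicator from "@/components/indicators/activity-indicator"; // assumed path

function TranscriptionStatusBadge() {
  const { payload: state } = useAudioTranscriptionProcessState();

  // Render nothing unless a transcription is actively running.
  if (state !== "processing") {
    return null;
  }

  return (
    <span className="flex items-center gap-1 text-xs text-primary/60">
      <ActivityIndicator className="size-3" />
      Transcribing audio…
    </span>
  );
}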

View File

@@ -92,6 +92,7 @@ import { DialogPortal } from "@radix-ui/react-dialog";
 import { useDetailStream } from "@/context/detail-stream-context";
 import { PiSlidersHorizontalBold } from "react-icons/pi";
 import { HiSparkles } from "react-icons/hi";
+import { useAudioTranscriptionProcessState } from "@/api/ws";

 const SEARCH_TABS = ["snapshot", "tracking_details"] as const;
 export type SearchTab = (typeof SEARCH_TABS)[number];
@@ -1076,6 +1077,11 @@ function ObjectDetailsTab({
     });
   }, [search, t]);

+  // audio transcription processing state
+  const { payload: audioTranscriptionProcessState } =
+    useAudioTranscriptionProcessState();
+
   // frigate+ submission
   type SubmissionState = "reviewing" | "uploading" | "submitted";
@@ -1431,10 +1437,20 @@ function ObjectDetailsTab({
                 <TooltipTrigger asChild>
                   <button
                     aria-label={t("itemMenu.audioTranscription.label")}
-                    className="text-primary/40 hover:text-primary/80"
+                    className={cn(
+                      "text-primary/40",
+                      audioTranscriptionProcessState === "processing"
+                        ? "cursor-not-allowed"
+                        : "hover:text-primary/80",
+                    )}
                     onClick={onTranscribe}
+                    disabled={audioTranscriptionProcessState === "processing"}
                   >
-                    <FaMicrophone className="size-4" />
+                    {audioTranscriptionProcessState === "processing" ? (
+                      <ActivityIndicator className="size-4" />
+                    ) : (
+                      <FaMicrophone className="size-4" />
+                    )}
                   </button>
                 </TooltipTrigger>
                 <TooltipContent>