diff --git a/frigate/embeddings/onnx/jina_v2_embedding.py b/frigate/embeddings/onnx/jina_v2_embedding.py
index fd4323f85..1abd968c9 100644
--- a/frigate/embeddings/onnx/jina_v2_embedding.py
+++ b/frigate/embeddings/onnx/jina_v2_embedding.py
@@ -3,6 +3,7 @@
 import io
 import logging
 import os
+import threading
 
 import numpy as np
 from PIL import Image
@@ -53,6 +54,11 @@ class JinaV2Embedding(BaseEmbedding):
         self.tokenizer = None
         self.image_processor = None
         self.runner = None
+
+        # Lock to prevent concurrent calls (text and vision share this instance)
+        self._call_lock = threading.Lock()
+
+        # download the model and tokenizer
         files_names = list(self.download_urls.keys()) + [self.tokenizer_file]
         if not all(
             os.path.exists(os.path.join(self.download_path, n)) for n in files_names
@@ -200,37 +206,40 @@
     def __call__(
         self, inputs: list[str] | list[Image.Image] | list[str], embedding_type=None
     ) -> list[np.ndarray]:
-        self.embedding_type = embedding_type
-        if not self.embedding_type:
-            raise ValueError(
-                "embedding_type must be specified either in __init__ or __call__"
-            )
+        # Lock the entire call to prevent race conditions when text and vision
+        # embeddings are called concurrently from different threads
+        with self._call_lock:
+            self.embedding_type = embedding_type
+            if not self.embedding_type:
+                raise ValueError(
+                    "embedding_type must be specified either in __init__ or __call__"
+                )
 
-        self._load_model_and_utils()
-        processed = self._preprocess_inputs(inputs)
-        batch_size = len(processed)
+            self._load_model_and_utils()
+            processed = self._preprocess_inputs(inputs)
+            batch_size = len(processed)
 
-        # Prepare ONNX inputs with matching batch sizes
-        onnx_inputs = {}
-        if self.embedding_type == "text":
-            onnx_inputs["input_ids"] = np.stack([x[0] for x in processed])
-            onnx_inputs["pixel_values"] = np.zeros(
-                (batch_size, 3, 512, 512), dtype=np.float32
-            )
-        elif self.embedding_type == "vision":
-            onnx_inputs["input_ids"] = np.zeros((batch_size, 16), dtype=np.int64)
-            onnx_inputs["pixel_values"] = np.stack([x[0] for x in processed])
-        else:
-            raise ValueError("Invalid embedding type")
+            # Prepare ONNX inputs with matching batch sizes
+            onnx_inputs = {}
+            if self.embedding_type == "text":
+                onnx_inputs["input_ids"] = np.stack([x[0] for x in processed])
+                onnx_inputs["pixel_values"] = np.zeros(
+                    (batch_size, 3, 512, 512), dtype=np.float32
+                )
+            elif self.embedding_type == "vision":
+                onnx_inputs["input_ids"] = np.zeros((batch_size, 16), dtype=np.int64)
+                onnx_inputs["pixel_values"] = np.stack([x[0] for x in processed])
+            else:
+                raise ValueError("Invalid embedding type")
 
-        # Run inference
-        outputs = self.runner.run(onnx_inputs)
-        if self.embedding_type == "text":
-            embeddings = outputs[2]  # text embeddings
-        elif self.embedding_type == "vision":
-            embeddings = outputs[3]  # image embeddings
-        else:
-            raise ValueError("Invalid embedding type")
+            # Run inference
+            outputs = self.runner.run(onnx_inputs)
+            if self.embedding_type == "text":
+                embeddings = outputs[2]  # text embeddings
+            elif self.embedding_type == "vision":
+                embeddings = outputs[3]  # image embeddings
+            else:
+                raise ValueError("Invalid embedding type")
 
-        embeddings = self._postprocess_outputs(embeddings)
-        return [embedding for embedding in embeddings]
+            embeddings = self._postprocess_outputs(embeddings)
+            return [embedding for embedding in embeddings]
diff --git a/web/src/components/filter/SearchFilterGroup.tsx b/web/src/components/filter/SearchFilterGroup.tsx
index 3c44cad0c..fe9a70e18 100644
--- a/web/src/components/filter/SearchFilterGroup.tsx
+++ b/web/src/components/filter/SearchFilterGroup.tsx
@@ -251,11 +251,30 @@ function GeneralFilterButton({
   updateLabelFilter,
 }: GeneralFilterButtonProps) {
   const { t } = useTranslation(["components/filter"]);
+  const { data: config } = useSWR<FrigateConfig>("config", {
+    revalidateOnFocus: false,
+  });
   const [open, setOpen] = useState(false);
   const [currentLabels, setCurrentLabels] = useState<string[] | undefined>(
     selectedLabels,
   );
 
+  const allAudioListenLabels = useMemo<Set<string>>(() => {
+    if (!config) {
+      return new Set<string>();
+    }
+
+    const labels = new Set<string>();
+    Object.values(config.cameras).forEach((camera) => {
+      if (camera?.audio?.enabled) {
+        camera.audio.listen.forEach((label) => {
+          labels.add(label);
+        });
+      }
+    });
+    return labels;
+  }, [config]);
+
   const buttonText = useMemo(() => {
     if (isMobile) {
       return t("labels.all.short");
@@ -266,13 +285,17 @@ function GeneralFilterButton({
     }
 
     if (selectedLabels.length == 1) {
-      return getTranslatedLabel(selectedLabels[0]);
+      const label = selectedLabels[0];
+      return getTranslatedLabel(
+        label,
+        allAudioListenLabels.has(label) ? "audio" : "object",
+      );
     }
 
     return t("labels.count", {
       count: selectedLabels.length,
     });
-  }, [selectedLabels, t]);
+  }, [selectedLabels, allAudioListenLabels, t]);
 
   // ui
 
diff --git a/web/src/pages/FaceLibrary.tsx b/web/src/pages/FaceLibrary.tsx
index 628928562..8ad6f70e5 100644
--- a/web/src/pages/FaceLibrary.tsx
+++ b/web/src/pages/FaceLibrary.tsx
@@ -925,11 +925,11 @@ function FaceAttemptGroup({
     [onRefresh, t],
   );
 
-  // Create ClassifiedEvent from Event (face recognition uses sub_label)
   const classifiedEvent: ClassifiedEvent | undefined = useMemo(() => {
-    if (!event || !event.sub_label || event.sub_label === "none") {
+    if (!event) {
       return undefined;
     }
+
     return {
       id: event.id,
       label: event.sub_label,
diff --git a/web/src/utils/i18n.ts b/web/src/utils/i18n.ts
index eba5ff7a5..ca7ad8e25 100644
--- a/web/src/utils/i18n.ts
+++ b/web/src/utils/i18n.ts
@@ -79,6 +79,24 @@ i18n
     parseMissingKeyHandler: (key: string) => {
       const parts = key.split(".");
 
+      // eslint-disable-next-line no-console
+      console.warn(`Missing translation key: ${key}`);
+
+      if (parts[0] === "time" && parts[1]?.includes("formattedTimestamp")) {
+        // Extract the format type from the last part (12hour, 24hour)
+        const formatType = parts[parts.length - 1];
+
+        // Return actual date-fns format strings as fallbacks
+        const formatDefaults: Record<string, string> = {
+          "12hour": "h:mm aaa",
+          "24hour": "HH:mm",
+        };
+
+        if (formatDefaults[formatType]) {
+          return formatDefaults[formatType];
+        }
+      }
+
       // Handle special cases for objects and audio
       if (parts[0] === "object" || parts[0] === "audio") {
         return (
diff --git a/web/src/views/classification/ModelTrainingView.tsx b/web/src/views/classification/ModelTrainingView.tsx
index 31e6f6d53..ec7ce0472 100644
--- a/web/src/views/classification/ModelTrainingView.tsx
+++ b/web/src/views/classification/ModelTrainingView.tsx
@@ -1043,34 +1043,22 @@ function ObjectTrainGrid({
         return undefined;
       }
 
-      const classificationType = model.object_config.classification_type;
+      let label: string | undefined = undefined;
+      let score: number | undefined = undefined;
 
-      if (classificationType === "attribute") {
-        // For attribute type, look at event.data[model.name]
-        const attributeValue = event.data[model.name] as string | undefined;
-        const attributeScore = event.data[`${model.name}_score`] as
-          | number
-          | undefined;
-
-        if (attributeValue && attributeValue !== "none") {
-          return {
-            id: event.id,
-            label: attributeValue,
-            score: attributeScore,
-          };
-        }
+      if (model.object_config.classification_type === "attribute") {
+        label = event.data[model.name] as string | undefined;
+        score = event.data[`${model.name}_score`] as number | undefined;
       } else {
-        // For sub_label type, use event.sub_label
-        if (event.sub_label && event.sub_label !== "none") {
-          return {
-            id: event.id,
-            label: event.sub_label,
-            score: event.data?.sub_label_score,
-          };
-        }
+        label = event.sub_label;
+        score = event.data.sub_label_score;
       }
 
-      return undefined;
+      return {
+        id: event.id,
+        label: label,
+        score: score,
+      };
     },
     [model],
   );
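Reviewer note (not part of the diff): the sketch below illustrates the race that the new _call_lock in jina_v2_embedding.py serializes. ToyEmbedding is a hypothetical stand-in for JinaV2Embedding, reproducing only the shared-state pattern from the diff (one instance, one self.embedding_type field written at the top of __call__ and read again when the outputs are selected); the thread setup and names are illustrative, not Frigate code.

import threading
import time


class ToyEmbedding:
    """Hypothetical stand-in for JinaV2Embedding shared by text and vision callers."""

    def __init__(self):
        self.embedding_type = None
        self._call_lock = threading.Lock()

    def __call__(self, inputs, embedding_type=None):
        # Serialize the whole call, mirroring the diff: without the lock, a
        # concurrent caller could overwrite self.embedding_type between the
        # assignment below and the output selection at the end.
        with self._call_lock:
            self.embedding_type = embedding_type
            time.sleep(0.01)  # stands in for preprocessing + ONNX inference
            return [f"{self.embedding_type}-embedding" for _ in inputs]


embedder = ToyEmbedding()
results = {}


def run(kind, inputs):
    results[kind] = embedder(inputs, embedding_type=kind)


threads = [
    threading.Thread(target=run, args=("text", ["a person walking"])),
    threading.Thread(target=run, args=("vision", ["image-bytes"])),
]
for t in threads:
    t.start()
for t in threads:
    t.join()

# With the lock each caller gets embeddings of its own type; removing the
# `with` block makes these assertions flaky under concurrency.
assert results["text"] == ["text-embedding"]
assert results["vision"] == ["vision-embedding"]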