From c174956b2939752518f90d749ea6a5c7820fb12b Mon Sep 17 00:00:00 2001
From: shizhicheng
Date: Thu, 5 Mar 2026 22:11:10 +0800
Subject: [PATCH] Refactor: replace the ax_jinav2 model type with jinav2 plus
 an axengine detector check

Drop the dedicated ax_jinav2 value from SemanticSearchModelEnum (and from the
SearchModel union in the web UI) and select the Axera backend automatically
instead: when semantic search uses the jinav2 model and any configured
detector has type "axengine", Embeddings instantiates AXJinaV2Embedding and
downloads the AXERA-TECH axmodel files; otherwise the ONNX JinaV2Embedding
path is unchanged. Explore.tsx derives the same condition from the detector
config, so the effective*State wrapper memos are no longer needed.

---
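Note for reviewers (below the --- marker, so ignored by git am): a minimal
sketch of a config that exercises the new code path. The detector key name
"axnpu" below is arbitrary; only the type string "axengine" matters to the
has_axengine check introduced in this patch:

  detectors:
    axnpu:
      type: axengine
  semantic_search:
    enabled: true
    model: jinav2
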
"jinaai/jina-clip-v2-model_quantized.onnx", + "jinaai/jina-clip-v2-preprocessor_config.json", + ] else: # Default to jinav1 models = [ "jinaai/jina-clip-v1-text_model_fp16.onnx", diff --git a/web/src/pages/Explore.tsx b/web/src/pages/Explore.tsx index 4ff0a2020..98c1a653c 100644 --- a/web/src/pages/Explore.tsx +++ b/web/src/pages/Explore.tsx @@ -292,17 +292,24 @@ export default function Explore() { const modelVersion = config?.semantic_search.model || "jinav1"; const modelSize = config?.semantic_search.model_size || "small"; - const isAxJinaV2 = modelVersion === "ax_jinav2"; + const isAxJinaV2 = useMemo( + () => + modelVersion === "jinav2" && + Object.values( + (config?.detectors ?? {}) as Record, + ).some((detector) => detector?.type === "axengine"), + [modelVersion, config?.detectors], + ); // Text model state const { payload: textModelState } = useModelState( isAxJinaV2 ? "AXERA-TECH/jina-clip-v2-text_encoder.axmodel" : modelVersion === "jinav1" - ? "jinaai/jina-clip-v1-text_model_fp16.onnx" - : modelSize === "large" - ? "jinaai/jina-clip-v2-model_fp16.onnx" - : "jinaai/jina-clip-v2-model_quantized.onnx", + ? "jinaai/jina-clip-v1-text_model_fp16.onnx" + : modelSize === "large" + ? "jinaai/jina-clip-v2-model_fp16.onnx" + : "jinaai/jina-clip-v2-model_quantized.onnx", ); // Tokenizer state @@ -310,8 +317,8 @@ export default function Explore() { isAxJinaV2 ? "AXERA-TECH/jina-clip-v2-tokenizer" : modelVersion === "jinav1" - ? "jinaai/jina-clip-v1-tokenizer" - : "jinaai/jina-clip-v2-tokenizer", + ? "jinaai/jina-clip-v1-tokenizer" + : "jinaai/jina-clip-v2-tokenizer", ); // Vision model state (same as text model for jinav2) @@ -319,69 +326,30 @@ export default function Explore() { isAxJinaV2 ? "AXERA-TECH/jina-clip-v2-image_encoder.axmodel" : modelVersion === "jinav1" - ? modelSize === "large" - ? "jinaai/jina-clip-v1-vision_model_fp16.onnx" - : "jinaai/jina-clip-v1-vision_model_quantized.onnx" - : modelSize === "large" - ? "jinaai/jina-clip-v2-model_fp16.onnx" - : "jinaai/jina-clip-v2-model_quantized.onnx"; + ? modelSize === "large" + ? "jinaai/jina-clip-v1-vision_model_fp16.onnx" + : "jinaai/jina-clip-v1-vision_model_quantized.onnx" + : modelSize === "large" + ? "jinaai/jina-clip-v2-model_fp16.onnx" + : "jinaai/jina-clip-v2-model_quantized.onnx"; const { payload: visionModelState } = useModelState(visionModelFile); // Preprocessor/feature extractor state - const { payload: visionFeatureExtractorStateRaw } = useModelState( + const { payload: visionFeatureExtractorState } = useModelState( modelVersion === "jinav1" ? "jinaai/jina-clip-v1-preprocessor_config.json" : "jinaai/jina-clip-v2-preprocessor_config.json", ); - - const visionFeatureExtractorState = useMemo(() => { - if (isAxJinaV2) { - return visionModelState ?? "downloading"; - } - return visionFeatureExtractorStateRaw; - }, [isAxJinaV2, visionModelState, visionFeatureExtractorStateRaw]); - - const effectiveTextModelState = useMemo(() => { - if (isAxJinaV2) { - return textModelState ?? "downloading"; - } - return textModelState; - }, [isAxJinaV2, textModelState]); - - const effectiveTextTokenizerState = useMemo(() => { - if (isAxJinaV2) { - return textTokenizerState ?? "downloading"; - } - return textTokenizerState; - }, [isAxJinaV2, textTokenizerState]); - - const effectiveVisionModelState = useMemo(() => { - if (isAxJinaV2) { - return visionModelState ?? 
"downloading"; - } - return visionModelState; - }, [isAxJinaV2, visionModelState]); - const allModelsLoaded = useMemo(() => { - if (isAxJinaV2) { - return ( - effectiveTextModelState === "downloaded" && - effectiveTextTokenizerState === "downloaded" && - effectiveVisionModelState === "downloaded" - ); - } return ( textModelState === "downloaded" && textTokenizerState === "downloaded" && visionModelState === "downloaded" && - visionFeatureExtractorState === "downloaded" + (isAxJinaV2 || visionFeatureExtractorState === "downloaded") ); }, [ isAxJinaV2, - effectiveTextModelState, - effectiveTextTokenizerState, - effectiveVisionModelState, textModelState, textTokenizerState, visionModelState, @@ -405,9 +373,9 @@ export default function Explore() { !defaultViewLoaded || (config?.semantic_search.enabled && (!reindexState || - !(isAxJinaV2 ? effectiveTextModelState : textModelState) || - !(isAxJinaV2 ? effectiveTextTokenizerState : textTokenizerState) || - !(isAxJinaV2 ? effectiveVisionModelState : visionModelState) || + !textModelState || + !textTokenizerState || + !visionModelState || (!isAxJinaV2 && !visionFeatureExtractorState))) ) { return ( @@ -498,12 +466,14 @@ export default function Explore() { "exploreIsUnavailable.downloadingModels.setup.visionModel", )} -
-              {renderModelStateIcon(visionFeatureExtractorState)}
-              {t(
-                "exploreIsUnavailable.downloadingModels.setup.visionModelFeatureExtractor",
-              )}
-            </div>
+            {!isAxJinaV2 && (
+              <div className="flex flex-row items-center justify-center gap-2">
+                {renderModelStateIcon(visionFeatureExtractorState)}
+                {t(
+                  "exploreIsUnavailable.downloadingModels.setup.visionModelFeatureExtractor",
+                )}
+              </div>
+            )}
             <div className="flex flex-row items-center justify-center gap-2">
               {renderModelStateIcon(textModelState)}
               {t(
                 "exploreIsUnavailable.downloadingModels.setup.textModel",
               )}
             </div>
             <div className="flex flex-row items-center justify-center gap-2">
               {renderModelStateIcon(textTokenizerState)}
               {t(
                 "exploreIsUnavailable.downloadingModels.setup.textTokenizer",
               )}
             </div>
@@ -520,7 +490,7 @@ export default function Explore() {
           {(textModelState === "error" ||
             textTokenizerState === "error" ||
             visionModelState === "error" ||
-            visionFeatureExtractorState === "error") && (
+            (!isAxJinaV2 && visionFeatureExtractorState === "error")) && (
             <div className="my-3 max-w-96 text-center text-danger">
{t("exploreIsUnavailable.downloadingModels.error")}
             </div>
diff --git a/web/src/types/frigateConfig.ts b/web/src/types/frigateConfig.ts
index 369160319..94c9ba6e9 100644
--- a/web/src/types/frigateConfig.ts
+++ b/web/src/types/frigateConfig.ts
@@ -28,7 +28,7 @@ export interface FaceRecognitionConfig {
   recognition_threshold: number;
 }

-export type SearchModel = "jinav1" | "jinav2" | "ax_jinav2";
+export type SearchModel = "jinav1" | "jinav2";

 export type SearchModelSize = "small" | "large";

 export interface CameraConfig {
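--
Reviewer note, not applied by git am: the web UI check mirrors the backend's
has_axengine scan. A self-contained sketch of the shared predicate, assuming
the config shape from web/src/types/frigateConfig.ts; the DetectorShape alias
and isAxeraJinaV2 name are hypothetical, introduced only for this example:

  type DetectorShape = { type?: string } | undefined;

  function isAxeraJinaV2(
    model: "jinav1" | "jinav2",
    detectors: Record<string, DetectorShape>,
  ): boolean {
    // jinav2 runs on the Axera NPU only when some detector is axengine
    return (
      model === "jinav2" &&
      Object.values(detectors).some((d) => d?.type === "axengine")
    );
  }

  // Example: isAxeraJinaV2("jinav2", { axnpu: { type: "axengine" } }) === true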