Refactor: Remove the ax_jinav2 model type; select the Axera backend when jinav2 is used together with an axengine detector

This commit is contained in:
shizhicheng 2026-03-05 22:11:10 +08:00 committed by ivanshi1108
parent 176f5cce66
commit c174956b29
4 changed files with 67 additions and 92 deletions

View File

@@ -19,7 +19,6 @@ __all__ = [
class SemanticSearchModelEnum(str, Enum):
jinav1 = "jinav1"
jinav2 = "jinav2"
ax_jinav2 = "ax_jinav2"
class EnrichmentsDeviceEnum(str, Enum):

View File

@@ -94,6 +94,10 @@ class Embeddings:
# Create tables if they don't exist
self.db.create_embeddings_tables()
self.has_axengine = any(
d.type == "axengine" for d in self.config.detectors.values()
)
models = self.get_model_definitions()
for model in models:
@@ -106,25 +110,20 @@ class Embeddings:
)
if self.config.semantic_search.model == SemanticSearchModelEnum.jinav2:
# Single JinaV2Embedding instance for both text and vision
self.embedding = JinaV2Embedding(
model_size=self.config.semantic_search.model_size,
requestor=self.requestor,
device=config.semantic_search.device
or ("GPU" if config.semantic_search.model_size == "large" else "CPU"),
)
self.text_embedding = lambda input_data: self.embedding(
input_data, embedding_type="text"
)
self.vision_embedding = lambda input_data: self.embedding(
input_data, embedding_type="vision"
)
elif self.config.semantic_search.model == SemanticSearchModelEnum.ax_jinav2:
# AXJinaV2Embedding instance for both text and vision
self.embedding = AXJinaV2Embedding(
model_size=self.config.semantic_search.model_size,
requestor=self.requestor,
)
if self.has_axengine:
# AXJinaV2Embedding instance for both text and vision on Axera NPU
self.embedding = AXJinaV2Embedding(
model_size=self.config.semantic_search.model_size,
requestor=self.requestor,
)
else:
# Single JinaV2Embedding instance for both text and vision
self.embedding = JinaV2Embedding(
model_size=self.config.semantic_search.model_size,
requestor=self.requestor,
device=config.semantic_search.device
or ("GPU" if config.semantic_search.model_size == "large" else "CPU"),
)
self.text_embedding = lambda input_data: self.embedding(
input_data, embedding_type="text"
)
@@ -151,13 +150,20 @@ class Embeddings:
def get_model_definitions(self):
# Version-specific models
if self.config.semantic_search.model == SemanticSearchModelEnum.jinav2:
models = [
"jinaai/jina-clip-v2-tokenizer",
"jinaai/jina-clip-v2-model_fp16.onnx"
if self.config.semantic_search.model_size == "large"
else "jinaai/jina-clip-v2-model_quantized.onnx",
"jinaai/jina-clip-v2-preprocessor_config.json",
]
if self.has_axengine:
models = [
"AXERA-TECH/jina-clip-v2-text_encoder.axmodel",
"AXERA-TECH/jina-clip-v2-image_encoder.axmodel",
"AXERA-TECH/jina-clip-v2-tokenizer",
]
else:
models = [
"jinaai/jina-clip-v2-tokenizer",
"jinaai/jina-clip-v2-model_fp16.onnx"
if self.config.semantic_search.model_size == "large"
else "jinaai/jina-clip-v2-model_quantized.onnx",
"jinaai/jina-clip-v2-preprocessor_config.json",
]
else: # Default to jinav1
models = [
"jinaai/jina-clip-v1-text_model_fp16.onnx",

View File

@@ -292,17 +292,24 @@ export default function Explore() {
const modelVersion = config?.semantic_search.model || "jinav1";
const modelSize = config?.semantic_search.model_size || "small";
const isAxJinaV2 = modelVersion === "ax_jinav2";
const isAxJinaV2 = useMemo(
() =>
modelVersion === "jinav2" &&
Object.values(
(config?.detectors ?? {}) as Record<string, { type?: string }>,
).some((detector) => detector?.type === "axengine"),
[modelVersion, config?.detectors],
);
// Text model state
const { payload: textModelState } = useModelState(
isAxJinaV2
? "AXERA-TECH/jina-clip-v2-text_encoder.axmodel"
: modelVersion === "jinav1"
? "jinaai/jina-clip-v1-text_model_fp16.onnx"
: modelSize === "large"
? "jinaai/jina-clip-v2-model_fp16.onnx"
: "jinaai/jina-clip-v2-model_quantized.onnx",
? "jinaai/jina-clip-v1-text_model_fp16.onnx"
: modelSize === "large"
? "jinaai/jina-clip-v2-model_fp16.onnx"
: "jinaai/jina-clip-v2-model_quantized.onnx",
);
// Tokenizer state
@@ -310,8 +317,8 @@ export default function Explore() {
isAxJinaV2
? "AXERA-TECH/jina-clip-v2-tokenizer"
: modelVersion === "jinav1"
? "jinaai/jina-clip-v1-tokenizer"
: "jinaai/jina-clip-v2-tokenizer",
? "jinaai/jina-clip-v1-tokenizer"
: "jinaai/jina-clip-v2-tokenizer",
);
// Vision model state (same as text model for jinav2)
@@ -319,69 +326,30 @@ export default function Explore() {
isAxJinaV2
? "AXERA-TECH/jina-clip-v2-image_encoder.axmodel"
: modelVersion === "jinav1"
? modelSize === "large"
? "jinaai/jina-clip-v1-vision_model_fp16.onnx"
: "jinaai/jina-clip-v1-vision_model_quantized.onnx"
: modelSize === "large"
? "jinaai/jina-clip-v2-model_fp16.onnx"
: "jinaai/jina-clip-v2-model_quantized.onnx";
? modelSize === "large"
? "jinaai/jina-clip-v1-vision_model_fp16.onnx"
: "jinaai/jina-clip-v1-vision_model_quantized.onnx"
: modelSize === "large"
? "jinaai/jina-clip-v2-model_fp16.onnx"
: "jinaai/jina-clip-v2-model_quantized.onnx";
const { payload: visionModelState } = useModelState(visionModelFile);
// Preprocessor/feature extractor state
const { payload: visionFeatureExtractorStateRaw } = useModelState(
const { payload: visionFeatureExtractorState } = useModelState(
modelVersion === "jinav1"
? "jinaai/jina-clip-v1-preprocessor_config.json"
: "jinaai/jina-clip-v2-preprocessor_config.json",
);
const visionFeatureExtractorState = useMemo(() => {
if (isAxJinaV2) {
return visionModelState ?? "downloading";
}
return visionFeatureExtractorStateRaw;
}, [isAxJinaV2, visionModelState, visionFeatureExtractorStateRaw]);
const effectiveTextModelState = useMemo<ModelState | undefined>(() => {
if (isAxJinaV2) {
return textModelState ?? "downloading";
}
return textModelState;
}, [isAxJinaV2, textModelState]);
const effectiveTextTokenizerState = useMemo<ModelState | undefined>(() => {
if (isAxJinaV2) {
return textTokenizerState ?? "downloading";
}
return textTokenizerState;
}, [isAxJinaV2, textTokenizerState]);
const effectiveVisionModelState = useMemo<ModelState | undefined>(() => {
if (isAxJinaV2) {
return visionModelState ?? "downloading";
}
return visionModelState;
}, [isAxJinaV2, visionModelState]);
const allModelsLoaded = useMemo(() => {
if (isAxJinaV2) {
return (
effectiveTextModelState === "downloaded" &&
effectiveTextTokenizerState === "downloaded" &&
effectiveVisionModelState === "downloaded"
);
}
return (
textModelState === "downloaded" &&
textTokenizerState === "downloaded" &&
visionModelState === "downloaded" &&
visionFeatureExtractorState === "downloaded"
(isAxJinaV2 || visionFeatureExtractorState === "downloaded")
);
}, [
isAxJinaV2,
effectiveTextModelState,
effectiveTextTokenizerState,
effectiveVisionModelState,
textModelState,
textTokenizerState,
visionModelState,
@@ -405,9 +373,9 @@ export default function Explore() {
!defaultViewLoaded ||
(config?.semantic_search.enabled &&
(!reindexState ||
!(isAxJinaV2 ? effectiveTextModelState : textModelState) ||
!(isAxJinaV2 ? effectiveTextTokenizerState : textTokenizerState) ||
!(isAxJinaV2 ? effectiveVisionModelState : visionModelState) ||
!textModelState ||
!textTokenizerState ||
!visionModelState ||
(!isAxJinaV2 && !visionFeatureExtractorState)))
) {
return (
@@ -498,12 +466,14 @@ export default function Explore() {
"exploreIsUnavailable.downloadingModels.setup.visionModel",
)}
</div>
<div className="flex flex-row items-center justify-center gap-2">
{renderModelStateIcon(visionFeatureExtractorState)}
{t(
"exploreIsUnavailable.downloadingModels.setup.visionModelFeatureExtractor",
)}
</div>
{!isAxJinaV2 && (
<div className="flex flex-row items-center justify-center gap-2">
{renderModelStateIcon(visionFeatureExtractorState)}
{t(
"exploreIsUnavailable.downloadingModels.setup.visionModelFeatureExtractor",
)}
</div>
)}
<div className="flex flex-row items-center justify-center gap-2">
{renderModelStateIcon(textModelState)}
{t(
@@ -520,7 +490,7 @@ export default function Explore() {
{(textModelState === "error" ||
textTokenizerState === "error" ||
visionModelState === "error" ||
visionFeatureExtractorState === "error") && (
(!isAxJinaV2 && visionFeatureExtractorState === "error")) && (
<div className="my-3 max-w-96 text-center text-danger">
{t("exploreIsUnavailable.downloadingModels.error")}
</div>

View File

@@ -28,7 +28,7 @@ export interface FaceRecognitionConfig {
recognition_threshold: number;
}
export type SearchModel = "jinav1" | "jinav2" | "ax_jinav2";
export type SearchModel = "jinav1" | "jinav2";
export type SearchModelSize = "small" | "large";
export interface CameraConfig {