add scale_factor and bias to description zscore normalization

2026-02-15 07:35:27 +03:00 · 2024-10-07 15:20:45 -05:00 · 2024-10-07 15:20:45 -05:00 · e3a81db0bb
commit e3a81db0bb
parent 5cda95f5bf
4 changed files with 17 additions and 29 deletions
--- a/frigate/embeddings/init.py
+++ b/frigate/embeddings/init.py
@@ -73,7 +73,7 @@ class EmbeddingsContext:
    def __init__(self, db: SqliteVecQueueDatabase):
        self.embeddings = Embeddings(db)
        self.thumb_stats = ZScoreNormalization()
-        self.desc_stats = ZScoreNormalization()
+        self.desc_stats = ZScoreNormalization(scale_factor=2.5, bias=0.5)

        # load stats from disk
        try:
--- a/frigate/embeddings/functions/minilm_l6_v2.py
+++ b/frigate/embeddings/functions/minilm_l6_v2.py
@@ -46,7 +46,7 @@ class MiniLMEmbedding:
            elif os.path.basename(path) == self.TOKENIZER_FILE:
                logger.info("Downloading MiniLM tokenizer")
                tokenizer = AutoTokenizer.from_pretrained(
-                    self.MODEL_NAME, clean_up_tokenization_spaces=False
+                    self.MODEL_NAME, clean_up_tokenization_spaces=True
                )
                tokenizer.save_pretrained(path)

@@ -78,7 +78,7 @@ class MiniLMEmbedding:
    def _load_tokenizer(self):
        tokenizer_path = os.path.join(self.DOWNLOAD_PATH, self.TOKENIZER_FILE)
        return AutoTokenizer.from_pretrained(
-            tokenizer_path, clean_up_tokenization_spaces=False
+            tokenizer_path, clean_up_tokenization_spaces=True
        )

    def _load_model(self, path: str, providers: List[str]):
--- a/frigate/embeddings/util.py
+++ b/frigate/embeddings/util.py
@@ -4,12 +4,15 @@ import math


 class ZScoreNormalization:
-    """Running Z-score normalization for search distance."""
-
-    def __init__(self):
+    def __init__(self, scale_factor: float = 1.0, bias: float = 0.0):
+        """Initialize with optional scaling and bias adjustments.
+        scale_factor adjusts the magnitude of each score.
+        bias will artificially shift the entire distribution upwards."""
        self.n = 0
        self.mean = 0
        self.m2 = 0
+        self.scale_factor = scale_factor
+        self.bias = bias

    @property
    def variance(self):
@@ -23,7 +26,10 @@ class ZScoreNormalization:
        self._update(distances)
        if self.stddev == 0:
            return distances
-        return [(x - self.mean) / self.stddev for x in distances]
+        return [
+            (x - self.mean) / self.stddev * self.scale_factor + self.bias
+            for x in distances
+        ]

    def _update(self, distances: list[float]):
        for x in distances:
--- a/web/src/views/search/SearchView.tsx
+++ b/web/src/views/search/SearchView.tsx
@@ -189,19 +189,9 @@ export default function SearchView({

  // confidence score - probably needs tweaking

-  const zScoreToConfidence = (score: number, source: string) => {
-    let midpoint, scale;
-
-    if (source === "thumbnail") {
-      midpoint = 2;
-      scale = 0.5;
-    } else {
-      midpoint = 0.5;
-      scale = 1.5;
-    }
-
+  const zScoreToConfidence = (score: number) => {
    // Sigmoid function: 1 / (1 + e^x)
-    const confidence = 1 / (1 + Math.exp((score - midpoint) * scale));
+    const confidence = 1 / (1 + Math.exp(score));

    return Math.round(confidence * 100);
  };
@@ -412,21 +402,13 @@ export default function SearchView({
                                ) : (
                                  <LuText className="mr-1 size-3" />
                                )}
-                                {zScoreToConfidence(
-                                  value.search_distance,
-                                  value.search_source,
-                                )}
-                                %
+                                {zScoreToConfidence(value.search_distance)}%
                              </Chip>
                            </TooltipTrigger>
                            <TooltipPortal>
                              <TooltipContent>
                                Matched {value.search_source} at{" "}
-                                {zScoreToConfidence(
-                                  value.search_distance,
-                                  value.search_source,
-                                )}
-                                %
+                                {zScoreToConfidence(value.search_distance)}%
                              </TooltipContent>
                            </TooltipPortal>
                          </Tooltip>