Only save stats for multi-modal searches and only use cosine similarity for image -> image search

This commit is contained in:
Josh Hawkins 2024-10-11 12:59:51 -05:00
parent 673394d9a1
commit 1165c42a8d
3 changed files with 17 additions and 29 deletions

View File

@ -481,20 +481,15 @@ def events_search(request: Request, params: EventsSearchQueryParams = Depends())
else:
search_types = search_type.split(",")
# only normalize multi-modal searches
apply_normalization = (
"thumbnail" in search_types and "description" in search_types
)
# only save stats for multi-modal searches
save_stats = "thumbnail" in search_types and "description" in search_types
if "thumbnail" in search_types:
thumb_result = context.search_thumbnail(query)
if apply_normalization:
thumb_distances = context.thumb_stats.normalize(
[result[1] for result in thumb_result]
[result[1] for result in thumb_result], save_stats
)
else:
thumb_distances = [result[1] for result in thumb_result]
thumb_ids = dict(
zip([result[0] for result in thumb_result], thumb_distances)
@ -508,14 +503,10 @@ def events_search(request: Request, params: EventsSearchQueryParams = Depends())
if "description" in search_types:
desc_result = context.search_description(query)
if apply_normalization:
desc_distances = context.desc_stats.normalize(
[result[1] for result in desc_result]
[result[1] for result in desc_result], save_stats
)
else:
desc_distances = [
result[1] for result in desc_result
] # Use raw distances
desc_ids = dict(zip([result[0] for result in desc_result], desc_distances))

View File

@ -22,7 +22,8 @@ class ZScoreNormalization:
def stddev(self):
return math.sqrt(self.variance) if self.variance > 0 else 0.0
def normalize(self, distances: list[float]):
def normalize(self, distances: list[float], save_stats: bool):
if save_stats:
self._update(distances)
if self.stddev == 0:
return distances

View File

@ -190,17 +190,13 @@ export default function SearchView({
// confidence score
const zScoreToConfidence = (score: number) => {
// Normalizing is needed for multi-modal searches only
// Normalizing is not needed for similarity searches
// Sigmoid function for normalized: 1 / (1 + e^x)
// Cosine for non-normalized
// Cosine for similarity
if (searchFilter) {
const normalized =
!searchFilter.search_type ||
(Array.isArray(searchFilter.search_type) &&
searchFilter.search_type.length === 2 &&
searchFilter.search_type.includes("thumbnail") &&
searchFilter.search_type.includes("description"));
const confidence = normalized ? 1 / (1 + Math.exp(score)) : 1 - score;
const notNormalized = searchFilter?.search_type?.includes("similarity");
const confidence = notNormalized ? 1 - score : 1 / (1 + Math.exp(score));
return Math.round(confidence * 100);
}