remove unnecessary re-embed

This commit is contained in:
Josh Hawkins 2024-10-05 12:20:50 -05:00
parent 3e58a6dfaf
commit bc528123b3

View File

@@ -105,6 +105,8 @@ class Embeddings:
(event_id, serialize(embedding)), (event_id, serialize(embedding)),
) )
return embedding
def upsert_description(self, event_id: str, description: str): def upsert_description(self, event_id: str, description: str):
# Generate embedding using MiniLM # Generate embedding using MiniLM
embedding = self.minilm_embedding([description])[0] embedding = self.minilm_embedding([description])[0]
@@ -117,6 +119,8 @@ class Embeddings:
(event_id, serialize(embedding)), (event_id, serialize(embedding)),
) )
return embedding
def delete_thumbnail(self, event_ids: List[str]) -> None: def delete_thumbnail(self, event_ids: List[str]) -> None:
ids = ",".join(["?" for _ in event_ids]) ids = ",".join(["?" for _ in event_ids])
self.db.execute_sql( self.db.execute_sql(
@@ -147,11 +151,9 @@ class Embeddings:
row[0] row[0]
) # Deserialize the thumbnail embedding ) # Deserialize the thumbnail embedding
else: else:
# If no embedding found, generate it # If no embedding found, generate it and return it
thumbnail = base64.b64decode(query.thumbnail) thumbnail = base64.b64decode(query.thumbnail)
self.upsert_thumbnail(query.id, thumbnail) query_embedding = self.upsert_thumbnail(query.id, thumbnail)
image = Image.open(io.BytesIO(thumbnail)).convert("RGB")
query_embedding = self.clip_embedding([image])[0]
else: else:
query_embedding = self.clip_embedding([query])[0] query_embedding = self.clip_embedding([query])[0]
@@ -166,9 +168,12 @@ class Embeddings:
# Add the IN clause if event_ids is provided and not empty # Add the IN clause if event_ids is provided and not empty
# this is the only filter supported by sqlite-vec as of 0.1.3 # this is the only filter supported by sqlite-vec as of 0.1.3
# but it seems to be broken in this version
if event_ids: if event_ids:
sql_query += " AND id IN ({})".format(",".join("?" * len(event_ids))) sql_query += " AND id IN ({})".format(",".join("?" * len(event_ids)))
# order by distance DESC is not implemented in this version of sqlite-vec
# when it's implemented, we can use cosine similarity
sql_query += " ORDER BY distance" sql_query += " ORDER BY distance"
parameters = ( parameters = (
@@ -198,9 +203,12 @@ class Embeddings:
# Add the IN clause if event_ids is provided and not empty # Add the IN clause if event_ids is provided and not empty
# this is the only filter supported by sqlite-vec as of 0.1.3 # this is the only filter supported by sqlite-vec as of 0.1.3
# but it seems to be broken in this version
if event_ids: if event_ids:
sql_query += " AND id IN ({})".format(",".join("?" * len(event_ids))) sql_query += " AND id IN ({})".format(",".join("?" * len(event_ids)))
# order by distance DESC is not implemented in this version of sqlite-vec
# when it's implemented, we can use cosine similarity
sql_query += " ORDER BY distance" sql_query += " ORDER BY distance"
parameters = ( parameters = (