Add the ability to choose whether a text search matches against thumbnails, descriptions, or both

This commit is contained in:
Jason Hunter 2023-12-19 23:01:25 -05:00
parent 5ac1baaba3
commit a029f13a7f
2 changed files with 67 additions and 25 deletions

View File

@ -1055,6 +1055,7 @@ def events():
min_length = request.args.get("min_length", type=float)
max_length = request.args.get("max_length", type=float)
search = request.args.get("search", type=str) or None
search_type = request.args.get("search_type", "all")
like = request.args.get("like", type=str) or None
clauses = []
@ -1229,24 +1230,37 @@ def events():
n_results=int(limit),
where=where,
)
event_order = dict(
zip(thumb_result["ids"][0], thumb_result["distances"][0])
)
for i, event_id in enumerate(thumb_result["ids"][0]):
event_order[event_id] = {
"distance": thumb_result["distances"][0][i],
"source": "thumbnail",
}
# For like, we want to remove all other filters
clauses = [(Event.id << list(event_order.keys()))]
elif search is not None:
# Grab the ids of the events that match based on CLIP embeddings
thumb_ids = {}
if search_type in ["all", "thumbnail"]:
thumbnails: Collection = current_app.embeddings.thumbnail
thumb_result: QueryResult = thumbnails.query(
query_texts=[search],
n_results=int(limit),
where=where,
)
thumb_ids = dict(zip(thumb_result["ids"][0], thumb_result["distances"][0]))
# Do a rudimentary normalization of the difference in distances returned by CLIP and MiniLM.
thumb_ids = dict(
zip(
thumb_result["ids"][0],
[d / 100 for d in thumb_result["distances"][0]],
)
)
# Grab the ids of the events that match based on MiniLM embeddings
desc_ids = {}
if search_type in ["all", "description"]:
descriptions: Collection = current_app.embeddings.description
desc_result: QueryResult = descriptions.query(
query_texts=[search],
@ -1255,9 +1269,17 @@ def events():
)
desc_ids = dict(zip(desc_result["ids"][0], desc_result["distances"][0]))
event_order = {
k: min(i for i in (thumb_ids.get(k), desc_ids.get(k)) if i is not None)
for k in thumb_ids.keys() | desc_ids
for event_id in thumb_ids.keys() | desc_ids:
min_distance = min(
i
for i in (thumb_ids.get(event_id), desc_ids.get(event_id))
if i is not None
)
event_order[event_id] = {
"distance": min_distance,
"source": "thumbnail"
if min_distance == thumb_ids.get(event_id)
else "description",
}
# For search, we want to keep all the other clauses and filters
@ -1275,10 +1297,14 @@ def events():
if event_order:
events = [
{**events, "search_similarity": event_order[events["id"]]}
{
**events,
"search_distance": event_order[events["id"]]["distance"],
"search_source": event_order[events["id"]]["source"],
}
for events in events
]
events = sorted(events, key=lambda x: x["search_similarity"])[:limit]
events = sorted(events, key=lambda x: x["search_distance"])[:limit]
return jsonify(events)

View File

@ -37,6 +37,7 @@ import { Score } from '../icons/Score';
import { About } from '../icons/About';
import MenuIcon from '../icons/Menu';
import { MenuOpen } from '../icons/MenuOpen';
import Select from '../components/Select';
const API_LIMIT = 25;
@ -404,9 +405,24 @@ export default function Events({ path, ...props }) {
<div className="space-y-4 p-2 px-4 w-full">
<Heading>Events</Heading>
{config.semantic_search.enabled && (
<div className="flex flex-wrap gap-2 items-center">
<div className="flex gap-2">
<div className="basis-4/5">
<TextField label="Search" onChangeText={(text) => onChangeSearchText(text)} />
</div>
<div className="basis-1/5">
<Select
label="Search Type"
selected={searchParams?.search_type ?? 'all'}
paramName="search_type"
options={[
{ value: 'all', label: 'All' },
{ value: 'thumbnail', label: 'Thumbnails' },
{ value: 'description', label: 'Descriptions' },
]}
onChange={(value) => onFilter('search_type', value.search_type)}
/>
</div>
</div>
)}
<div className="flex flex-wrap gap-2 items-center">
<MultiSelect