Query SQLite first and pass those IDs to Chroma for embeddings search

This commit is contained in:
Josh Hawkins 2024-09-26 10:25:38 -05:00
parent 5415c6c999
commit d44750ab0d

View File

@ -404,87 +404,79 @@ def events_search(request: Request, params: EventsSearchQueryParams = Depends())
if include_thumbnails: if include_thumbnails:
selected_columns.append(Event.thumbnail) selected_columns.append(Event.thumbnail)
# Build the where clause for the embeddings query # Build the initial SQLite query filters
embeddings_filters = [] event_filters = []
if cameras != "all": if cameras != "all":
camera_list = cameras.split(",") camera_list = cameras.split(",")
embeddings_filters.append({"camera": {"$in": camera_list}}) event_filters.append((Event.camera << camera_list))
if labels != "all": if labels != "all":
label_list = labels.split(",") label_list = labels.split(",")
embeddings_filters.append({"label": {"$in": label_list}}) event_filters.append((Event.label << label_list))
if zones != "all": if zones != "all":
# use matching so events with multiple zones
# still match on a search where any zone matches
zone_clauses = []
filtered_zones = zones.split(",") filtered_zones = zones.split(",")
zone_filters = [{f"zones_{zone}": {"$eq": True}} for zone in filtered_zones]
if len(zone_filters) > 1: if "None" in filtered_zones:
embeddings_filters.append({"$or": zone_filters}) filtered_zones.remove("None")
else: zone_clauses.append((Event.zones.length() == 0))
embeddings_filters.append(zone_filters[0])
for zone in filtered_zones:
zone_clauses.append((Event.zones.cast("text") % f'*"{zone}"*'))
zone_clause = reduce(operator.or_, zone_clauses)
event_filters.append((zone_clause))
if after: if after:
embeddings_filters.append({"start_time": {"$gt": after}}) event_filters.append((Event.start_time > after))
if before: if before:
embeddings_filters.append({"start_time": {"$lt": before}}) event_filters.append((Event.start_time < before))
if time_range != DEFAULT_TIME_RANGE: if time_range != DEFAULT_TIME_RANGE:
# Get timezone arg to ensure browser times are used # get timezone arg to ensure browser times are used
tz_name = params.timezone tz_name = params.timezone
hour_modifier, minute_modifier, _ = get_tz_modifiers(tz_name) hour_modifier, minute_modifier, _ = get_tz_modifiers(tz_name)
times = time_range.split(",") times = time_range.split(",")
time_after = times[0] time_after = times[0]
time_before = times[1] time_before = times[1]
hour_modifier_value = int(hour_modifier.split()[0])
minute_modifier_value = int(minute_modifier.split()[0])
after_hour, after_minute = map(int, time_after.split(":")) start_hour_fun = fn.strftime(
before_hour, before_minute = map(int, time_before.split(":")) "%H:%M",
fn.datetime(Event.start_time, "unixepoch", hour_modifier, minute_modifier),
now = datetime.datetime.now()
tz_offset = datetime.timedelta(
hours=hour_modifier_value, minutes=minute_modifier_value
) )
after_time = ( # cases where user wants events overnight, ex: from 20:00 to 06:00
now.replace(hour=after_hour, minute=after_minute, second=0, microsecond=0) # should use or operator
+ tz_offset if time_after > time_before:
) event_filters.append(
before_time = ( (
now.replace(hour=before_hour, minute=before_minute, second=0, microsecond=0) reduce(
+ tz_offset operator.or_,
) [(start_hour_fun > time_after), (start_hour_fun < time_before)],
)
# Take midnight into account )
if after_time > before_time:
# Time range crosses midnight, so we need to split the filter
embeddings_filters.append(
{
"$or": [
{"start_time": {"$gte": after_time.timestamp()}},
{"start_time": {"$lt": before_time.timestamp()}},
]
}
) )
# all other cases should be and operator
else: else:
# Normal case where after_time is before before_time event_filters.append((start_hour_fun > time_after))
embeddings_filters.append( event_filters.append((start_hour_fun < time_before))
{
"$and": [
{"start_time": {"$gte": after_time.timestamp()}},
{"start_time": {"$lt": before_time.timestamp()}},
]
}
)
where = None filtered_event_ids = (
if len(embeddings_filters) > 1: Event.select(Event.id).where(*event_filters).tuples().iterator()
where = {"$and": embeddings_filters} )
elif len(embeddings_filters) == 1: event_ids = [event_id[0] for event_id in filtered_event_ids]
where = embeddings_filters[0]
if not event_ids:
return JSONResponse(content=[]) # No events to search on
# Build the Chroma where clause based on the event IDs
where = {"id": {"$in": event_ids}}
thumb_ids = {} thumb_ids = {}
desc_ids = {} desc_ids = {}