From 6294ce7807ab7ea151e52a7c257a2d0621cb8c12 Mon Sep 17 00:00:00 2001 From: Josh Hawkins <32435876+hawkeye217@users.noreply.github.com> Date: Thu, 17 Oct 2024 10:21:20 -0500 Subject: [PATCH 1/5] Adjust Explore settings (#14409) * Re-add search source chip without confidence percentage * add confidence to tooltip only * move search type to settings * padding tweak * docs update * docs clarity --- docs/docs/configuration/semantic_search.md | 6 +- .../overlay/dialog/SearchFilterDialog.tsx | 74 +--------------- .../components/settings/SearchSettings.tsx | 84 +++++++++++++++++++ web/src/views/search/SearchView.tsx | 50 ++++++++++- 4 files changed, 137 insertions(+), 77 deletions(-) diff --git a/docs/docs/configuration/semantic_search.md b/docs/docs/configuration/semantic_search.md index a569e8f1a..18093a479 100644 --- a/docs/docs/configuration/semantic_search.md +++ b/docs/docs/configuration/semantic_search.md @@ -53,7 +53,7 @@ semantic_search: ## Usage 1. Semantic search is used in conjunction with the other filters available on the Search page. Use a combination of traditional filtering and semantic search for the best results. -2. The comparison between text and image embedding distances generally means that results matching `description` will appear first, even if a `thumbnail` embedding may be a better match. Play with the "Search Type" filter to help find what you are looking for. -3. Make your search language and tone closely match your descriptions. If you are using thumbnail search, phrase your query as an image caption. +2. Because of how the AI models Frigate uses have been trained, the comparison between text and image embedding distances generally means that results matching `description` will appear first, even if a `thumbnail` embedding may be a better match. Play with the "Search Type" setting to help find what you are looking for. Note that if you are generating descriptions for specific objects or zones only, this may cause search results to prioritize the objects with descriptions even if the the ones without them are more relevant. +3. Make your search language and tone closely match your descriptions. If you are using thumbnail search, **phrase your query as an image caption**. For example "red car" will not work as well as "red sedan driving down a residential street on a sunny day". 4. Semantic search on thumbnails tends to return better results when matching large subjects that take up most of the frame. Small things like "cat" tend to not work well. -5. Experiment! Find a tracked object you want to test and start typing keywords to see what works for you. +5. Experiment! Find a tracked object you want to test and start typing keywords and phrases to see what works for you. diff --git a/web/src/components/overlay/dialog/SearchFilterDialog.tsx b/web/src/components/overlay/dialog/SearchFilterDialog.tsx index ad9fe1c2b..ed091b350 100644 --- a/web/src/components/overlay/dialog/SearchFilterDialog.tsx +++ b/web/src/components/overlay/dialog/SearchFilterDialog.tsx @@ -65,9 +65,7 @@ export default function SearchFilterDialog({ (currentFilter.min_score ?? 0) > 0.5 || (currentFilter.max_score ?? 1) < 1 || (currentFilter.zones?.length ?? 0) > 0 || - (currentFilter.sub_labels?.length ?? 0) > 0 || - (!currentFilter.search_type?.includes("similarity") && - (currentFilter.search_type?.length ?? 2) !== 2)), + (currentFilter.sub_labels?.length ?? 
0) > 0), [currentFilter], ); @@ -115,20 +113,6 @@ export default function SearchFilterDialog({ setCurrentFilter({ ...currentFilter, min_score: min, max_score: max }) } /> - {config?.semantic_search?.enabled && - !currentFilter?.search_type?.includes("similarity") && ( - - setCurrentFilter({ - ...currentFilter, - search_type: newSearchSource, - }) - } - /> - )} {isDesktop && }
)} + {config?.semantic_search?.enabled && ( + { + setSearchSources(sources as SearchSource[]); + onUpdateFilter({ ...filter, search_type: sources }); + }} + /> + )} ); @@ -113,3 +135,65 @@ export default function SearchSettings({ /> ); } + +type SearchTypeContentProps = { + searchSources: SearchSource[] | undefined; + setSearchSources: (sources: SearchSource[] | undefined) => void; +}; +export function SearchTypeContent({ + searchSources, + setSearchSources, +}: SearchTypeContentProps) { + return ( + <> +
+ +
+
Search Source
+
+ Choose whether to search the thumbnails or descriptions of your + tracked objects. +
+
+
+ { + const updatedSources = searchSources ? [...searchSources] : []; + + if (isChecked) { + updatedSources.push("thumbnail"); + setSearchSources(updatedSources); + } else { + if (updatedSources.length > 1) { + const index = updatedSources.indexOf("thumbnail"); + if (index !== -1) updatedSources.splice(index, 1); + setSearchSources(updatedSources); + } + } + }} + /> + { + const updatedSources = searchSources ? [...searchSources] : []; + + if (isChecked) { + updatedSources.push("description"); + setSearchSources(updatedSources); + } else { + if (updatedSources.length > 1) { + const index = updatedSources.indexOf("description"); + if (index !== -1) updatedSources.splice(index, 1); + setSearchSources(updatedSources); + } + } + }} + /> +
+
+ + ); +} diff --git a/web/src/views/search/SearchView.tsx b/web/src/views/search/SearchView.tsx index 665f7a4fd..07842fed6 100644 --- a/web/src/views/search/SearchView.tsx +++ b/web/src/views/search/SearchView.tsx @@ -10,7 +10,7 @@ import { FrigateConfig } from "@/types/frigateConfig"; import { SearchFilter, SearchResult, SearchSource } from "@/types/search"; import { useCallback, useEffect, useMemo, useRef, useState } from "react"; import { isMobileOnly } from "react-device-detect"; -import { LuSearchX } from "react-icons/lu"; +import { LuImage, LuSearchX, LuText } from "react-icons/lu"; import useSWR from "swr"; import ExploreView from "../explore/ExploreView"; import useKeyboardListener, { @@ -23,6 +23,13 @@ import { isEqual } from "lodash"; import { formatDateToLocaleString } from "@/utils/dateUtil"; import SearchThumbnailFooter from "@/components/card/SearchThumbnailFooter"; import SearchSettings from "@/components/settings/SearchSettings"; +import { + Tooltip, + TooltipContent, + TooltipTrigger, +} from "@/components/ui/tooltip"; +import Chip from "@/components/indicators/Chip"; +import { TooltipPortal } from "@radix-ui/react-tooltip"; type SearchViewProps = { search: string; @@ -182,6 +189,21 @@ export default function SearchView({ setSelectedIndex(0); }, [searchTerm, searchFilter]); + // confidence score + + const zScoreToConfidence = (score: number) => { + // Normalizing is not needed for similarity searches + // Sigmoid function for normalized: 1 / (1 + e^x) + // Cosine for similarity + if (searchFilter) { + const notNormalized = searchFilter?.search_type?.includes("similarity"); + + const confidence = notNormalized ? 1 - score : 1 / (1 + Math.exp(score)); + + return Math.round(confidence * 100); + } + }; + // update search detail when results change useEffect(() => { @@ -351,6 +373,8 @@ export default function SearchView({ setColumns={setColumns} defaultView={defaultView} setDefaultView={setDefaultView} + filter={searchFilter} + onUpdateFilter={onUpdateFilter} /> @@ -398,6 +422,30 @@ export default function SearchView({ searchResult={value} onClick={() => onSelectSearch(value, index)} /> + {(searchTerm || + searchFilter?.search_type?.includes("similarity")) && ( +
+ + + + {value.search_source == "thumbnail" ? ( + + ) : ( + + )} + + + + + Matched {value.search_source} at{" "} + {zScoreToConfidence(value.search_distance)}% + + + +
+ )}
Date: Thu, 17 Oct 2024 10:02:27 -0600 Subject: [PATCH 2/5] Various fixes (#14410) * Fix access * Reorganize tracked object for imports * Separate out rockchip build * Formatting * Use original ffmpeg build * Fix build * Update default search type value --- .github/workflows/ci.yml | 22 + docker/main/install_deps.sh | 5 +- frigate/api/defs/events_query_parameters.py | 2 +- frigate/object_processing.py | 456 +------------------- frigate/ptz/autotrack.py | 26 +- frigate/test/test_obects.py | 4 +- frigate/track/object_attribute.py | 44 -- frigate/track/tracked_object.py | 447 +++++++++++++++++++ frigate/util/image.py | 66 +++ frigate/video.py | 6 +- 10 files changed, 563 insertions(+), 515 deletions(-) delete mode 100644 frigate/track/object_attribute.py create mode 100644 frigate/track/tracked_object.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bbf47a57d..3a5a67041 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -155,6 +155,28 @@ jobs: tensorrt.tags=${{ steps.setup.outputs.image-name }}-tensorrt *.cache-from=type=registry,ref=${{ steps.setup.outputs.cache-name }}-amd64 *.cache-to=type=registry,ref=${{ steps.setup.outputs.cache-name }}-amd64,mode=max + arm64_extra_builds: + runs-on: ubuntu-latest + name: ARM Extra Build + needs: + - arm64_build + steps: + - name: Check out code + uses: actions/checkout@v4 + - name: Set up QEMU and Buildx + id: setup + uses: ./.github/actions/setup + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Build and push Rockchip build + uses: docker/bake-action@v3 + with: + push: true + targets: rk + files: docker/rockchip/rk.hcl + set: | + rk.tags=${{ steps.setup.outputs.image-name }}-rk + *.cache-from=type=gha combined_extra_builds: runs-on: ubuntu-latest name: Combined Extra Builds diff --git a/docker/main/install_deps.sh b/docker/main/install_deps.sh index 46f2a5357..2d7662053 100755 --- a/docker/main/install_deps.sh +++ b/docker/main/install_deps.sh @@ -8,6 +8,7 @@ apt-get -qq install --no-install-recommends -y \ apt-transport-https \ gnupg \ wget \ + lbzip2 \ procps vainfo \ unzip locales tzdata libxml2 xz-utils \ python3.9 \ @@ -45,7 +46,7 @@ if [[ "${TARGETARCH}" == "amd64" ]]; then wget -qO btbn-ffmpeg.tar.xz "https://github.com/NickM-27/FFmpeg-Builds/releases/download/autobuild-2022-07-31-12-37/ffmpeg-n5.1-2-g915ef932a3-linux64-gpl-5.1.tar.xz" tar -xf btbn-ffmpeg.tar.xz -C /usr/lib/ffmpeg/5.0 --strip-components 1 rm -rf btbn-ffmpeg.tar.xz /usr/lib/ffmpeg/5.0/doc /usr/lib/ffmpeg/5.0/bin/ffplay - wget -qO btbn-ffmpeg.tar.xz "https://github.com/BtbN/FFmpeg-Builds/releases/download/autobuild-2024-09-30-15-36/ffmpeg-n7.1-linux64-gpl-7.1.tar.xz" + wget -qO btbn-ffmpeg.tar.xz "https://github.com/NickM-27/FFmpeg-Builds/releases/download/autobuild-2024-09-19-12-51/ffmpeg-n7.0.2-18-g3e6cec1286-linux64-gpl-7.0.tar.xz" tar -xf btbn-ffmpeg.tar.xz -C /usr/lib/ffmpeg/7.0 --strip-components 1 rm -rf btbn-ffmpeg.tar.xz /usr/lib/ffmpeg/7.0/doc /usr/lib/ffmpeg/7.0/bin/ffplay fi @@ -57,7 +58,7 @@ if [[ "${TARGETARCH}" == "arm64" ]]; then wget -qO btbn-ffmpeg.tar.xz "https://github.com/NickM-27/FFmpeg-Builds/releases/download/autobuild-2022-07-31-12-37/ffmpeg-n5.1-2-g915ef932a3-linuxarm64-gpl-5.1.tar.xz" tar -xf btbn-ffmpeg.tar.xz -C /usr/lib/ffmpeg/5.0 --strip-components 1 rm -rf btbn-ffmpeg.tar.xz /usr/lib/ffmpeg/5.0/doc /usr/lib/ffmpeg/5.0/bin/ffplay - wget -qO btbn-ffmpeg.tar.xz "https://github.com/BtbN/FFmpeg-Builds/releases/download/autobuild-2024-09-30-15-36/ffmpeg-n7.1-linuxarm64-gpl-7.1.tar.xz" + wget -qO 
btbn-ffmpeg.tar.xz "https://github.com/NickM-27/FFmpeg-Builds/releases/download/autobuild-2024-09-19-12-51/ffmpeg-n7.0.2-18-g3e6cec1286-linuxarm64-gpl-7.0.tar.xz" tar -xf btbn-ffmpeg.tar.xz -C /usr/lib/ffmpeg/7.0 --strip-components 1 rm -rf btbn-ffmpeg.tar.xz /usr/lib/ffmpeg/7.0/doc /usr/lib/ffmpeg/7.0/bin/ffplay fi diff --git a/frigate/api/defs/events_query_parameters.py b/frigate/api/defs/events_query_parameters.py index c4e40bd4e..f4c98809c 100644 --- a/frigate/api/defs/events_query_parameters.py +++ b/frigate/api/defs/events_query_parameters.py @@ -35,7 +35,7 @@ class EventsQueryParams(BaseModel): class EventsSearchQueryParams(BaseModel): query: Optional[str] = None event_id: Optional[str] = None - search_type: Optional[str] = "thumbnail,description" + search_type: Optional[str] = "thumbnail" include_thumbnails: Optional[int] = 1 limit: Optional[int] = 50 cameras: Optional[str] = "all" diff --git a/frigate/object_processing.py b/frigate/object_processing.py index 6e63562a4..7ba3270f1 100644 --- a/frigate/object_processing.py +++ b/frigate/object_processing.py @@ -1,4 +1,3 @@ -import base64 import datetime import json import logging @@ -7,7 +6,6 @@ import queue import threading from collections import Counter, defaultdict from multiprocessing.synchronize import Event as MpEvent -from statistics import median from typing import Callable import cv2 @@ -18,9 +16,7 @@ from frigate.comms.dispatcher import Dispatcher from frigate.comms.events_updater import EventEndSubscriber, EventUpdatePublisher from frigate.comms.inter_process import InterProcessRequestor from frigate.config import ( - CameraConfig, FrigateConfig, - ModelConfig, MqttConfig, RecordConfig, SnapshotsConfig, @@ -29,466 +25,18 @@ from frigate.config import ( from frigate.const import CLIPS_DIR, UPDATE_CAMERA_ACTIVITY from frigate.events.types import EventStateEnum, EventTypeEnum from frigate.ptz.autotrack import PtzAutoTrackerThread +from frigate.track.tracked_object import TrackedObject from frigate.util.image import ( SharedMemoryFrameManager, - area, - calculate_region, draw_box_with_label, draw_timestamp, + is_better_thumbnail, is_label_printable, ) logger = logging.getLogger(__name__) -def on_edge(box, frame_shape): - if ( - box[0] == 0 - or box[1] == 0 - or box[2] == frame_shape[1] - 1 - or box[3] == frame_shape[0] - 1 - ): - return True - - -def has_better_attr(current_thumb, new_obj, attr_label) -> bool: - max_new_attr = max( - [0] - + [area(a["box"]) for a in new_obj["attributes"] if a["label"] == attr_label] - ) - max_current_attr = max( - [0] - + [ - area(a["box"]) - for a in current_thumb["attributes"] - if a["label"] == attr_label - ] - ) - - # if the thumb has a higher scoring attr - return max_new_attr > max_current_attr - - -def is_better_thumbnail(label, current_thumb, new_obj, frame_shape) -> bool: - # larger is better - # cutoff images are less ideal, but they should also be smaller? 
- # better scores are obviously better too - - # check face on person - if label == "person": - if has_better_attr(current_thumb, new_obj, "face"): - return True - # if the current thumb has a face attr, dont update unless it gets better - if any([a["label"] == "face" for a in current_thumb["attributes"]]): - return False - - # check license_plate on car - if label == "car": - if has_better_attr(current_thumb, new_obj, "license_plate"): - return True - # if the current thumb has a license_plate attr, dont update unless it gets better - if any([a["label"] == "license_plate" for a in current_thumb["attributes"]]): - return False - - # if the new_thumb is on an edge, and the current thumb is not - if on_edge(new_obj["box"], frame_shape) and not on_edge( - current_thumb["box"], frame_shape - ): - return False - - # if the score is better by more than 5% - if new_obj["score"] > current_thumb["score"] + 0.05: - return True - - # if the area is 10% larger - if new_obj["area"] > current_thumb["area"] * 1.1: - return True - - return False - - -class TrackedObject: - def __init__( - self, - model_config: ModelConfig, - camera_config: CameraConfig, - frame_cache, - obj_data: dict[str, any], - ): - # set the score history then remove as it is not part of object state - self.score_history = obj_data["score_history"] - del obj_data["score_history"] - - self.obj_data = obj_data - self.colormap = model_config.colormap - self.logos = model_config.all_attribute_logos - self.camera_config = camera_config - self.frame_cache = frame_cache - self.zone_presence: dict[str, int] = {} - self.zone_loitering: dict[str, int] = {} - self.current_zones = [] - self.entered_zones = [] - self.attributes = defaultdict(float) - self.false_positive = True - self.has_clip = False - self.has_snapshot = False - self.top_score = self.computed_score = 0.0 - self.thumbnail_data = None - self.last_updated = 0 - self.last_published = 0 - self.frame = None - self.active = True - self.pending_loitering = False - self.previous = self.to_dict() - - def _is_false_positive(self): - # once a true positive, always a true positive - if not self.false_positive: - return False - - threshold = self.camera_config.objects.filters[self.obj_data["label"]].threshold - return self.computed_score < threshold - - def compute_score(self): - """get median of scores for object.""" - return median(self.score_history) - - def update(self, current_frame_time: float, obj_data, has_valid_frame: bool): - thumb_update = False - significant_change = False - autotracker_update = False - # if the object is not in the current frame, add a 0.0 to the score history - if obj_data["frame_time"] != current_frame_time: - self.score_history.append(0.0) - else: - self.score_history.append(obj_data["score"]) - - # only keep the last 10 scores - if len(self.score_history) > 10: - self.score_history = self.score_history[-10:] - - # calculate if this is a false positive - self.computed_score = self.compute_score() - if self.computed_score > self.top_score: - self.top_score = self.computed_score - self.false_positive = self._is_false_positive() - self.active = self.is_active() - - if not self.false_positive and has_valid_frame: - # determine if this frame is a better thumbnail - if self.thumbnail_data is None or is_better_thumbnail( - self.obj_data["label"], - self.thumbnail_data, - obj_data, - self.camera_config.frame_shape, - ): - self.thumbnail_data = { - "frame_time": current_frame_time, - "box": obj_data["box"], - "area": obj_data["area"], - "region": obj_data["region"], - 
"score": obj_data["score"], - "attributes": obj_data["attributes"], - } - thumb_update = True - - # check zones - current_zones = [] - bottom_center = (obj_data["centroid"][0], obj_data["box"][3]) - in_loitering_zone = False - - # check each zone - for name, zone in self.camera_config.zones.items(): - # if the zone is not for this object type, skip - if len(zone.objects) > 0 and obj_data["label"] not in zone.objects: - continue - contour = zone.contour - zone_score = self.zone_presence.get(name, 0) + 1 - # check if the object is in the zone - if cv2.pointPolygonTest(contour, bottom_center, False) >= 0: - # if the object passed the filters once, dont apply again - if name in self.current_zones or not zone_filtered(self, zone.filters): - # an object is only considered present in a zone if it has a zone inertia of 3+ - if zone_score >= zone.inertia: - # if the zone has loitering time, update loitering status - if zone.loitering_time > 0: - in_loitering_zone = True - - loitering_score = self.zone_loitering.get(name, 0) + 1 - - # loitering time is configured as seconds, convert to count of frames - if loitering_score >= ( - self.camera_config.zones[name].loitering_time - * self.camera_config.detect.fps - ): - current_zones.append(name) - - if name not in self.entered_zones: - self.entered_zones.append(name) - else: - self.zone_loitering[name] = loitering_score - else: - self.zone_presence[name] = zone_score - else: - # once an object has a zone inertia of 3+ it is not checked anymore - if 0 < zone_score < zone.inertia: - self.zone_presence[name] = zone_score - 1 - - # update loitering status - self.pending_loitering = in_loitering_zone - - # maintain attributes - for attr in obj_data["attributes"]: - if self.attributes[attr["label"]] < attr["score"]: - self.attributes[attr["label"]] = attr["score"] - - # populate the sub_label for object with highest scoring logo - if self.obj_data["label"] in ["car", "package", "person"]: - recognized_logos = { - k: self.attributes[k] for k in self.logos if k in self.attributes - } - if len(recognized_logos) > 0: - max_logo = max(recognized_logos, key=recognized_logos.get) - - # don't overwrite sub label if it is already set - if ( - self.obj_data.get("sub_label") is None - or self.obj_data["sub_label"][0] == max_logo - ): - self.obj_data["sub_label"] = (max_logo, recognized_logos[max_logo]) - - # check for significant change - if not self.false_positive: - # if the zones changed, signal an update - if set(self.current_zones) != set(current_zones): - significant_change = True - - # if the position changed, signal an update - if self.obj_data["position_changes"] != obj_data["position_changes"]: - significant_change = True - - if self.obj_data["attributes"] != obj_data["attributes"]: - significant_change = True - - # if the state changed between stationary and active - if self.previous["active"] != self.active: - significant_change = True - - # update at least once per minute - if self.obj_data["frame_time"] - self.previous["frame_time"] > 60: - significant_change = True - - # update autotrack at most 3 objects per second - if self.obj_data["frame_time"] - self.previous["frame_time"] >= (1 / 3): - autotracker_update = True - - self.obj_data.update(obj_data) - self.current_zones = current_zones - return (thumb_update, significant_change, autotracker_update) - - def to_dict(self, include_thumbnail: bool = False): - event = { - "id": self.obj_data["id"], - "camera": self.camera_config.name, - "frame_time": self.obj_data["frame_time"], - "snapshot": 
self.thumbnail_data, - "label": self.obj_data["label"], - "sub_label": self.obj_data.get("sub_label"), - "top_score": self.top_score, - "false_positive": self.false_positive, - "start_time": self.obj_data["start_time"], - "end_time": self.obj_data.get("end_time", None), - "score": self.obj_data["score"], - "box": self.obj_data["box"], - "area": self.obj_data["area"], - "ratio": self.obj_data["ratio"], - "region": self.obj_data["region"], - "active": self.active, - "stationary": not self.active, - "motionless_count": self.obj_data["motionless_count"], - "position_changes": self.obj_data["position_changes"], - "current_zones": self.current_zones.copy(), - "entered_zones": self.entered_zones.copy(), - "has_clip": self.has_clip, - "has_snapshot": self.has_snapshot, - "attributes": self.attributes, - "current_attributes": self.obj_data["attributes"], - "pending_loitering": self.pending_loitering, - } - - if include_thumbnail: - event["thumbnail"] = base64.b64encode(self.get_thumbnail()).decode("utf-8") - - return event - - def is_active(self): - return not self.is_stationary() - - def is_stationary(self): - return ( - self.obj_data["motionless_count"] - > self.camera_config.detect.stationary.threshold - ) - - def get_thumbnail(self): - if ( - self.thumbnail_data is None - or self.thumbnail_data["frame_time"] not in self.frame_cache - ): - ret, jpg = cv2.imencode(".jpg", np.zeros((175, 175, 3), np.uint8)) - - jpg_bytes = self.get_jpg_bytes( - timestamp=False, bounding_box=False, crop=True, height=175 - ) - - if jpg_bytes: - return jpg_bytes - else: - ret, jpg = cv2.imencode(".jpg", np.zeros((175, 175, 3), np.uint8)) - return jpg.tobytes() - - def get_clean_png(self): - if self.thumbnail_data is None: - return None - - try: - best_frame = cv2.cvtColor( - self.frame_cache[self.thumbnail_data["frame_time"]], - cv2.COLOR_YUV2BGR_I420, - ) - except KeyError: - logger.warning( - f"Unable to create clean png because frame {self.thumbnail_data['frame_time']} is not in the cache" - ) - return None - - ret, png = cv2.imencode(".png", best_frame) - if ret: - return png.tobytes() - else: - return None - - def get_jpg_bytes( - self, timestamp=False, bounding_box=False, crop=False, height=None, quality=70 - ): - if self.thumbnail_data is None: - return None - - try: - best_frame = cv2.cvtColor( - self.frame_cache[self.thumbnail_data["frame_time"]], - cv2.COLOR_YUV2BGR_I420, - ) - except KeyError: - logger.warning( - f"Unable to create jpg because frame {self.thumbnail_data['frame_time']} is not in the cache" - ) - return None - - if bounding_box: - thickness = 2 - color = self.colormap[self.obj_data["label"]] - - # draw the bounding boxes on the frame - box = self.thumbnail_data["box"] - draw_box_with_label( - best_frame, - box[0], - box[1], - box[2], - box[3], - self.obj_data["label"], - f"{int(self.thumbnail_data['score']*100)}% {int(self.thumbnail_data['area'])}", - thickness=thickness, - color=color, - ) - - # draw any attributes - for attribute in self.thumbnail_data["attributes"]: - box = attribute["box"] - draw_box_with_label( - best_frame, - box[0], - box[1], - box[2], - box[3], - attribute["label"], - f"{attribute['score']:.0%}", - thickness=thickness, - color=color, - ) - - if crop: - box = self.thumbnail_data["box"] - box_size = 300 - region = calculate_region( - best_frame.shape, - box[0], - box[1], - box[2], - box[3], - box_size, - multiplier=1.1, - ) - best_frame = best_frame[region[1] : region[3], region[0] : region[2]] - - if height: - width = int(height * best_frame.shape[1] / 
best_frame.shape[0]) - best_frame = cv2.resize( - best_frame, dsize=(width, height), interpolation=cv2.INTER_AREA - ) - if timestamp: - color = self.camera_config.timestamp_style.color - draw_timestamp( - best_frame, - self.thumbnail_data["frame_time"], - self.camera_config.timestamp_style.format, - font_effect=self.camera_config.timestamp_style.effect, - font_thickness=self.camera_config.timestamp_style.thickness, - font_color=(color.blue, color.green, color.red), - position=self.camera_config.timestamp_style.position, - ) - - ret, jpg = cv2.imencode( - ".jpg", best_frame, [int(cv2.IMWRITE_JPEG_QUALITY), quality] - ) - if ret: - return jpg.tobytes() - else: - return None - - -def zone_filtered(obj: TrackedObject, object_config): - object_name = obj.obj_data["label"] - - if object_name in object_config: - obj_settings = object_config[object_name] - - # if the min area is larger than the - # detected object, don't add it to detected objects - if obj_settings.min_area > obj.obj_data["area"]: - return True - - # if the detected object is larger than the - # max area, don't add it to detected objects - if obj_settings.max_area < obj.obj_data["area"]: - return True - - # if the score is lower than the threshold, skip - if obj_settings.threshold > obj.computed_score: - return True - - # if the object is not proportionally wide enough - if obj_settings.min_ratio > obj.obj_data["ratio"]: - return True - - # if the object is proportionally too wide - if obj_settings.max_ratio < obj.obj_data["ratio"]: - return True - - return False - - # Maintains the state of a camera class CameraState: def __init__( diff --git a/frigate/ptz/autotrack.py b/frigate/ptz/autotrack.py index fd9933bcb..e9226f267 100644 --- a/frigate/ptz/autotrack.py +++ b/frigate/ptz/autotrack.py @@ -32,6 +32,7 @@ from frigate.const import ( CONFIG_DIR, ) from frigate.ptz.onvif import OnvifController +from frigate.track.tracked_object import TrackedObject from frigate.util.builtin import update_yaml_file from frigate.util.image import SharedMemoryFrameManager, intersection_over_union @@ -214,7 +215,7 @@ class PtzAutoTracker: ): self._autotracker_setup(camera_config, camera) - def _autotracker_setup(self, camera_config, camera): + def _autotracker_setup(self, camera_config: CameraConfig, camera: str): logger.debug(f"{camera}: Autotracker init") self.object_types[camera] = camera_config.onvif.autotracking.track @@ -852,7 +853,7 @@ class PtzAutoTracker: logger.debug(f"{camera}: Valid velocity ") return True, velocities.flatten() - def _get_distance_threshold(self, camera, obj): + def _get_distance_threshold(self, camera: str, obj: TrackedObject): # Returns true if Euclidean distance from object to center of frame is # less than 10% of the of the larger dimension (width or height) of the frame, # multiplied by a scaling factor for object size. 
@@ -888,7 +889,9 @@ class PtzAutoTracker: return distance_threshold - def _should_zoom_in(self, camera, obj, box, predicted_time, debug_zooming=False): + def _should_zoom_in( + self, camera: str, obj: TrackedObject, box, predicted_time, debug_zooming=False + ): # returns True if we should zoom in, False if we should zoom out, None to do nothing camera_config = self.config.cameras[camera] camera_width = camera_config.frame_shape[1] @@ -1019,7 +1022,7 @@ class PtzAutoTracker: # Don't zoom at all return None - def _autotrack_move_ptz(self, camera, obj): + def _autotrack_move_ptz(self, camera: str, obj: TrackedObject): camera_config = self.config.cameras[camera] camera_width = camera_config.frame_shape[1] camera_height = camera_config.frame_shape[0] @@ -1090,7 +1093,12 @@ class PtzAutoTracker: self._enqueue_move(camera, obj.obj_data["frame_time"], 0, 0, zoom) def _get_zoom_amount( - self, camera, obj, predicted_box, predicted_movement_time, debug_zoom=True + self, + camera: str, + obj: TrackedObject, + predicted_box, + predicted_movement_time, + debug_zoom=True, ): camera_config = self.config.cameras[camera] @@ -1186,13 +1194,13 @@ class PtzAutoTracker: return zoom - def is_autotracking(self, camera): + def is_autotracking(self, camera: str): return self.tracked_object[camera] is not None - def autotracked_object_region(self, camera): + def autotracked_object_region(self, camera: str): return self.tracked_object[camera]["region"] - def autotrack_object(self, camera, obj): + def autotrack_object(self, camera: str, obj: TrackedObject): camera_config = self.config.cameras[camera] if camera_config.onvif.autotracking.enabled: @@ -1208,7 +1216,7 @@ class PtzAutoTracker: if ( # new object self.tracked_object[camera] is None - and obj.camera == camera + and obj.camera_config.name == camera and obj.obj_data["label"] in self.object_types[camera] and set(obj.entered_zones) & set(self.required_zones[camera]) and not obj.previous["false_positive"] diff --git a/frigate/test/test_obects.py b/frigate/test/test_obects.py index f1c039ef8..8fe831980 100644 --- a/frigate/test/test_obects.py +++ b/frigate/test/test_obects.py @@ -1,11 +1,11 @@ import unittest -from frigate.track.object_attribute import ObjectAttribute +from frigate.track.tracked_object import TrackedObjectAttribute class TestAttribute(unittest.TestCase): def test_overlapping_object_selection(self) -> None: - attribute = ObjectAttribute( + attribute = TrackedObjectAttribute( ( "amazon", 0.80078125, diff --git a/frigate/track/object_attribute.py b/frigate/track/object_attribute.py deleted file mode 100644 index 54433c5f3..000000000 --- a/frigate/track/object_attribute.py +++ /dev/null @@ -1,44 +0,0 @@ -"""Object attribute.""" - -from frigate.util.object import area, box_inside - - -class ObjectAttribute: - def __init__(self, raw_data: tuple) -> None: - self.label = raw_data[0] - self.score = raw_data[1] - self.box = raw_data[2] - self.area = raw_data[3] - self.ratio = raw_data[4] - self.region = raw_data[5] - - def get_tracking_data(self) -> dict[str, any]: - """Return data saved to the object.""" - return { - "label": self.label, - "score": self.score, - "box": self.box, - } - - def find_best_object(self, objects: list[dict[str, any]]) -> str: - """Find the best attribute for each object and return its ID.""" - best_object_area = None - best_object_id = None - - for obj in objects: - if not box_inside(obj["box"], self.box): - continue - - object_area = area(obj["box"]) - - # if multiple objects have the same attribute then they - # are overlapping, 
it is most likely that the smaller object - # is the one with the attribute - if best_object_area is None: - best_object_area = object_area - best_object_id = obj["id"] - elif object_area < best_object_area: - best_object_area = object_area - best_object_id = obj["id"] - - return best_object_id diff --git a/frigate/track/tracked_object.py b/frigate/track/tracked_object.py new file mode 100644 index 000000000..a4b4e8426 --- /dev/null +++ b/frigate/track/tracked_object.py @@ -0,0 +1,447 @@ +"""Object attribute.""" + +import base64 +import logging +from collections import defaultdict +from statistics import median + +import cv2 +import numpy as np + +from frigate.config import ( + CameraConfig, + ModelConfig, +) +from frigate.util.image import ( + area, + calculate_region, + draw_box_with_label, + draw_timestamp, + is_better_thumbnail, +) +from frigate.util.object import box_inside + +logger = logging.getLogger(__name__) + + +class TrackedObject: + def __init__( + self, + model_config: ModelConfig, + camera_config: CameraConfig, + frame_cache, + obj_data: dict[str, any], + ): + # set the score history then remove as it is not part of object state + self.score_history = obj_data["score_history"] + del obj_data["score_history"] + + self.obj_data = obj_data + self.colormap = model_config.colormap + self.logos = model_config.all_attribute_logos + self.camera_config = camera_config + self.frame_cache = frame_cache + self.zone_presence: dict[str, int] = {} + self.zone_loitering: dict[str, int] = {} + self.current_zones = [] + self.entered_zones = [] + self.attributes = defaultdict(float) + self.false_positive = True + self.has_clip = False + self.has_snapshot = False + self.top_score = self.computed_score = 0.0 + self.thumbnail_data = None + self.last_updated = 0 + self.last_published = 0 + self.frame = None + self.active = True + self.pending_loitering = False + self.previous = self.to_dict() + + def _is_false_positive(self): + # once a true positive, always a true positive + if not self.false_positive: + return False + + threshold = self.camera_config.objects.filters[self.obj_data["label"]].threshold + return self.computed_score < threshold + + def compute_score(self): + """get median of scores for object.""" + return median(self.score_history) + + def update(self, current_frame_time: float, obj_data, has_valid_frame: bool): + thumb_update = False + significant_change = False + autotracker_update = False + # if the object is not in the current frame, add a 0.0 to the score history + if obj_data["frame_time"] != current_frame_time: + self.score_history.append(0.0) + else: + self.score_history.append(obj_data["score"]) + + # only keep the last 10 scores + if len(self.score_history) > 10: + self.score_history = self.score_history[-10:] + + # calculate if this is a false positive + self.computed_score = self.compute_score() + if self.computed_score > self.top_score: + self.top_score = self.computed_score + self.false_positive = self._is_false_positive() + self.active = self.is_active() + + if not self.false_positive and has_valid_frame: + # determine if this frame is a better thumbnail + if self.thumbnail_data is None or is_better_thumbnail( + self.obj_data["label"], + self.thumbnail_data, + obj_data, + self.camera_config.frame_shape, + ): + self.thumbnail_data = { + "frame_time": current_frame_time, + "box": obj_data["box"], + "area": obj_data["area"], + "region": obj_data["region"], + "score": obj_data["score"], + "attributes": obj_data["attributes"], + } + thumb_update = True + + # check zones + 
current_zones = [] + bottom_center = (obj_data["centroid"][0], obj_data["box"][3]) + in_loitering_zone = False + + # check each zone + for name, zone in self.camera_config.zones.items(): + # if the zone is not for this object type, skip + if len(zone.objects) > 0 and obj_data["label"] not in zone.objects: + continue + contour = zone.contour + zone_score = self.zone_presence.get(name, 0) + 1 + # check if the object is in the zone + if cv2.pointPolygonTest(contour, bottom_center, False) >= 0: + # if the object passed the filters once, dont apply again + if name in self.current_zones or not zone_filtered(self, zone.filters): + # an object is only considered present in a zone if it has a zone inertia of 3+ + if zone_score >= zone.inertia: + # if the zone has loitering time, update loitering status + if zone.loitering_time > 0: + in_loitering_zone = True + + loitering_score = self.zone_loitering.get(name, 0) + 1 + + # loitering time is configured as seconds, convert to count of frames + if loitering_score >= ( + self.camera_config.zones[name].loitering_time + * self.camera_config.detect.fps + ): + current_zones.append(name) + + if name not in self.entered_zones: + self.entered_zones.append(name) + else: + self.zone_loitering[name] = loitering_score + else: + self.zone_presence[name] = zone_score + else: + # once an object has a zone inertia of 3+ it is not checked anymore + if 0 < zone_score < zone.inertia: + self.zone_presence[name] = zone_score - 1 + + # update loitering status + self.pending_loitering = in_loitering_zone + + # maintain attributes + for attr in obj_data["attributes"]: + if self.attributes[attr["label"]] < attr["score"]: + self.attributes[attr["label"]] = attr["score"] + + # populate the sub_label for object with highest scoring logo + if self.obj_data["label"] in ["car", "package", "person"]: + recognized_logos = { + k: self.attributes[k] for k in self.logos if k in self.attributes + } + if len(recognized_logos) > 0: + max_logo = max(recognized_logos, key=recognized_logos.get) + + # don't overwrite sub label if it is already set + if ( + self.obj_data.get("sub_label") is None + or self.obj_data["sub_label"][0] == max_logo + ): + self.obj_data["sub_label"] = (max_logo, recognized_logos[max_logo]) + + # check for significant change + if not self.false_positive: + # if the zones changed, signal an update + if set(self.current_zones) != set(current_zones): + significant_change = True + + # if the position changed, signal an update + if self.obj_data["position_changes"] != obj_data["position_changes"]: + significant_change = True + + if self.obj_data["attributes"] != obj_data["attributes"]: + significant_change = True + + # if the state changed between stationary and active + if self.previous["active"] != self.active: + significant_change = True + + # update at least once per minute + if self.obj_data["frame_time"] - self.previous["frame_time"] > 60: + significant_change = True + + # update autotrack at most 3 objects per second + if self.obj_data["frame_time"] - self.previous["frame_time"] >= (1 / 3): + autotracker_update = True + + self.obj_data.update(obj_data) + self.current_zones = current_zones + return (thumb_update, significant_change, autotracker_update) + + def to_dict(self, include_thumbnail: bool = False): + event = { + "id": self.obj_data["id"], + "camera": self.camera_config.name, + "frame_time": self.obj_data["frame_time"], + "snapshot": self.thumbnail_data, + "label": self.obj_data["label"], + "sub_label": self.obj_data.get("sub_label"), + "top_score": 
self.top_score, + "false_positive": self.false_positive, + "start_time": self.obj_data["start_time"], + "end_time": self.obj_data.get("end_time", None), + "score": self.obj_data["score"], + "box": self.obj_data["box"], + "area": self.obj_data["area"], + "ratio": self.obj_data["ratio"], + "region": self.obj_data["region"], + "active": self.active, + "stationary": not self.active, + "motionless_count": self.obj_data["motionless_count"], + "position_changes": self.obj_data["position_changes"], + "current_zones": self.current_zones.copy(), + "entered_zones": self.entered_zones.copy(), + "has_clip": self.has_clip, + "has_snapshot": self.has_snapshot, + "attributes": self.attributes, + "current_attributes": self.obj_data["attributes"], + "pending_loitering": self.pending_loitering, + } + + if include_thumbnail: + event["thumbnail"] = base64.b64encode(self.get_thumbnail()).decode("utf-8") + + return event + + def is_active(self): + return not self.is_stationary() + + def is_stationary(self): + return ( + self.obj_data["motionless_count"] + > self.camera_config.detect.stationary.threshold + ) + + def get_thumbnail(self): + if ( + self.thumbnail_data is None + or self.thumbnail_data["frame_time"] not in self.frame_cache + ): + ret, jpg = cv2.imencode(".jpg", np.zeros((175, 175, 3), np.uint8)) + + jpg_bytes = self.get_jpg_bytes( + timestamp=False, bounding_box=False, crop=True, height=175 + ) + + if jpg_bytes: + return jpg_bytes + else: + ret, jpg = cv2.imencode(".jpg", np.zeros((175, 175, 3), np.uint8)) + return jpg.tobytes() + + def get_clean_png(self): + if self.thumbnail_data is None: + return None + + try: + best_frame = cv2.cvtColor( + self.frame_cache[self.thumbnail_data["frame_time"]], + cv2.COLOR_YUV2BGR_I420, + ) + except KeyError: + logger.warning( + f"Unable to create clean png because frame {self.thumbnail_data['frame_time']} is not in the cache" + ) + return None + + ret, png = cv2.imencode(".png", best_frame) + if ret: + return png.tobytes() + else: + return None + + def get_jpg_bytes( + self, timestamp=False, bounding_box=False, crop=False, height=None, quality=70 + ): + if self.thumbnail_data is None: + return None + + try: + best_frame = cv2.cvtColor( + self.frame_cache[self.thumbnail_data["frame_time"]], + cv2.COLOR_YUV2BGR_I420, + ) + except KeyError: + logger.warning( + f"Unable to create jpg because frame {self.thumbnail_data['frame_time']} is not in the cache" + ) + return None + + if bounding_box: + thickness = 2 + color = self.colormap[self.obj_data["label"]] + + # draw the bounding boxes on the frame + box = self.thumbnail_data["box"] + draw_box_with_label( + best_frame, + box[0], + box[1], + box[2], + box[3], + self.obj_data["label"], + f"{int(self.thumbnail_data['score']*100)}% {int(self.thumbnail_data['area'])}", + thickness=thickness, + color=color, + ) + + # draw any attributes + for attribute in self.thumbnail_data["attributes"]: + box = attribute["box"] + draw_box_with_label( + best_frame, + box[0], + box[1], + box[2], + box[3], + attribute["label"], + f"{attribute['score']:.0%}", + thickness=thickness, + color=color, + ) + + if crop: + box = self.thumbnail_data["box"] + box_size = 300 + region = calculate_region( + best_frame.shape, + box[0], + box[1], + box[2], + box[3], + box_size, + multiplier=1.1, + ) + best_frame = best_frame[region[1] : region[3], region[0] : region[2]] + + if height: + width = int(height * best_frame.shape[1] / best_frame.shape[0]) + best_frame = cv2.resize( + best_frame, dsize=(width, height), interpolation=cv2.INTER_AREA + ) + if timestamp: 
+ color = self.camera_config.timestamp_style.color + draw_timestamp( + best_frame, + self.thumbnail_data["frame_time"], + self.camera_config.timestamp_style.format, + font_effect=self.camera_config.timestamp_style.effect, + font_thickness=self.camera_config.timestamp_style.thickness, + font_color=(color.blue, color.green, color.red), + position=self.camera_config.timestamp_style.position, + ) + + ret, jpg = cv2.imencode( + ".jpg", best_frame, [int(cv2.IMWRITE_JPEG_QUALITY), quality] + ) + if ret: + return jpg.tobytes() + else: + return None + + +def zone_filtered(obj: TrackedObject, object_config): + object_name = obj.obj_data["label"] + + if object_name in object_config: + obj_settings = object_config[object_name] + + # if the min area is larger than the + # detected object, don't add it to detected objects + if obj_settings.min_area > obj.obj_data["area"]: + return True + + # if the detected object is larger than the + # max area, don't add it to detected objects + if obj_settings.max_area < obj.obj_data["area"]: + return True + + # if the score is lower than the threshold, skip + if obj_settings.threshold > obj.computed_score: + return True + + # if the object is not proportionally wide enough + if obj_settings.min_ratio > obj.obj_data["ratio"]: + return True + + # if the object is proportionally too wide + if obj_settings.max_ratio < obj.obj_data["ratio"]: + return True + + return False + + +class TrackedObjectAttribute: + def __init__(self, raw_data: tuple) -> None: + self.label = raw_data[0] + self.score = raw_data[1] + self.box = raw_data[2] + self.area = raw_data[3] + self.ratio = raw_data[4] + self.region = raw_data[5] + + def get_tracking_data(self) -> dict[str, any]: + """Return data saved to the object.""" + return { + "label": self.label, + "score": self.score, + "box": self.box, + } + + def find_best_object(self, objects: list[dict[str, any]]) -> str: + """Find the best attribute for each object and return its ID.""" + best_object_area = None + best_object_id = None + + for obj in objects: + if not box_inside(obj["box"], self.box): + continue + + object_area = area(obj["box"]) + + # if multiple objects have the same attribute then they + # are overlapping, it is most likely that the smaller object + # is the one with the attribute + if best_object_area is None: + best_object_area = object_area + best_object_id = obj["id"] + elif object_area < best_object_area: + best_object_area = object_area + best_object_id = obj["id"] + + return best_object_id diff --git a/frigate/util/image.py b/frigate/util/image.py index 41024a599..484737f71 100644 --- a/frigate/util/image.py +++ b/frigate/util/image.py @@ -36,6 +36,72 @@ def transliterate_to_latin(text: str) -> str: return unidecode(text) +def on_edge(box, frame_shape): + if ( + box[0] == 0 + or box[1] == 0 + or box[2] == frame_shape[1] - 1 + or box[3] == frame_shape[0] - 1 + ): + return True + + +def has_better_attr(current_thumb, new_obj, attr_label) -> bool: + max_new_attr = max( + [0] + + [area(a["box"]) for a in new_obj["attributes"] if a["label"] == attr_label] + ) + max_current_attr = max( + [0] + + [ + area(a["box"]) + for a in current_thumb["attributes"] + if a["label"] == attr_label + ] + ) + + # if the thumb has a higher scoring attr + return max_new_attr > max_current_attr + + +def is_better_thumbnail(label, current_thumb, new_obj, frame_shape) -> bool: + # larger is better + # cutoff images are less ideal, but they should also be smaller? 
+ # better scores are obviously better too + + # check face on person + if label == "person": + if has_better_attr(current_thumb, new_obj, "face"): + return True + # if the current thumb has a face attr, dont update unless it gets better + if any([a["label"] == "face" for a in current_thumb["attributes"]]): + return False + + # check license_plate on car + if label == "car": + if has_better_attr(current_thumb, new_obj, "license_plate"): + return True + # if the current thumb has a license_plate attr, dont update unless it gets better + if any([a["label"] == "license_plate" for a in current_thumb["attributes"]]): + return False + + # if the new_thumb is on an edge, and the current thumb is not + if on_edge(new_obj["box"], frame_shape) and not on_edge( + current_thumb["box"], frame_shape + ): + return False + + # if the score is better by more than 5% + if new_obj["score"] > current_thumb["score"] + 0.05: + return True + + # if the area is 10% larger + if new_obj["area"] > current_thumb["area"] * 1.1: + return True + + return False + + def draw_timestamp( frame, timestamp, diff --git a/frigate/video.py b/frigate/video.py index 0f051b6b2..c0341446a 100755 --- a/frigate/video.py +++ b/frigate/video.py @@ -27,7 +27,7 @@ from frigate.object_detection import RemoteObjectDetector from frigate.ptz.autotrack import ptz_moving_at_frame_time from frigate.track import ObjectTracker from frigate.track.norfair_tracker import NorfairTracker -from frigate.track.object_attribute import ObjectAttribute +from frigate.track.tracked_object import TrackedObjectAttribute from frigate.util.builtin import EventsPerSecond, get_tomorrow_at_time from frigate.util.image import ( FrameManager, @@ -734,10 +734,10 @@ def process_frames( object_tracker.update_frame_times(frame_time) # group the attribute detections based on what label they apply to - attribute_detections: dict[str, list[ObjectAttribute]] = {} + attribute_detections: dict[str, list[TrackedObjectAttribute]] = {} for label, attribute_labels in model_config.attributes_map.items(): attribute_detections[label] = [ - ObjectAttribute(d) + TrackedObjectAttribute(d) for d in consolidated_detections if d[0] in attribute_labels ] From b299652e86869dae080725fceecf9eb34efbe254 Mon Sep 17 00:00:00 2001 From: Josh Hawkins <32435876+hawkeye217@users.noreply.github.com> Date: Thu, 17 Oct 2024 11:15:44 -0500 Subject: [PATCH 3/5] Generative AI changes (#14413) * Update default genai prompt * Update docs * improve wording * clarify wording --- docs/docs/configuration/genai.md | 22 ++++++++++++++-------- docs/docs/configuration/semantic_search.md | 11 ++++++----- frigate/config/camera/genai.py | 4 ++-- 3 files changed, 22 insertions(+), 15 deletions(-) diff --git a/docs/docs/configuration/genai.md b/docs/docs/configuration/genai.md index e2f6ac318..aace224f3 100644 --- a/docs/docs/configuration/genai.md +++ b/docs/docs/configuration/genai.md @@ -3,7 +3,7 @@ id: genai title: Generative AI --- -Generative AI can be used to automatically generate descriptions based on the thumbnails of your tracked objects. This helps with [Semantic Search](/configuration/semantic_search) in Frigate by providing detailed text descriptions as a basis of the search query. +Generative AI can be used to automatically generate descriptive text based on the thumbnails of your tracked objects. This helps with [Semantic Search](/configuration/semantic_search) in Frigate to provide more context about your tracked objects. Semantic Search must be enabled to use Generative AI. 
Descriptions are accessed via the _Explore_ view in the Frigate UI by clicking on a tracked object's thumbnail. @@ -122,12 +122,18 @@ genai: api_key: "{FRIGATE_OPENAI_API_KEY}" ``` +## Usage and Best Practices + +Frigate's thumbnail search excels at identifying specific details about tracked objects – for example, using an "image caption" approach to find a "person wearing a yellow vest," "a white dog running across the lawn," or "a red car on a residential street." To enhance this further, Frigate’s default prompts are designed to ask your AI provider about the intent behind the object's actions, rather than just describing its appearance. + +While generating simple descriptions of detected objects is useful, understanding intent provides a deeper layer of insight. Instead of just recognizing "what" is in a scene, Frigate’s default prompts aim to infer "why" it might be there or "what" it could do next. Descriptions tell you what’s happening, but intent gives context. For instance, a person walking toward a door might seem like a visitor, but if they’re moving quickly after hours, you can infer a potential break-in attempt. Detecting a person loitering near a door at night can trigger an alert sooner than simply noting "a person standing by the door," helping you respond based on the situation’s context. + ## Custom Prompts Frigate sends multiple frames from the tracked object along with a prompt to your Generative AI provider asking it to generate a description. The default prompt is as follows: ``` -Describe the {label} in the sequence of images with as much detail as possible. Do not describe the background. +Analyze the sequence of images containing the {label}. Focus on the likely intent or behavior of the {label} based on its actions and movement, rather than describing its appearance or the surroundings. Consider what the {label} is doing, why, and what it might do next. ``` :::tip @@ -144,10 +150,10 @@ genai: provider: ollama base_url: http://localhost:11434 model: llava - prompt: "Describe the {label} in these images from the {camera} security camera." + prompt: "Analyze the {label} in these images from the {camera} security camera. Focus on the actions, behavior, and potential intent of the {label}, rather than just describing its appearance." object_prompts: - person: "Describe the main person in these images (gender, age, clothing, activity, etc). Do not include where the activity is occurring (sidewalk, concrete, driveway, etc)." - car: "Label the primary vehicle in these images with just the name of the company if it is a delivery vehicle, or the color make and model." + person: "Examine the main person in these images. What are they doing and what might their actions suggest about their intent (e.g., approaching a door, leaving an area, standing still)? Do not describe the surroundings or static details." + car: "Observe the primary vehicle in these images. Focus on its movement, direction, or purpose (e.g., parking, approaching, circling). If it's a delivery vehicle, mention the company." ``` Prompts can also be overriden at the camera level to provide a more detailed prompt to the model about your specific camera, if you desire. By default, descriptions will be generated for all tracked objects and all zones. But you can also optionally specify `objects` and `required_zones` to only generate descriptions for certain tracked objects or zones. 
@@ -159,10 +165,10 @@ cameras: front_door: genai: use_snapshot: True - prompt: "Describe the {label} in these images from the {camera} security camera at the front door of a house, aimed outward toward the street." + prompt: "Analyze the {label} in these images from the {camera} security camera at the front door. Focus on the actions and potential intent of the {label}." object_prompts: - person: "Describe the main person in these images (gender, age, clothing, activity, etc). Do not include where the activity is occurring (sidewalk, concrete, driveway, etc). If delivering a package, include the company the package is from." - cat: "Describe the cat in these images (color, size, tail). Indicate whether or not the cat is by the flower pots. If the cat is chasing a mouse, make up a name for the mouse." + person: "Examine the person in these images. What are they doing, and how might their actions suggest their purpose (e.g., delivering something, approaching, leaving)? If they are carrying or interacting with a package, include details about its source or destination." + cat: "Observe the cat in these images. Focus on its movement and intent (e.g., wandering, hunting, interacting with objects). If the cat is near the flower pots or engaging in any specific actions, mention it." objects: - person - cat diff --git a/docs/docs/configuration/semantic_search.md b/docs/docs/configuration/semantic_search.md index 18093a479..a7b35ed77 100644 --- a/docs/docs/configuration/semantic_search.md +++ b/docs/docs/configuration/semantic_search.md @@ -50,10 +50,11 @@ semantic_search: - Configuring the `large` model employs the full Jina model and will automatically run on the GPU if applicable. - Configuring the `small` model employs a quantized version of the model that uses much less RAM and runs faster on CPU with a very negligible difference in embedding quality. -## Usage +## Usage and Best Practices 1. Semantic search is used in conjunction with the other filters available on the Search page. Use a combination of traditional filtering and semantic search for the best results. -2. Because of how the AI models Frigate uses have been trained, the comparison between text and image embedding distances generally means that results matching `description` will appear first, even if a `thumbnail` embedding may be a better match. Play with the "Search Type" setting to help find what you are looking for. Note that if you are generating descriptions for specific objects or zones only, this may cause search results to prioritize the objects with descriptions even if the the ones without them are more relevant. -3. Make your search language and tone closely match your descriptions. If you are using thumbnail search, **phrase your query as an image caption**. For example "red car" will not work as well as "red sedan driving down a residential street on a sunny day". -4. Semantic search on thumbnails tends to return better results when matching large subjects that take up most of the frame. Small things like "cat" tend to not work well. -5. Experiment! Find a tracked object you want to test and start typing keywords and phrases to see what works for you. +2. Use the thumbnail search type when searching for particular objects in the scene. Use the description search type when attempting to discern the intent of your object. +3. 
Because of how the AI models Frigate uses have been trained, the comparison between text and image embedding distances generally means that with multi-modal (`thumbnail` and `description`) searches, results matching `description` will appear first, even if a `thumbnail` embedding may be a better match. Play with the "Search Type" setting to help find what you are looking for. Note that if you are generating descriptions for specific objects or zones only, this may cause search results to prioritize the objects with descriptions even if the the ones without them are more relevant. +4. Make your search language and tone closely match exactly what you're looking for. If you are using thumbnail search, **phrase your query as an image caption**. Searching for "red car" may not work as well as "red sedan driving down a residential street on a sunny day". +5. Semantic search on thumbnails tends to return better results when matching large subjects that take up most of the frame. Small things like "cat" tend to not work well. +6. Experiment! Find a tracked object you want to test and start typing keywords and phrases to see what works for you. diff --git a/frigate/config/camera/genai.py b/frigate/config/camera/genai.py index 21c3d4525..35c26eaf8 100644 --- a/frigate/config/camera/genai.py +++ b/frigate/config/camera/genai.py @@ -23,7 +23,7 @@ class GenAICameraConfig(BaseModel): default=False, title="Use snapshots for generating descriptions." ) prompt: str = Field( - default="Describe the {label} in the sequence of images with as much detail as possible. Do not describe the background.", + default="Analyze the sequence of images containing the {label}. Focus on the likely intent or behavior of the {label} based on its actions and movement, rather than describing its appearance or the surroundings. Consider what the {label} is doing, why, and what it might do next.", title="Default caption prompt.", ) object_prompts: dict[str, str] = Field( @@ -51,7 +51,7 @@ class GenAICameraConfig(BaseModel): class GenAIConfig(FrigateBaseModel): enabled: bool = Field(default=False, title="Enable GenAI.") prompt: str = Field( - default="Describe the {label} in the sequence of images with as much detail as possible. Do not describe the background.", + default="Analyze the sequence of images containing the {label}. Focus on the likely intent or behavior of the {label} based on its actions and movement, rather than describing its appearance or the surroundings. Consider what the {label} is doing, why, and what it might do next.", title="Default caption prompt.", ) object_prompts: dict[str, str] = Field( From 5d8bcb42c647d1d598a638edcc8a78167e8dd5d9 Mon Sep 17 00:00:00 2001 From: Josh Hawkins <32435876+hawkeye217@users.noreply.github.com> Date: Thu, 17 Oct 2024 11:21:27 -0500 Subject: [PATCH 4/5] Fix autotrack to work with new tracked object package (#14414) --- frigate/ptz/autotrack.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frigate/ptz/autotrack.py b/frigate/ptz/autotrack.py index e9226f267..24b12087d 100644 --- a/frigate/ptz/autotrack.py +++ b/frigate/ptz/autotrack.py @@ -1275,7 +1275,7 @@ class PtzAutoTracker: # If it's within bounds, start tracking that object. # Should we check region (maybe too broad) or expand the previous object's box a bit and check that? 
self.tracked_object[camera] is None - and obj.camera == camera + and obj.camera_config.name == camera and obj.obj_data["label"] in self.object_types[camera] and not obj.previous["false_positive"] and not obj.false_positive From b56f4c4558e553283caf9e8360d65de5758708d3 Mon Sep 17 00:00:00 2001 From: Josh Hawkins <32435876+hawkeye217@users.noreply.github.com> Date: Fri, 18 Oct 2024 09:07:29 -0500 Subject: [PATCH 5/5] Semantic search docs update (#14438) * Add minimum requirements to semantic search docs * clarify --- docs/docs/configuration/semantic_search.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/docs/configuration/semantic_search.md b/docs/docs/configuration/semantic_search.md index a7b35ed77..7f84fdf95 100644 --- a/docs/docs/configuration/semantic_search.md +++ b/docs/docs/configuration/semantic_search.md @@ -9,6 +9,14 @@ Frigate has support for [Jina AI's CLIP model](https://huggingface.co/jinaai/jin Semantic Search is accessed via the _Explore_ view in the Frigate UI. +## Minimum System Requirements + +Semantic Search works by running a large AI model locally on your system. Small or underpowered systems like a Raspberry Pi will not run Semantic Search reliably or at all. + +A minimum of 8GB of RAM is required to use Semantic Search. A GPU is not strictly required but will provide a significant performance increase over CPU-only systems. + +For best performance, 16GB or more of RAM and a dedicated GPU are recommended. + ## Configuration Semantic search is disabled by default, and must be enabled in your config file before it can be used. Semantic Search is a global configuration setting.
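
To illustrate the global setting described in the hunk above, a minimal config sketch might look like the following. This is only an illustrative example, not part of the patch: `enabled` follows the pattern used in the `genai` examples earlier in this series, and the `model_size` key name is an assumption based on the `small`/`large` model discussion in this file.

```yaml
# Minimal sketch only — key names other than `semantic_search` are assumptions
# drawn from the surrounding documentation, not from the patch itself.
semantic_search:
  enabled: True
  # "small" uses the quantized model (CPU-friendly); "large" uses the full
  # Jina model and runs on the GPU when available.
  model_size: small
```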