Compare commits

...

8 Commits

Author SHA1 Message Date
mathieu-d
d18b7f8f97
Merge 0f3dd097ec into 0ea8924727 2026-04-26 04:46:56 -06:00
Nicolas Mowen
0ea8924727
GenAI Optimizations (#23006)
* Test for image token usage in llama.cpp so we can more appropriately decide how many frames to include

* Limit based on frames per second

* handle zone case sensitivity

* Improve formatting

* Add observations field so model can build CoT before outputting used fields
2026-04-25 17:38:18 -05:00
Josh Hawkins
1a1994ca17
Miscellaneous fixes (#23000)
* ensure classification wizard dialog is scrollable on mobile too

* add chat and features group to mobile menu

Co-authored-by: Copilot <copilot@github.com>

* Set min length for summary too

* Don't use orange for review item

---------

Co-authored-by: Copilot <copilot@github.com>
Co-authored-by: Nicolas Mowen <nickmowen213@gmail.com>
2026-04-25 09:12:20 -06:00
mathieu-d
0f3dd097ec Prepare for pull request. Remove specific configurations 2026-04-17 22:25:46 +02:00
mathieu-d
2a4d7e4766 Prepare for pull request. Remove specific configurations 2026-04-14 23:14:31 +02:00
mathieu-d
46415ffeb5 Add Hailo-10H detector configuration to global.json 2026-04-14 22:54:58 +02:00
mathieu-d
e35ab0b8a1 Add support for temperature reading for Hailo-10H 2026-04-14 22:54:58 +02:00
mathieu-d
837373547d H10 support patch 2026-04-14 22:54:58 +02:00
18 changed files with 745 additions and 52 deletions

View File

@@ -21,6 +21,13 @@ local: version
--tag frigate:latest \
--load
localh10: version
docker buildx build --target=frigate --file docker/main/Dockerfile . \
--build-arg HAILORT_VERSION=5.1.1 \
--build-arg HAILORT_GIT_REPO=mathieu-d/hailort \
--tag frigate:latest \
--load
debug: version
docker buildx build --target=frigate --file docker/main/Dockerfile . \
--build-arg DEBUG=true \

View File

@@ -12,6 +12,11 @@ services:
build:
context: .
dockerfile: docker/main/Dockerfile
# Use args to specify hailort version and location
# args:
# HAILORT_VERSION: "5.1.1"
# HAILORT_GIT_REPO: "mathieu-d/hailort"
# Use target devcontainer-trt for TensorRT dev
target: devcontainer
cache_from:
@@ -29,6 +34,7 @@ services:
# devices:
# - /dev/bus/usb:/dev/bus/usb # Uncomment for Google Coral USB
# - /dev/dri:/dev/dri # for intel hwaccel, needs to be updated for your hardware
volumes:
- .:/workspace/frigate:cached
- ./web/dist:/opt/frigate/web:cached

View File

@@ -0,0 +1,7 @@
#!/bin/bash
# Update package list and install hailo driver version 5.1.1 for Hailo-10H
sudo apt update
sudo apt install -y hailo-h10-all=5.1.1

View File

@@ -157,6 +157,8 @@ FROM base AS wheels
ARG DEBIAN_FRONTEND
ARG TARGETARCH
ARG DEBUG=false
ARG HAILORT_VERSION=4.21.0
ARG HAILORT_GIT_REPO=frigate-nvr/hailort
# Use a separate container to build wheels to prevent build dependencies in final image
RUN apt-get -qq update \

View File

@@ -2,13 +2,11 @@
set -euxo pipefail
hailo_version="4.21.0"
if [[ "${TARGETARCH}" == "amd64" ]]; then
arch="x86_64"
elif [[ "${TARGETARCH}" == "arm64" ]]; then
arch="aarch64"
fi
wget -qO- "https://github.com/frigate-nvr/hailort/releases/download/v${hailo_version}/hailort-debian12-${TARGETARCH}.tar.gz" | tar -C / -xzf -
wget -P /wheels/ "https://github.com/frigate-nvr/hailort/releases/download/v${hailo_version}/hailort-${hailo_version}-cp311-cp311-linux_${arch}.whl"
wget -qO- "https://github.com/${HAILORT_GIT_REPO}/releases/download/v${HAILORT_VERSION}/hailort-debian12-${TARGETARCH}.tar.gz" | tar -C / -xzf -
wget -P /wheels/ "https://github.com/${HAILORT_GIT_REPO}/releases/download/v${HAILORT_VERSION}/hailort-${HAILORT_VERSION}-cp311-cp311-linux_${arch}.whl"

View File

@@ -36,6 +36,7 @@ from frigate.api.defs.response.chat_response import (
)
from frigate.api.defs.tags import Tags
from frigate.api.event import events
from frigate.config import FrigateConfig
from frigate.genai.utils import build_assistant_message_for_conversation
from frigate.jobs.vlm_watch import (
get_vlm_watch_job,
@@ -401,9 +402,38 @@ def get_tools() -> JSONResponse:
return JSONResponse(content={"tools": tools})
def _resolve_zones(
zones: List[str],
config: FrigateConfig,
target_cameras: List[str],
) -> List[str]:
"""Map zone names to their canonical config keys, case-insensitively.
LLMs frequently echo a user's casing ("Front_Yard") instead of the
configured key ("front_yard"). The downstream zone filter is a SQLite GLOB
over the JSON-encoded zones column, which is case-sensitive, so an
unnormalized name silently returns zero matches. Build a lookup over the
relevant cameras' configured zones and substitute when we find a match;
unknown names pass through so behavior matches what the model asked for.
"""
if not zones:
return zones
lookup: Dict[str, str] = {}
for camera_id in target_cameras:
camera_config = config.cameras.get(camera_id)
if camera_config is None:
continue
for zone_name in camera_config.zones.keys():
lookup.setdefault(zone_name.lower(), zone_name)
return [lookup.get(z.lower(), z) for z in zones]
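A minimal usage sketch, assuming a hypothetical camera "front" whose config defines the zones "front_yard" and "porch":
# Only casing is normalized; unknown names pass through untouched.
_resolve_zones(["Front_Yard", "PORCH", "garage"], config, ["front"])
# -> ["front_yard", "porch", "garage"]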
async def _execute_search_objects(
arguments: Dict[str, Any],
allowed_cameras: List[str],
config: FrigateConfig,
) -> JSONResponse:
"""
Execute the search_objects tool.
@@ -437,6 +467,11 @@ async def _execute_search_objects(
# Convert zones array to comma-separated string if provided
zones = arguments.get("zones")
if isinstance(zones, list):
camera_arg = arguments.get("camera")
target_cameras = (
[camera_arg] if camera_arg and camera_arg != "all" else allowed_cameras
)
zones = _resolve_zones(zones, config, target_cameras)
zones = ",".join(zones)
elif zones is None:
zones = "all"
@@ -528,6 +563,11 @@ async def _execute_find_similar_objects(
sub_labels = arguments.get("sub_labels")
zones = arguments.get("zones")
if zones:
zones = _resolve_zones(
zones, request.app.frigate_config, cameras or list(allowed_cameras)
)
similarity_mode = arguments.get("similarity_mode", "fused")
if similarity_mode not in ("visual", "semantic", "fused"):
similarity_mode = "fused"
@@ -655,7 +695,9 @@ async def execute_tool(
logger.debug(f"Executing tool: {tool_name} with arguments: {arguments}")
if tool_name == "search_objects":
return await _execute_search_objects(arguments, allowed_cameras)
return await _execute_search_objects(
arguments, allowed_cameras, request.app.frigate_config
)
if tool_name == "find_similar_objects":
result = await _execute_find_similar_objects(
@@ -835,7 +877,9 @@ async def _execute_tool_internal(
This is used by the chat completion endpoint to execute tools.
"""
if tool_name == "search_objects":
response = await _execute_search_objects(arguments, allowed_cameras)
response = await _execute_search_objects(
arguments, allowed_cameras, request.app.frigate_config
)
try:
if hasattr(response, "body"):
body_str = response.body.decode("utf-8")
@@ -899,6 +943,9 @@ async def _execute_start_camera_watch(
await require_camera_access(camera, request=request)
if zones:
zones = _resolve_zones(zones, config, [camera])
genai_manager = request.app.genai_manager
chat_client = genai_manager.chat_client
if chat_client is None or not chat_client.supports_vision:

View File

@@ -39,6 +39,8 @@ logger = logging.getLogger(__name__)
RECORDING_BUFFER_EXTENSION_PERCENT = 0.10
MIN_RECORDING_DURATION = 10
MAX_IMAGE_TOKENS = 24000
MAX_FRAMES_PER_SECOND = 2
class ReviewDescriptionProcessor(PostProcessorApi):
@@ -60,14 +62,22 @@ class ReviewDescriptionProcessor(PostProcessorApi):
def calculate_frame_count(
self,
camera: str,
duration: float,
image_source: ImageSourceEnum = ImageSourceEnum.preview,
height: int = 480,
) -> int:
"""Calculate optimal number of frames based on context size, image source, and resolution.
"""Calculate optimal number of frames based on event duration, context size,
image source, and resolution.
Token usage varies by resolution: larger images (ultra-wide aspect ratios) use more tokens.
Estimates ~1 token per 1250 pixels. Targets 98% context utilization with safety margin.
Capped at 20 frames.
The per-image token cost is queried from the GenAI provider, so providers
that know their model's true cost (e.g. llama.cpp can probe the loaded
mmproj) can diverge from the default ~1-token-per-1250-pixels heuristic. The
frame
budget is bounded by:
- remaining context window after prompt + response reservations
- a fixed MAX_IMAGE_TOKENS ceiling
- MAX_FRAMES_PER_SECOND x duration, to avoid drowning short events in
near-duplicate frames where the model latches onto the redundant middle
and skips the start/end action
"""
client = self.genai_manager.description_client
@@ -105,14 +115,15 @@ class ReviewDescriptionProcessor(PostProcessorApi):
width = target_width
height = int(target_width / aspect_ratio)
pixels_per_image = width * height
tokens_per_image = pixels_per_image / 1250
tokens_per_image = client.estimate_image_tokens(width, height)
prompt_tokens = 3800
response_tokens = 300
available_tokens = context_size - prompt_tokens - response_tokens
max_frames = int(available_tokens / tokens_per_image)
return min(max(max_frames, 3), 20)
context_budget = context_size - prompt_tokens - response_tokens
image_token_budget = min(context_budget, MAX_IMAGE_TOKENS)
max_frames_by_tokens = int(image_token_budget / tokens_per_image)
max_frames_by_duration = int(duration * MAX_FRAMES_PER_SECOND)
max_frames = min(max_frames_by_tokens, max_frames_by_duration)
return max(max_frames, 3)
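Illustrative budget math, assuming a hypothetical 8192-token context, the default heuristic image cost, an 848x480 preview, and a 4-second event:
# context_budget = 8192 - 3800 - 300 = 4092
# image_token_budget = min(4092, 24000) = 4092
# tokens_per_image = (848 * 480) / 1250 = 325.6  (heuristic fallback)
# max_frames_by_tokens = int(4092 / 325.6) = 12
# max_frames_by_duration = int(4 * 2) = 8  # MAX_FRAMES_PER_SECOND = 2
# min(12, 8) = 8 frames, with the max(_, 3) floor guarding very short events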
def process_data(
self, data: dict[str, Any], data_type: PostProcessDataEnum
@@ -376,7 +387,9 @@ class ReviewDescriptionProcessor(PostProcessorApi):
all_frames.append(os.path.join(preview_dir, file))
frame_count = len(all_frames)
desired_frame_count = self.calculate_frame_count(camera)
desired_frame_count = self.calculate_frame_count(
camera, duration=end_time - start_time
)
if frame_count <= desired_frame_count:
return all_frames
@@ -400,7 +413,7 @@ class ReviewDescriptionProcessor(PostProcessorApi):
"""Get frames from recordings at specified timestamps."""
duration = end_time - start_time
desired_frame_count = self.calculate_frame_count(
camera, ImageSourceEnum.recordings, height
camera, duration, ImageSourceEnum.recordings, height
)
# Calculate evenly spaced timestamps throughout the duration

View File

@@ -4,12 +4,14 @@ from pydantic import BaseModel, ConfigDict, Field
class ReviewMetadata(BaseModel):
model_config = ConfigDict(extra="ignore", protected_namespaces=())
observations: list[str] = Field(
default_factory=list,
description="Chronological list of significant observations from the frames, written before the scene narrative is composed.",
)
title: str = Field(
description="A short title characterizing what took place and where, under 10 words."
)
scene: str = Field(
min_length=120,
max_length=600,
description="A chronological narrative of what happens from start to finish.",
)
shortSummary: str = Field(

View File

@@ -0,0 +1,415 @@
import logging
import os
import subprocess
import threading
import urllib.request
from functools import partial
from typing import Dict, List, Optional, Tuple
import cv2
import numpy as np
from pydantic import ConfigDict, Field
from typing_extensions import Literal
from frigate.const import MODEL_CACHE_DIR
from frigate.detectors.detection_api import DetectionApi
from frigate.detectors.detector_config import (
BaseDetectorConfig,
)
from frigate.object_detection.util import RequestStore, ResponseStore
logger = logging.getLogger(__name__)
# ----------------- Utility Functions ----------------- #
def preprocess_tensor(image: np.ndarray, model_w: int, model_h: int) -> np.ndarray:
"""
Resize an image with unchanged aspect ratio using padding.
Accepts (H, W, 3) input; a leading singleton batch dimension is stripped.
"""
if image.ndim == 4 and image.shape[0] == 1:
image = image[0]
h, w = image.shape[:2]
scale = min(model_w / w, model_h / h)
new_w, new_h = int(w * scale), int(h * scale)
resized_image = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_CUBIC)
padded_image = np.full((model_h, model_w, 3), 114, dtype=image.dtype)
x_offset = (model_w - new_w) // 2
y_offset = (model_h - new_h) // 2
padded_image[y_offset : y_offset + new_h, x_offset : x_offset + new_w] = (
resized_image
)
return padded_image
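Worked letterbox numbers, assuming a hypothetical 1280x720 frame and a 640x640 model input:
# scale = min(640 / 1280, 640 / 720) = 0.5 -> resized to 640x360
# x_offset = (640 - 640) // 2 = 0; y_offset = (640 - 360) // 2 = 140
# The frame ends up vertically centered on a 114-gray canvas.
preprocess_tensor(frame, 640, 640)  # frame: np.ndarray of shape (720, 1280, 3)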
# ----------------- Global Constants ----------------- #
DETECTOR_KEY = "hailo10h"
ARCH = None
H10H_DEFAULT_MODEL = "yolov6n.hef"
H10H_DEFAULT_URL = "https://hailo-model-zoo.s3.eu-west-2.amazonaws.com/ModelZoo/Compiled/v5.2.0/hailo10h/yolov6n.hef"
def detect_hailo_arch():
try:
result = subprocess.run(
["hailortcli", "fw-control", "identify"], capture_output=True, text=True
)
if result.returncode != 0:
logger.error(f"Inference error: {result.stderr}")
return None
for line in result.stdout.split("\n"):
if "Device Architecture" in line:
if "HAILO10H" in line:
return "hailo10h"
logger.error("Inference error: Could not determine Hailo architecture.")
return None
except Exception as e:
logger.error(f"Inference error: {e}")
return None
# ----------------- HailoAsyncInference Class ----------------- #
class HailoAsyncInference:
def __init__(
self,
hef_path: str,
input_store: RequestStore,
output_store: ResponseStore,
batch_size: int = 1,
input_type: Optional[str] = None,
output_type: Optional[Dict[str, str]] = None,
send_original_frame: bool = False,
) -> None:
# when importing hailo it activates the driver
# which leaves processes running even though it may not be used.
try:
from hailo_platform import (
HEF,
FormatType,
HailoSchedulingAlgorithm,
VDevice,
)
except ModuleNotFoundError:
# hailo_platform is required below (HEF, VDevice, ...); re-raise so a
# missing SDK surfaces as a clear import error rather than a NameError.
raise
self.input_store = input_store
self.output_store = output_store
params = VDevice.create_params()
params.scheduling_algorithm = HailoSchedulingAlgorithm.ROUND_ROBIN
self.hef = HEF(hef_path)
self.target = VDevice(params)
self.infer_model = self.target.create_infer_model(hef_path)
self.infer_model.set_batch_size(batch_size)
if input_type is not None:
self.infer_model.input().set_format_type(getattr(FormatType, input_type))
if output_type is not None:
# Use a distinct loop variable: shadowing output_type here would leave
# self.output_type pointing at the last format string instead of the dict.
for output_name, format_name in output_type.items():
self.infer_model.output(output_name).set_format_type(
getattr(FormatType, format_name)
)
self.output_type = output_type
self.send_original_frame = send_original_frame
def callback(
self,
completion_info,
bindings_list: List,
input_batch: List,
request_ids: List[int],
):
if completion_info.exception:
logger.error(f"Inference error: {completion_info.exception}")
else:
for i, bindings in enumerate(bindings_list):
if len(bindings._output_names) == 1:
result = bindings.output().get_buffer()
else:
result = {
name: np.expand_dims(bindings.output(name).get_buffer(), axis=0)
for name in bindings._output_names
}
self.output_store.put(request_ids[i], (input_batch[i], result))
def _create_bindings(self, configured_infer_model) -> object:
if self.output_type is None:
output_buffers = {
output_info.name: np.empty(
self.infer_model.output(output_info.name).shape,
dtype=getattr(
np, str(output_info.format.type).split(".")[1].lower()
),
)
for output_info in self.hef.get_output_vstream_infos()
}
else:
output_buffers = {
name: np.empty(
self.infer_model.output(name).shape,
dtype=getattr(np, self.output_type[name].lower()),
)
for name in self.output_type
}
return configured_infer_model.create_bindings(output_buffers=output_buffers)
def get_input_shape(self) -> Tuple[int, ...]:
return self.hef.get_input_vstream_infos()[0].shape
def run(self) -> None:
job = None
with self.infer_model.configure() as configured_infer_model:
while True:
batch_data = self.input_store.get()
if batch_data is None:
break
request_id, frame_data = batch_data
preprocessed_batch = [frame_data]
request_ids = [request_id]
input_batch = preprocessed_batch # non-send_original_frame mode
bindings_list = []
for frame in preprocessed_batch:
bindings = self._create_bindings(configured_infer_model)
bindings.input().set_buffer(np.array(frame))
bindings_list.append(bindings)
configured_infer_model.wait_for_async_ready(timeout_ms=10000)
job = configured_infer_model.run_async(
bindings_list,
partial(
self.callback,
input_batch=input_batch,
request_ids=request_ids,
bindings_list=bindings_list,
),
)
if job is not None:
job.wait(100)
# ----------------- HailoDetector Class ----------------- #
class HailoDetector(DetectionApi):
type_key = DETECTOR_KEY
def __init__(self, detector_config: "HailoDetectorConfig"):
global ARCH
ARCH = detect_hailo_arch()
self.cache_dir = MODEL_CACHE_DIR
self.device_type = detector_config.device
self.model_height = (
detector_config.model.height
if hasattr(detector_config.model, "height")
else None
)
self.model_width = (
detector_config.model.width
if hasattr(detector_config.model, "width")
else None
)
self.model_type = (
detector_config.model.model_type
if hasattr(detector_config.model, "model_type")
else None
)
self.tensor_format = (
detector_config.model.input_tensor
if hasattr(detector_config.model, "input_tensor")
else None
)
self.pixel_format = (
detector_config.model.input_pixel_format
if hasattr(detector_config.model, "input_pixel_format")
else None
)
self.input_dtype = (
detector_config.model.input_dtype
if hasattr(detector_config.model, "input_dtype")
else None
)
self.output_type = "FLOAT32"
self.set_path_and_url(detector_config.model.path)
self.working_model_path = self.check_and_prepare()
self.batch_size = 1
self.input_store = RequestStore()
self.response_store = ResponseStore()
try:
logger.debug(f"[INIT] Loading HEF model from {self.working_model_path}")
self.inference_engine = HailoAsyncInference(
self.working_model_path,
self.input_store,
self.response_store,
self.batch_size,
)
self.input_shape = self.inference_engine.get_input_shape()
logger.debug(f"[INIT] Model input shape: {self.input_shape}")
self.inference_thread = threading.Thread(
target=self.inference_engine.run, daemon=True
)
self.inference_thread.start()
except Exception as e:
logger.error(f"[INIT] Failed to initialize HailoAsyncInference: {e}")
raise
def set_path_and_url(self, path: str = None):
if not path:
self.model_path = None
self.url = None
return
if self.is_url(path):
self.url = path
self.model_path = None
else:
self.model_path = path
self.url = None
def is_url(self, url: str) -> bool:
return (
url.startswith("http://")
or url.startswith("https://")
or url.startswith("www.")
)
@staticmethod
def extract_model_name(path: str = None, url: str = None) -> str:
if path and path.endswith(".hef"):
return os.path.basename(path)
elif url and url.endswith(".hef"):
return os.path.basename(url)
else:
return H10H_DEFAULT_MODEL
@staticmethod
def download_model(url: str, destination: str):
if not url.endswith(".hef"):
raise ValueError("Invalid model URL. Only .hef files are supported.")
try:
urllib.request.urlretrieve(url, destination)
logger.debug(f"Downloaded model to {destination}")
except Exception as e:
raise RuntimeError(f"Failed to download model from {url}: {str(e)}")
def check_and_prepare(self) -> str:
if not os.path.exists(self.cache_dir):
os.makedirs(self.cache_dir)
model_name = self.extract_model_name(self.model_path, self.url)
cached_model_path = os.path.join(self.cache_dir, model_name)
if not self.model_path and not self.url:
if os.path.exists(cached_model_path):
logger.debug(f"Model found in cache: {cached_model_path}")
return cached_model_path
else:
logger.debug(f"Downloading default model: {model_name}")
self.download_model(H10H_DEFAULT_URL, cached_model_path)
elif self.url:
logger.debug(f"Downloading model from URL: {self.url}")
self.download_model(self.url, cached_model_path)
elif self.model_path:
if os.path.exists(self.model_path):
logger.debug(f"Using existing model at: {self.model_path}")
return self.model_path
else:
raise FileNotFoundError(f"Model file not found at: {self.model_path}")
return cached_model_path
def detect_raw(self, tensor_input):
tensor_input = self.preprocess(tensor_input)
if isinstance(tensor_input, np.ndarray) and len(tensor_input.shape) == 3:
tensor_input = np.expand_dims(tensor_input, axis=0)
request_id = self.input_store.put(tensor_input)
try:
_, infer_results = self.response_store.get(request_id, timeout=1.0)
except TimeoutError:
logger.error(
f"Timeout waiting for inference results for request {request_id}"
)
if not self.inference_thread.is_alive():
raise RuntimeError(
"HailoRT inference thread has stopped, restart required."
)
return np.zeros((20, 6), dtype=np.float32)
if isinstance(infer_results, list) and len(infer_results) == 1:
infer_results = infer_results[0]
threshold = 0.4
all_detections = []
for class_id, detection_set in enumerate(infer_results):
if not isinstance(detection_set, np.ndarray) or detection_set.size == 0:
continue
for det in detection_set:
if det.shape[0] < 5:
continue
score = float(det[4])
if score < threshold:
continue
all_detections.append([class_id, score, det[0], det[1], det[2], det[3]])
if len(all_detections) == 0:
detections_array = np.zeros((20, 6), dtype=np.float32)
else:
detections_array = np.array(all_detections, dtype=np.float32)
if detections_array.shape[0] > 20:
detections_array = detections_array[:20, :]
elif detections_array.shape[0] < 20:
pad = np.zeros((20 - detections_array.shape[0], 6), dtype=np.float32)
detections_array = np.vstack((detections_array, pad))
return detections_array
def preprocess(self, image):
if isinstance(image, np.ndarray):
processed = preprocess_tensor(
image, self.input_shape[1], self.input_shape[0]
)
return np.expand_dims(processed, axis=0)
else:
raise ValueError("Unsupported image format for preprocessing")
def close(self):
"""Properly shuts down the inference engine and releases the VDevice."""
logger.debug("[CLOSE] Closing HailoDetector")
try:
if hasattr(self, "inference_engine"):
if hasattr(self.inference_engine, "target"):
self.inference_engine.target.release()
logger.debug("Hailo VDevice released successfully")
except Exception as e:
logger.error(f"Failed to close Hailo device: {e}")
raise
def __del__(self):
"""Destructor to ensure cleanup when the object is deleted."""
self.close()
# ----------------- HailoDetectorConfig Class ----------------- #
class HailoDetectorConfig(BaseDetectorConfig):
"""Hailo10H detector using HEF models and the HailoRT SDK for inference on Hailo hardware."""
model_config = ConfigDict(
title="Hailo-10H",
)
type: Literal[DETECTOR_KEY]
device: str = Field(
default="PCIe",
title="Device Type",
description="The device to use for Hailo inference (e.g. 'PCIe', 'M.2').",
)

View File

@@ -151,6 +151,50 @@ Each line represents a detection state, not necessarily unique individuals. The
if "other_concerns" in schema.get("required", []):
schema["required"].remove("other_concerns")
# Length hints injected into the schema as suggestions to the model
# (enforced by grammar-based providers like llama.cpp) but kept off the
# Pydantic model so a non-compliant response does not fail validation.
length_hints = {
"scene": {"minLength": 120, "maxLength": 600},
"shortSummary": {"minLength": 70, "maxLength": 100},
}
for field, hints in length_hints.items():
prop = schema.get("properties", {}).get(field)
if prop is not None:
prop.update(hints)
# observations is a chain-of-thought-by-schema field: forcing the model
# to enumerate concrete facts before writing scene/title surfaces details
# the narrative would otherwise gloss past (e.g. brief vehicle arrivals
# overshadowed by a longer activity). The minItems floor scales with
# event duration so longer clips get more observations.
observations_prop = schema.get("properties", {}).get("observations")
if observations_prop is not None:
duration_seconds = float(review_data.get("duration") or 0)
min_observations = max(3, round(duration_seconds / 5))
max_observations = min_observations + 8
observations_prop["description"] = (
"Enumerate the significant observations across all frames, in "
"chronological order, BEFORE composing the scene narrative. "
"Include the very start of the activity — for example, a "
"vehicle entering the frame or pulling into the driveway — "
"even if it lasts only a few frames and the rest of the clip "
"is dominated by a longer activity. Include each arrival, "
"departure, motion event, object handled, and notable change "
"in position or state. Each item is a single concrete fact "
"written as a complete sentence (e.g., 'A blue sedan turns "
"from the street into the driveway', 'Nick exits the driver "
"side carrying a plant pot'). Do not summarize, interpret, or "
"assign meaning here — that belongs in the scene field."
)
observations_prop["minItems"] = min_observations
observations_prop["maxItems"] = max_observations
observations_prop["items"] = {"type": "string", "minLength": 20}
required = schema.setdefault("required", [])
if "observations" not in required:
required.append("observations")
# OpenAI strict mode requires additionalProperties: false on all objects
schema["additionalProperties"] = False
@@ -344,6 +388,14 @@ Guidelines:
"""Get the context window size for this provider in tokens."""
return 4096
def estimate_image_tokens(self, width: int, height: int) -> float:
"""Estimate prompt tokens consumed by a single image of the given dimensions.
Default heuristic: ~1 token per 1250 pixels. Providers that can measure or
know their model's exact image-token cost should override.
"""
return (width * height) / 1250
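For example, estimate_image_tokens(640, 480) yields 307200 / 1250 = 245.76 tokens under this heuristic, so at the 4096-token default context only a handful of frames fit once the prompt and response reservations are subtracted.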
def embed(
self,
texts: list[str] | None = None,

View File

@@ -42,6 +42,8 @@ class LlamaCppClient(GenAIClient):
_supports_vision: bool
_supports_audio: bool
_supports_tools: bool
_image_token_cache: dict[tuple[int, int], int]
_text_baseline_tokens: int | None
def _init_provider(self) -> str | None:
"""Initialize the client and query model metadata from the server."""
@@ -52,6 +54,8 @@
self._supports_vision = False
self._supports_audio = False
self._supports_tools = False
self._image_token_cache = {}
self._text_baseline_tokens = None
base_url = (
self.genai_config.base_url.rstrip("/")
@@ -272,6 +276,91 @@
return self._context_size
return 4096
def estimate_image_tokens(self, width: int, height: int) -> float:
"""Probe the llama.cpp server to learn the model's image-token cost at the
requested dimensions.
llama.cpp's image tokenization is a deterministic function of dimensions and
the loaded mmproj, so the result is cached per (width, height) for the
lifetime of the process. Falls back to the base pixel heuristic if the
server is unreachable or the response is malformed.
"""
if self.provider is None:
return super().estimate_image_tokens(width, height)
cached = self._image_token_cache.get((width, height))
if cached is not None:
return cached
try:
baseline = self._probe_baseline_tokens()
with_image = self._probe_image_prompt_tokens(width, height)
tokens = max(1, with_image - baseline)
except Exception as e:
logger.debug(
"llama.cpp image-token probe failed for %dx%d (%s); using heuristic",
width,
height,
e,
)
return super().estimate_image_tokens(width, height)
self._image_token_cache[(width, height)] = tokens
logger.debug(
"llama.cpp model '%s' uses ~%d tokens for %dx%d images",
self.genai_config.model,
tokens,
width,
height,
)
return tokens
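Hypothetical probe numbers, purely to illustrate the delta: if the text-only baseline reports usage.prompt_tokens = 12 and the same prompt plus a 640x480 gray JPEG reports 1036, the cached estimate is 1036 - 12 = 1024 tokens per image, roughly 4x what the pixel heuristic (245.76) would have assumed at this resolution.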
def _probe_baseline_tokens(self) -> int:
"""Return prompt_tokens for a minimal text-only request. Cached after first call."""
if self._text_baseline_tokens is not None:
return self._text_baseline_tokens
self._text_baseline_tokens = self._probe_prompt_tokens(
[{"type": "text", "text": "."}]
)
return self._text_baseline_tokens
def _probe_image_prompt_tokens(self, width: int, height: int) -> int:
"""Return prompt_tokens for a single synthetic image plus minimal text."""
img = Image.new("RGB", (width, height), (128, 128, 128))
buf = io.BytesIO()
img.save(buf, format="JPEG", quality=60)
encoded = base64.b64encode(buf.getvalue()).decode("utf-8")
return self._probe_prompt_tokens(
[
{"type": "text", "text": "."},
{
"type": "image_url",
"image_url": {"url": f"data:image/jpeg;base64,{encoded}"},
},
]
)
def _probe_prompt_tokens(self, content: list[dict[str, Any]]) -> int:
"""POST a 1-token chat completion and return reported prompt_tokens.
Uses a generous timeout to absorb a cold model load on the first probe
when the server lazily loads models on demand (e.g. llama-swap).
"""
payload = {
"model": self.genai_config.model,
"messages": [{"role": "user", "content": content}],
"max_tokens": 1,
}
response = requests.post(
f"{self.provider}/v1/chat/completions",
json=payload,
timeout=60,
)
response.raise_for_status()
return int(response.json()["usage"]["prompt_tokens"])
def _build_payload(
self,
messages: list[dict[str, Any]],

View File

@@ -123,6 +123,15 @@ def get_detector_temperature(
if index < len(hailo_device_names):
device_name = hailo_device_names[index]
return hailo_temps[device_name]
elif detector_type == "hailo10h":
# Get temperatures for Hailo devices
hailo_temps = get_hailo_temps()
if hailo_temps:
hailo_device_names = sorted(hailo_temps.keys())
index = detector_index_by_type.get("hailo10h", 0)
if index < len(hailo_device_names):
device_name = hailo_device_names[index]
return hailo_temps[device_name]
elif detector_type == "rknn":
# Rockchip temperatures are handled by the GPU / NPU stats
# as there are not detector specific temperatures

View File

@@ -257,6 +257,7 @@
"export": "Export",
"actions": "Actions",
"uiPlayground": "UI Playground",
"features": "Features",
"faceLibrary": "Face Library",
"classification": "Classification",
"chat": "Chat",

View File

@@ -397,6 +397,14 @@
"description": "The device to use for Hailo inference (e.g. 'PCIe', 'M.2')."
}
},
"hailo10h": {
"label": "Hailo-10H",
"description": "Hailo-10H detector using HEF models and the HailoRT SDK for inference on Hailo hardware.",
"device": {
"label": "Device Type",
"description": "The device to use for Hailo inference (e.g. 'PCIe', 'M.2')."
}
},
"memryx": {
"label": "MemryX",
"description": "MemryX MX3 detector that runs compiled DFP models on MemryX accelerators.",

View File

@@ -161,13 +161,13 @@ export function AnimatedEventCard({
<TooltipTrigger asChild>
<Button
className={cn(
"absolute left-2 top-1 z-40 transition-opacity",
"absolute left-2 top-1 z-40 bg-gray-500 bg-gradient-to-br from-gray-400 to-gray-500 transition-opacity",
threatLevel === ThreatLevel.SECURITY_CONCERN &&
"pointer-events-auto bg-severity_alert opacity-100 hover:bg-severity_alert",
"pointer-events-auto opacity-100",
threatLevel === ThreatLevel.NEEDS_REVIEW &&
"pointer-events-auto bg-severity_detection opacity-100 hover:bg-severity_detection",
"pointer-events-auto opacity-100",
threatLevel === ThreatLevel.NORMAL &&
"pointer-events-none bg-gray-500 bg-gradient-to-br from-gray-400 to-gray-500 opacity-0 group-hover:pointer-events-auto group-hover:opacity-100",
"pointer-events-none opacity-0 group-hover:pointer-events-auto group-hover:opacity-100",
)}
size="xs"
aria-label={t("markAsReviewed")}

View File

@@ -155,14 +155,40 @@ export function MessageBubble({
) : (
<div
className={cn(
"[&>*:last-child]:inline",
!isComplete &&
"after:ml-0.5 after:inline-block after:h-4 after:w-2 after:animate-cursor-blink after:rounded-sm after:bg-foreground after:align-middle after:content-['']",
"[&>p:last-child]:inline after:ml-0.5 after:inline-block after:h-4 after:w-2 after:animate-cursor-blink after:rounded-sm after:bg-foreground after:align-middle after:content-['']",
)}
>
<ReactMarkdown
remarkPlugins={[remarkGfm]}
components={{
p: ({ node: _n, ...props }) => (
<p className="my-2 first:mt-0 last:mb-0" {...props} />
),
ul: ({ node: _n, ...props }) => (
<ul
className="my-2 list-disc space-y-1 pl-6 first:mt-0 last:mb-0"
{...props}
/>
),
ol: ({ node: _n, ...props }) => (
<ol
className="my-2 list-decimal space-y-1 pl-6 first:mt-0 last:mb-0"
{...props}
/>
),
li: ({ node: _n, ...props }) => (
<li className="pl-1" {...props} />
),
code: ({ node: _n, className, ...props }) => (
<code
className={cn(
"rounded bg-foreground/10 px-1 py-0.5 font-mono text-sm",
className,
)}
{...props}
/>
),
table: ({ node: _n, ...props }) => (
<table
className="my-2 w-full border-collapse border border-border"

View File

@@ -14,7 +14,6 @@ import Step3ChooseExamples, {
Step3FormData,
} from "./wizard/Step3ChooseExamples";
import { cn } from "@/lib/utils";
import { isDesktop } from "react-device-detect";
import axios from "axios";
const OBJECT_STEPS = [
@@ -153,13 +152,9 @@ export default function ClassificationModelWizardDialog({
>
<DialogContent
className={cn(
"",
isDesktop &&
wizardState.currentStep == 0 &&
"max-h-[90%] overflow-y-auto xl:max-h-[80%]",
isDesktop &&
wizardState.currentStep > 0 &&
"max-h-[90%] max-w-[70%] overflow-y-auto xl:max-h-[80%]",
"scrollbar-container max-h-[90%] overflow-y-auto",
wizardState.currentStep == 0 && "xl:max-h-[80%]",
wizardState.currentStep > 0 && "md:max-w-[70%] xl:max-h-[80%]",
)}
onInteractOutside={(e) => {
e.preventDefault();

View File

@@ -6,6 +6,7 @@ import {
LuLifeBuoy,
LuList,
LuLogOut,
LuMessageSquare,
LuMoon,
LuSquarePen,
LuScanFace,
@@ -482,21 +483,25 @@ export default function GeneralSettings({ className }: GeneralSettingsProps) {
</Link>
</>
)}
{isAdmin && isMobile && config?.face_recognition.enabled && (
<>
<Link to="/faces">
<MenuItem
className="flex w-full items-center p-2 text-sm"
aria-label={t("menu.faceLibrary")}
>
<LuScanFace className="mr-2 size-4" />
<span>{t("menu.faceLibrary")}</span>
</MenuItem>
</Link>
</>
)}
{isAdmin && isMobile && (
<>
</DropdownMenuGroup>
{isMobile && isAdmin && (
<>
<DropdownMenuLabel className="mt-1">
{t("menu.features")}
</DropdownMenuLabel>
<DropdownMenuSeparator />
<DropdownMenuGroup className="flex flex-col">
{config?.face_recognition.enabled && (
<Link to="/faces">
<MenuItem
className="flex w-full items-center p-2 text-sm"
aria-label={t("menu.faceLibrary")}
>
<LuScanFace className="mr-2 size-4" />
<span>{t("menu.faceLibrary")}</span>
</MenuItem>
</Link>
)}
<Link to="/classification">
<MenuItem
className="flex w-full items-center p-2 text-sm"
@@ -506,9 +511,20 @@ export default function GeneralSettings({ className }: GeneralSettingsProps) {
<span>{t("menu.classification")}</span>
</MenuItem>
</Link>
</>
)}
</DropdownMenuGroup>
{config?.genai?.model !== "none" && (
<Link to="/chat">
<MenuItem
className="flex w-full items-center p-2 text-sm"
aria-label={t("menu.chat")}
>
<LuMessageSquare className="mr-2 size-4" />
<span>{t("menu.chat")}</span>
</MenuItem>
</Link>
)}
</DropdownMenuGroup>
</>
)}
<DropdownMenuLabel className={isDesktop ? "mt-3" : "mt-1"}>
{t("menu.appearance")}
</DropdownMenuLabel>