mirror of
https://github.com/blakeblackshear/frigate.git
synced 2026-05-07 22:15:28 +03:00
Compare commits
8 Commits
091c73c825
...
d18b7f8f97
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d18b7f8f97 | ||
|
|
0ea8924727 | ||
|
|
1a1994ca17 | ||
|
|
0f3dd097ec | ||
|
|
2a4d7e4766 | ||
|
|
46415ffeb5 | ||
|
|
e35ab0b8a1 | ||
|
|
837373547d |
7
Makefile
7
Makefile
@ -21,6 +21,13 @@ local: version
|
||||
--tag frigate:latest \
|
||||
--load
|
||||
|
||||
localh10: version
|
||||
docker buildx build --target=frigate --file docker/main/Dockerfile . \
|
||||
--build-arg HAILORT_VERSION=5.1.1 \
|
||||
--build-arg HAILORT_GIT_REPO=mathieu-d/hailort \
|
||||
--tag frigate:latest \
|
||||
--load
|
||||
|
||||
debug: version
|
||||
docker buildx build --target=frigate --file docker/main/Dockerfile . \
|
||||
--build-arg DEBUG=true \
|
||||
|
||||
@ -12,6 +12,11 @@ services:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: docker/main/Dockerfile
|
||||
# Use args to specify hailort version and location
|
||||
# args:
|
||||
# HAILORT_VERSION: "5.1.1"
|
||||
# HAILORT_GIT_REPO: "mathieu-d/hailort"
|
||||
|
||||
# Use target devcontainer-trt for TensorRT dev
|
||||
target: devcontainer
|
||||
cache_from:
|
||||
@ -29,6 +34,7 @@ services:
|
||||
# devices:
|
||||
# - /dev/bus/usb:/dev/bus/usb # Uncomment for Google Coral USB
|
||||
# - /dev/dri:/dev/dri # for intel hwaccel, needs to be updated for your hardware
|
||||
|
||||
volumes:
|
||||
- .:/workspace/frigate:cached
|
||||
- ./web/dist:/opt/frigate/web:cached
|
||||
|
||||
7
docker/hailo10h/user_installation.sh
Normal file
7
docker/hailo10h/user_installation.sh
Normal file
@ -0,0 +1,7 @@
|
||||
#!/bin/bash

# Host-side setup for the Hailo-10H accelerator: installs the matching
# userspace driver/runtime package pinned to the version Frigate builds against.
# Update package list and install hailo driver version 5.1.1 for Hailo-10H
sudo apt update
sudo apt install -y hailo-h10-all=5.1.1
|
||||
|
||||
|
||||
@ -157,6 +157,8 @@ FROM base AS wheels
|
||||
ARG DEBIAN_FRONTEND
|
||||
ARG TARGETARCH
|
||||
ARG DEBUG=false
|
||||
ARG HAILORT_VERSION=4.21.0
|
||||
ARG HAILORT_GIT_REPO=frigate-nvr/hailort
|
||||
|
||||
# Use a separate container to build wheels to prevent build dependencies in final image
|
||||
RUN apt-get -qq update \
|
||||
|
||||
@ -2,13 +2,11 @@
|
||||
|
||||
set -euxo pipefail
|
||||
|
||||
hailo_version="4.21.0"
|
||||
|
||||
if [[ "${TARGETARCH}" == "amd64" ]]; then
|
||||
arch="x86_64"
|
||||
elif [[ "${TARGETARCH}" == "arm64" ]]; then
|
||||
arch="aarch64"
|
||||
fi
|
||||
|
||||
wget -qO- "https://github.com/frigate-nvr/hailort/releases/download/v${hailo_version}/hailort-debian12-${TARGETARCH}.tar.gz" | tar -C / -xzf -
|
||||
wget -P /wheels/ "https://github.com/frigate-nvr/hailort/releases/download/v${hailo_version}/hailort-${hailo_version}-cp311-cp311-linux_${arch}.whl"
|
||||
wget -qO- "https://github.com/${HAILORT_GIT_REPO}/releases/download/v${HAILORT_VERSION}/hailort-debian12-${TARGETARCH}.tar.gz" | tar -C / -xzf -
|
||||
wget -P /wheels/ "https://github.com/${HAILORT_GIT_REPO}/releases/download/v${HAILORT_VERSION}/hailort-${HAILORT_VERSION}-cp311-cp311-linux_${arch}.whl"
|
||||
|
||||
@ -36,6 +36,7 @@ from frigate.api.defs.response.chat_response import (
|
||||
)
|
||||
from frigate.api.defs.tags import Tags
|
||||
from frigate.api.event import events
|
||||
from frigate.config import FrigateConfig
|
||||
from frigate.genai.utils import build_assistant_message_for_conversation
|
||||
from frigate.jobs.vlm_watch import (
|
||||
get_vlm_watch_job,
|
||||
@ -401,9 +402,38 @@ def get_tools() -> JSONResponse:
|
||||
return JSONResponse(content={"tools": tools})
|
||||
|
||||
|
||||
def _resolve_zones(
    zones: List[str],
    config: FrigateConfig,
    target_cameras: List[str],
) -> List[str]:
    """Translate zone names to their canonical config keys, ignoring case.

    LLMs frequently echo a user's casing ("Front Yard") instead of the
    configured key ("front_yard"). The downstream zone filter is a SQLite GLOB
    over the JSON-encoded zones column, which is case-sensitive — so an
    unnormalized name silently returns zero matches. Build a lookup over the
    relevant cameras' configured zones and substitute when we find a match;
    unknown names pass through so behavior matches what the model asked for.
    """
    if not zones:
        return zones

    # Lowercased zone name -> canonical configured name. First camera that
    # defines a name wins, mirroring setdefault semantics.
    canonical: Dict[str, str] = {}
    for cam in target_cameras:
        cam_config = config.cameras.get(cam)
        if cam_config is None:
            continue
        for configured_name in cam_config.zones:
            key = configured_name.lower()
            if key not in canonical:
                canonical[key] = configured_name

    resolved: List[str] = []
    for requested in zones:
        resolved.append(canonical.get(requested.lower(), requested))
    return resolved
|
||||
|
||||
|
||||
async def _execute_search_objects(
|
||||
arguments: Dict[str, Any],
|
||||
allowed_cameras: List[str],
|
||||
config: FrigateConfig,
|
||||
) -> JSONResponse:
|
||||
"""
|
||||
Execute the search_objects tool.
|
||||
@ -437,6 +467,11 @@ async def _execute_search_objects(
|
||||
# Convert zones array to comma-separated string if provided
|
||||
zones = arguments.get("zones")
|
||||
if isinstance(zones, list):
|
||||
camera_arg = arguments.get("camera")
|
||||
target_cameras = (
|
||||
[camera_arg] if camera_arg and camera_arg != "all" else allowed_cameras
|
||||
)
|
||||
zones = _resolve_zones(zones, config, target_cameras)
|
||||
zones = ",".join(zones)
|
||||
elif zones is None:
|
||||
zones = "all"
|
||||
@ -528,6 +563,11 @@ async def _execute_find_similar_objects(
|
||||
sub_labels = arguments.get("sub_labels")
|
||||
zones = arguments.get("zones")
|
||||
|
||||
if zones:
|
||||
zones = _resolve_zones(
|
||||
zones, request.app.frigate_config, cameras or list(allowed_cameras)
|
||||
)
|
||||
|
||||
similarity_mode = arguments.get("similarity_mode", "fused")
|
||||
if similarity_mode not in ("visual", "semantic", "fused"):
|
||||
similarity_mode = "fused"
|
||||
@ -655,7 +695,9 @@ async def execute_tool(
|
||||
logger.debug(f"Executing tool: {tool_name} with arguments: {arguments}")
|
||||
|
||||
if tool_name == "search_objects":
|
||||
return await _execute_search_objects(arguments, allowed_cameras)
|
||||
return await _execute_search_objects(
|
||||
arguments, allowed_cameras, request.app.frigate_config
|
||||
)
|
||||
|
||||
if tool_name == "find_similar_objects":
|
||||
result = await _execute_find_similar_objects(
|
||||
@ -835,7 +877,9 @@ async def _execute_tool_internal(
|
||||
This is used by the chat completion endpoint to execute tools.
|
||||
"""
|
||||
if tool_name == "search_objects":
|
||||
response = await _execute_search_objects(arguments, allowed_cameras)
|
||||
response = await _execute_search_objects(
|
||||
arguments, allowed_cameras, request.app.frigate_config
|
||||
)
|
||||
try:
|
||||
if hasattr(response, "body"):
|
||||
body_str = response.body.decode("utf-8")
|
||||
@ -899,6 +943,9 @@ async def _execute_start_camera_watch(
|
||||
|
||||
await require_camera_access(camera, request=request)
|
||||
|
||||
if zones:
|
||||
zones = _resolve_zones(zones, config, [camera])
|
||||
|
||||
genai_manager = request.app.genai_manager
|
||||
chat_client = genai_manager.chat_client
|
||||
if chat_client is None or not chat_client.supports_vision:
|
||||
|
||||
@ -39,6 +39,8 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
RECORDING_BUFFER_EXTENSION_PERCENT = 0.10
|
||||
MIN_RECORDING_DURATION = 10
|
||||
MAX_IMAGE_TOKENS = 24000
|
||||
MAX_FRAMES_PER_SECOND = 2
|
||||
|
||||
|
||||
class ReviewDescriptionProcessor(PostProcessorApi):
|
||||
@ -60,14 +62,22 @@ class ReviewDescriptionProcessor(PostProcessorApi):
|
||||
def calculate_frame_count(
|
||||
self,
|
||||
camera: str,
|
||||
duration: float,
|
||||
image_source: ImageSourceEnum = ImageSourceEnum.preview,
|
||||
height: int = 480,
|
||||
) -> int:
|
||||
"""Calculate optimal number of frames based on context size, image source, and resolution.
|
||||
"""Calculate optimal number of frames based on event duration, context size,
|
||||
image source, and resolution.
|
||||
|
||||
Token usage varies by resolution: larger images (ultra-wide aspect ratios) use more tokens.
|
||||
Estimates ~1 token per 1250 pixels. Targets 98% context utilization with safety margin.
|
||||
Capped at 20 frames.
|
||||
Per-image token cost is asked of the GenAI provider so providers that know
|
||||
their model's true cost (e.g. llama.cpp can probe the loaded mmproj) can
|
||||
diverge from the default ~1-token-per-1250-pixels heuristic. The frame
|
||||
budget is bounded by:
|
||||
- remaining context window after prompt + response reservations
|
||||
- a fixed MAX_IMAGE_TOKENS ceiling
|
||||
- MAX_FRAMES_PER_SECOND x duration, to avoid drowning short events in
|
||||
near-duplicate frames where the model latches onto the redundant middle
|
||||
and skips the start/end action
|
||||
"""
|
||||
client = self.genai_manager.description_client
|
||||
|
||||
@ -105,14 +115,15 @@ class ReviewDescriptionProcessor(PostProcessorApi):
|
||||
width = target_width
|
||||
height = int(target_width / aspect_ratio)
|
||||
|
||||
pixels_per_image = width * height
|
||||
tokens_per_image = pixels_per_image / 1250
|
||||
tokens_per_image = client.estimate_image_tokens(width, height)
|
||||
prompt_tokens = 3800
|
||||
response_tokens = 300
|
||||
available_tokens = context_size - prompt_tokens - response_tokens
|
||||
max_frames = int(available_tokens / tokens_per_image)
|
||||
|
||||
return min(max(max_frames, 3), 20)
|
||||
context_budget = context_size - prompt_tokens - response_tokens
|
||||
image_token_budget = min(context_budget, MAX_IMAGE_TOKENS)
|
||||
max_frames_by_tokens = int(image_token_budget / tokens_per_image)
|
||||
max_frames_by_duration = int(duration * MAX_FRAMES_PER_SECOND)
|
||||
max_frames = min(max_frames_by_tokens, max_frames_by_duration)
|
||||
return max(max_frames, 3)
|
||||
|
||||
def process_data(
|
||||
self, data: dict[str, Any], data_type: PostProcessDataEnum
|
||||
@ -376,7 +387,9 @@ class ReviewDescriptionProcessor(PostProcessorApi):
|
||||
all_frames.append(os.path.join(preview_dir, file))
|
||||
|
||||
frame_count = len(all_frames)
|
||||
desired_frame_count = self.calculate_frame_count(camera)
|
||||
desired_frame_count = self.calculate_frame_count(
|
||||
camera, duration=end_time - start_time
|
||||
)
|
||||
|
||||
if frame_count <= desired_frame_count:
|
||||
return all_frames
|
||||
@ -400,7 +413,7 @@ class ReviewDescriptionProcessor(PostProcessorApi):
|
||||
"""Get frames from recordings at specified timestamps."""
|
||||
duration = end_time - start_time
|
||||
desired_frame_count = self.calculate_frame_count(
|
||||
camera, ImageSourceEnum.recordings, height
|
||||
camera, duration, ImageSourceEnum.recordings, height
|
||||
)
|
||||
|
||||
# Calculate evenly spaced timestamps throughout the duration
|
||||
|
||||
@ -4,12 +4,14 @@ from pydantic import BaseModel, ConfigDict, Field
|
||||
class ReviewMetadata(BaseModel):
|
||||
model_config = ConfigDict(extra="ignore", protected_namespaces=())
|
||||
|
||||
observations: list[str] = Field(
|
||||
default_factory=list,
|
||||
description="Chronological list of significant observations from the frames, written before the scene narrative is composed.",
|
||||
)
|
||||
title: str = Field(
|
||||
description="A short title characterizing what took place and where, under 10 words."
|
||||
)
|
||||
scene: str = Field(
|
||||
min_length=120,
|
||||
max_length=600,
|
||||
description="A chronological narrative of what happens from start to finish.",
|
||||
)
|
||||
shortSummary: str = Field(
|
||||
|
||||
415
frigate/detectors/plugins/hailo10h.py
Executable file
415
frigate/detectors/plugins/hailo10h.py
Executable file
@ -0,0 +1,415 @@
|
||||
import logging
|
||||
import os
|
||||
import subprocess
|
||||
import threading
|
||||
import urllib.request
|
||||
from functools import partial
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
from pydantic import ConfigDict, Field
|
||||
from typing_extensions import Literal
|
||||
|
||||
from frigate.const import MODEL_CACHE_DIR
|
||||
from frigate.detectors.detection_api import DetectionApi
|
||||
from frigate.detectors.detector_config import (
|
||||
BaseDetectorConfig,
|
||||
)
|
||||
from frigate.object_detection.util import RequestStore, ResponseStore
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ----------------- Utility Functions ----------------- #
|
||||
|
||||
|
||||
def preprocess_tensor(image: np.ndarray, model_w: int, model_h: int) -> np.ndarray:
    """Letterbox *image* into a (model_h, model_w, 3) canvas.

    The image is scaled so its aspect ratio is preserved, then centered on a
    canvas filled with the value 114 (the conventional YOLO padding gray).
    Accepts an (H, W, 3) array, or one with a leading batch dimension of 1.
    """
    # Drop a singleton batch dimension if present.
    if image.ndim == 4 and image.shape[0] == 1:
        image = image[0]

    src_h, src_w = image.shape[:2]
    ratio = min(model_w / src_w, model_h / src_h)
    dst_w = int(src_w * ratio)
    dst_h = int(src_h * ratio)

    scaled = cv2.resize(image, (dst_w, dst_h), interpolation=cv2.INTER_CUBIC)

    canvas = np.full((model_h, model_w, 3), 114, dtype=image.dtype)
    left = (model_w - dst_w) // 2
    top = (model_h - dst_h) // 2
    canvas[top : top + dst_h, left : left + dst_w] = scaled
    return canvas
|
||||
|
||||
|
||||
# ----------------- Global Constants ----------------- #
# Config key that selects this detector plugin.
DETECTOR_KEY = "hailo10h"
# Detected device architecture; written by HailoDetector.__init__ via
# detect_hailo_arch(). Stays None until a detector is constructed.
ARCH = None
# Default compiled model and where to fetch it from the Hailo Model Zoo
# when no model path/URL is configured.
H10H_DEFAULT_MODEL = "yolov6n.hef"
H10H_DEFAULT_URL = "https://hailo-model-zoo.s3.eu-west-2.amazonaws.com/ModelZoo/Compiled/v5.2.0/hailo10h/yolov6n.hef"
|
||||
|
||||
|
||||
def detect_hailo_arch():
    """Return "hailo10h" if `hailortcli fw-control identify` reports a
    Hailo-10H device, otherwise None.

    Any CLI failure, non-zero exit, or unrecognized output is logged and
    mapped to None so callers can treat detection as best-effort.
    """
    try:
        identify = subprocess.run(
            ["hailortcli", "fw-control", "identify"], capture_output=True, text=True
        )
    except Exception as e:
        logger.error(f"Inference error: {e}")
        return None

    if identify.returncode != 0:
        logger.error(f"Inference error: {identify.stderr}")
        return None

    for line in identify.stdout.split("\n"):
        if "Device Architecture" in line and "HAILO10H" in line:
            return "hailo10h"

    logger.error("Inference error: Could not determine Hailo architecture.")
    return None
|
||||
|
||||
|
||||
# ----------------- HailoAsyncInference Class ----------------- #
|
||||
class HailoAsyncInference:
    """Background inference driver for a Hailo device.

    Pulls (request_id, frame) pairs from a RequestStore, runs them through the
    HailoRT async infer model, and publishes (frame, result) tuples to a
    ResponseStore keyed by request id. Intended to run on its own thread via
    run().
    """

    def __init__(
        self,
        hef_path: str,
        input_store: RequestStore,
        output_store: ResponseStore,
        batch_size: int = 1,
        input_type: Optional[str] = None,
        output_type: Optional[Dict[str, str]] = None,
        send_original_frame: bool = False,
    ) -> None:
        # hailo_platform is imported lazily: importing it activates the
        # driver, which leaves processes running even when it is not used.
        try:
            from hailo_platform import (
                HEF,
                FormatType,
                HailoSchedulingAlgorithm,
                VDevice,
            )
        except ModuleNotFoundError:
            # If the SDK is absent, the names below raise NameError; the
            # detector wraps construction in try/except and surfaces it.
            pass

        self.input_store = input_store
        self.output_store = output_store

        params = VDevice.create_params()
        params.scheduling_algorithm = HailoSchedulingAlgorithm.ROUND_ROBIN

        self.hef = HEF(hef_path)
        self.target = VDevice(params)
        self.infer_model = self.target.create_infer_model(hef_path)
        self.infer_model.set_batch_size(batch_size)

        if input_type is not None:
            self.infer_model.input().set_format_type(getattr(FormatType, input_type))

        if output_type is not None:
            # BUGFIX: the original loop rebound `output_type` itself
            # (`for output_name, output_type in output_type.items()`), so
            # `self.output_type` below ended up holding the last format
            # *string* instead of the mapping, breaking _create_bindings'
            # per-output dtype lookup. Use a distinct loop variable.
            for output_name, format_name in output_type.items():
                self.infer_model.output(output_name).set_format_type(
                    getattr(FormatType, format_name)
                )

        self.output_type = output_type
        self.send_original_frame = send_original_frame

    def callback(
        self,
        completion_info,
        bindings_list: List,
        input_batch: List,
        request_ids: List[int],
    ):
        """HailoRT async-job completion hook: publish each result to the store."""
        if completion_info.exception:
            logger.error(f"Inference error: {completion_info.exception}")
        else:
            for i, bindings in enumerate(bindings_list):
                # Single-output models yield the raw buffer; multi-output
                # models yield a name -> batched-buffer dict.
                if len(bindings._output_names) == 1:
                    result = bindings.output().get_buffer()
                else:
                    result = {
                        name: np.expand_dims(bindings.output(name).get_buffer(), axis=0)
                        for name in bindings._output_names
                    }
                self.output_store.put(request_ids[i], (input_batch[i], result))

    def _create_bindings(self, configured_infer_model) -> object:
        """Allocate output buffers and wrap them in HailoRT bindings.

        Buffer dtypes come from the HEF's vstream metadata unless an explicit
        output_type mapping was supplied at construction.
        """
        if self.output_type is None:
            output_buffers = {
                output_info.name: np.empty(
                    self.infer_model.output(output_info.name).shape,
                    dtype=getattr(
                        np, str(output_info.format.type).split(".")[1].lower()
                    ),
                )
                for output_info in self.hef.get_output_vstream_infos()
            }
        else:
            output_buffers = {
                name: np.empty(
                    self.infer_model.output(name).shape,
                    dtype=getattr(np, self.output_type[name].lower()),
                )
                for name in self.output_type
            }
        return configured_infer_model.create_bindings(output_buffers=output_buffers)

    def get_input_shape(self) -> Tuple[int, ...]:
        """Shape of the model's first input vstream.

        Presumably (H, W, C) — consumers index [0] as height and [1] as
        width; confirm against the loaded HEF.
        """
        return self.hef.get_input_vstream_infos()[0].shape

    def run(self) -> None:
        """Consume frames from the input store until a None sentinel arrives."""
        job = None
        with self.infer_model.configure() as configured_infer_model:
            while True:
                batch_data = self.input_store.get()

                if batch_data is None:
                    # Sentinel: shut down the worker loop.
                    break

                request_id, frame_data = batch_data
                preprocessed_batch = [frame_data]
                request_ids = [request_id]
                input_batch = preprocessed_batch  # non-send_original_frame mode

                bindings_list = []
                for frame in preprocessed_batch:
                    bindings = self._create_bindings(configured_infer_model)
                    bindings.input().set_buffer(np.array(frame))
                    bindings_list.append(bindings)
                configured_infer_model.wait_for_async_ready(timeout_ms=10000)
                job = configured_infer_model.run_async(
                    bindings_list,
                    partial(
                        self.callback,
                        input_batch=input_batch,
                        request_ids=request_ids,
                        bindings_list=bindings_list,
                    ),
                )

        # Let the last in-flight job finish before tearing down.
        if job is not None:
            job.wait(100)
|
||||
|
||||
|
||||
# ----------------- HailoDetector Class ----------------- #
|
||||
class HailoDetector(DetectionApi):
    """Frigate detector plugin for the Hailo-10H accelerator.

    Resolves the HEF model file (local path, URL, or cached default), starts a
    HailoAsyncInference worker thread, and exchanges frames/results with it
    through a RequestStore / ResponseStore pair.
    """

    type_key = DETECTOR_KEY

    def __init__(self, detector_config: "HailoDetectorConfig"):
        # Record the detected architecture at module scope for stats use.
        global ARCH
        ARCH = detect_hailo_arch()
        self.cache_dir = MODEL_CACHE_DIR
        self.device_type = detector_config.device
        # Model attributes are optional on the config object; default to None.
        # (getattr replaces the original hasattr-ternary chains — identical
        # behavior, less repetition.)
        self.model_height = getattr(detector_config.model, "height", None)
        self.model_width = getattr(detector_config.model, "width", None)
        self.model_type = getattr(detector_config.model, "model_type", None)
        self.tensor_format = getattr(detector_config.model, "input_tensor", None)
        self.pixel_format = getattr(detector_config.model, "input_pixel_format", None)
        self.input_dtype = getattr(detector_config.model, "input_dtype", None)
        self.output_type = "FLOAT32"
        self.set_path_and_url(detector_config.model.path)
        self.working_model_path = self.check_and_prepare()

        self.batch_size = 1
        self.input_store = RequestStore()
        self.response_store = ResponseStore()

        try:
            logger.debug(f"[INIT] Loading HEF model from {self.working_model_path}")
            self.inference_engine = HailoAsyncInference(
                self.working_model_path,
                self.input_store,
                self.response_store,
                self.batch_size,
            )
            self.input_shape = self.inference_engine.get_input_shape()
            logger.debug(f"[INIT] Model input shape: {self.input_shape}")
            # Daemon thread so a wedged inference engine can't block shutdown.
            self.inference_thread = threading.Thread(
                target=self.inference_engine.run, daemon=True
            )
            self.inference_thread.start()
        except Exception as e:
            logger.error(f"[INIT] Failed to initialize HailoAsyncInference: {e}")
            raise

    def set_path_and_url(self, path: str = None):
        """Split the configured model `path` into self.model_path / self.url."""
        if not path:
            self.model_path = None
            self.url = None
            return
        if self.is_url(path):
            self.url = path
            self.model_path = None
        else:
            self.model_path = path
            self.url = None

    def is_url(self, url: str) -> bool:
        """True if `url` looks like a remote location rather than a local path."""
        return url.startswith(("http://", "https://", "www."))

    @staticmethod
    def extract_model_name(path: str = None, url: str = None) -> str:
        """Derive the cache filename: an explicit .hef path wins, then a .hef
        URL, then the bundled default model name."""
        if path and path.endswith(".hef"):
            return os.path.basename(path)
        if url and url.endswith(".hef"):
            return os.path.basename(url)
        return H10H_DEFAULT_MODEL

    @staticmethod
    def download_model(url: str, destination: str):
        """Download a .hef model to `destination`.

        Raises:
            ValueError: for non-.hef URLs.
            RuntimeError: on download failure (original error chained).
        """
        if not url.endswith(".hef"):
            raise ValueError("Invalid model URL. Only .hef files are supported.")
        try:
            urllib.request.urlretrieve(url, destination)
            logger.debug(f"Downloaded model to {destination}")
        except Exception as e:
            # Chain the cause so the underlying network/IO error stays visible.
            raise RuntimeError(f"Failed to download model from {url}: {str(e)}") from e

    def check_and_prepare(self) -> str:
        """Return a usable local model path, downloading to the cache if needed.

        Raises FileNotFoundError when an explicit local path does not exist.
        """
        # exist_ok avoids a TOCTOU race when several detectors start at once
        # (the original exists()-then-makedirs could raise FileExistsError).
        os.makedirs(self.cache_dir, exist_ok=True)
        model_name = self.extract_model_name(self.model_path, self.url)
        cached_model_path = os.path.join(self.cache_dir, model_name)
        if not self.model_path and not self.url:
            # Nothing configured: use (and if necessary fetch) the default.
            if os.path.exists(cached_model_path):
                logger.debug(f"Model found in cache: {cached_model_path}")
                return cached_model_path
            logger.debug(f"Downloading default model: {model_name}")
            self.download_model(H10H_DEFAULT_URL, cached_model_path)
        elif self.url:
            logger.debug(f"Downloading model from URL: {self.url}")
            self.download_model(self.url, cached_model_path)
        elif self.model_path:
            if os.path.exists(self.model_path):
                logger.debug(f"Using existing model at: {self.model_path}")
                return self.model_path
            raise FileNotFoundError(f"Model file not found at: {self.model_path}")
        return cached_model_path

    def detect_raw(self, tensor_input):
        """Run one frame through the async engine.

        Returns a (20, 6) float32 array of [class_id, score, d0, d1, d2, d3]
        rows, zero-padded or truncated to exactly 20. Returns all zeros on an
        inference timeout; raises RuntimeError if the worker thread has died.
        """
        tensor_input = self.preprocess(tensor_input)

        # Ensure a leading batch dimension.
        if isinstance(tensor_input, np.ndarray) and len(tensor_input.shape) == 3:
            tensor_input = np.expand_dims(tensor_input, axis=0)

        request_id = self.input_store.put(tensor_input)

        try:
            _, infer_results = self.response_store.get(request_id, timeout=1.0)
        except TimeoutError:
            logger.error(
                f"Timeout waiting for inference results for request {request_id}"
            )

            if not self.inference_thread.is_alive():
                raise RuntimeError(
                    "HailoRT inference thread has stopped, restart required."
                )

            return np.zeros((20, 6), dtype=np.float32)

        if isinstance(infer_results, list) and len(infer_results) == 1:
            infer_results = infer_results[0]

        threshold = 0.4
        all_detections = []
        # Iterates one detection set per class; rows are presumably
        # [c0, c1, c2, c3, score] box coordinates — TODO confirm layout
        # against the HEF's postprocessed output format.
        for class_id, detection_set in enumerate(infer_results):
            if not isinstance(detection_set, np.ndarray) or detection_set.size == 0:
                continue
            for det in detection_set:
                if det.shape[0] < 5:
                    continue
                score = float(det[4])
                if score < threshold:
                    continue
                all_detections.append([class_id, score, det[0], det[1], det[2], det[3]])

        # Frigate expects exactly 20 rows of 6 values.
        if len(all_detections) == 0:
            detections_array = np.zeros((20, 6), dtype=np.float32)
        else:
            detections_array = np.array(all_detections, dtype=np.float32)
            if detections_array.shape[0] > 20:
                detections_array = detections_array[:20, :]
            elif detections_array.shape[0] < 20:
                pad = np.zeros((20 - detections_array.shape[0], 6), dtype=np.float32)
                detections_array = np.vstack((detections_array, pad))

        return detections_array

    def preprocess(self, image):
        """Letterbox a numpy image to the model's input size and add a batch dim."""
        if isinstance(image, np.ndarray):
            processed = preprocess_tensor(
                image, self.input_shape[1], self.input_shape[0]
            )
            return np.expand_dims(processed, axis=0)
        raise ValueError("Unsupported image format for preprocessing")

    def close(self):
        """Properly shuts down the inference engine and releases the VDevice."""
        logger.debug("[CLOSE] Closing HailoDetector")
        try:
            if hasattr(self, "inference_engine"):
                if hasattr(self.inference_engine, "target"):
                    self.inference_engine.target.release()
                    logger.debug("Hailo VDevice released successfully")
        except Exception as e:
            logger.error(f"Failed to close Hailo device: {e}")
            raise

    def __del__(self):
        """Destructor: best-effort cleanup.

        BUGFIX: close() re-raises on failure, and an exception escaping
        __del__ only produces 'Exception ignored' noise at interpreter
        shutdown — swallow it here (close() has already logged the error).
        """
        try:
            self.close()
        except Exception:
            pass
|
||||
|
||||
|
||||
# ----------------- HailoDetectorConfig Class ----------------- #
|
||||
# ----------------- HailoDetectorConfig Class ----------------- #
class HailoDetectorConfig(BaseDetectorConfig):
    """Hailo10H detector using HEF models and the HailoRT SDK for inference on Hailo hardware."""

    model_config = ConfigDict(
        title="Hailo-10H",
    )

    # Discriminator matching DETECTOR_KEY ("hailo10h") so Frigate's config
    # loader selects this plugin.
    type: Literal[DETECTOR_KEY]
    # Hardware interface hint passed through to the detector.
    device: str = Field(
        default="PCIe",
        title="Device Type",
        description="The device to use for Hailo inference (e.g. 'PCIe', 'M.2').",
    )
|
||||
@ -151,6 +151,50 @@ Each line represents a detection state, not necessarily unique individuals. The
|
||||
if "other_concerns" in schema.get("required", []):
|
||||
schema["required"].remove("other_concerns")
|
||||
|
||||
# Length hints injected into the schema as suggestions to the model
|
||||
# (enforced by grammar-based providers like llama.cpp) but kept off the
|
||||
# Pydantic model so a non-compliant response does not fail validation.
|
||||
length_hints = {
|
||||
"scene": {"minLength": 120, "maxLength": 600},
|
||||
"shortSummary": {"minLength": 70, "maxLength": 100},
|
||||
}
|
||||
for field, hints in length_hints.items():
|
||||
prop = schema.get("properties", {}).get(field)
|
||||
if prop is not None:
|
||||
prop.update(hints)
|
||||
|
||||
# observations is a chain-of-thought-by-schema field: forcing the model
|
||||
# to enumerate concrete facts before writing scene/title surfaces details
|
||||
# the narrative would otherwise gloss past (e.g. brief vehicle arrivals
|
||||
# overshadowed by a longer activity). The minItems floor scales with
|
||||
# event duration so longer clips get more observations.
|
||||
observations_prop = schema.get("properties", {}).get("observations")
|
||||
if observations_prop is not None:
|
||||
duration_seconds = float(review_data.get("duration") or 0)
|
||||
min_observations = max(3, round(duration_seconds / 5))
|
||||
max_observations = min_observations + 8
|
||||
observations_prop["description"] = (
|
||||
"Enumerate the significant observations across all frames, in "
|
||||
"chronological order, BEFORE composing the scene narrative. "
|
||||
"Include the very start of the activity — for example, a "
|
||||
"vehicle entering the frame or pulling into the driveway — "
|
||||
"even if it lasts only a few frames and the rest of the clip "
|
||||
"is dominated by a longer activity. Include each arrival, "
|
||||
"departure, motion event, object handled, and notable change "
|
||||
"in position or state. Each item is a single concrete fact "
|
||||
"written as a complete sentence (e.g., 'A blue sedan turns "
|
||||
"from the street into the driveway', 'Nick exits the driver "
|
||||
"side carrying a plant pot'). Do not summarize, interpret, or "
|
||||
"assign meaning here — that belongs in the scene field."
|
||||
)
|
||||
observations_prop["minItems"] = min_observations
|
||||
observations_prop["maxItems"] = max_observations
|
||||
observations_prop["items"] = {"type": "string", "minLength": 20}
|
||||
|
||||
required = schema.setdefault("required", [])
|
||||
if "observations" not in required:
|
||||
required.append("observations")
|
||||
|
||||
# OpenAI strict mode requires additionalProperties: false on all objects
|
||||
schema["additionalProperties"] = False
|
||||
|
||||
@ -344,6 +388,14 @@ Guidelines:
|
||||
"""Get the context window size for this provider in tokens."""
|
||||
return 4096
|
||||
|
||||
def estimate_image_tokens(self, width: int, height: int) -> float:
    """Estimate prompt tokens consumed by one image of the given dimensions.

    Providers that can measure or know their model's exact image-token cost
    should override this method.
    """
    pixel_count = width * height
    # Default heuristic: roughly one token per 1250 pixels.
    return pixel_count / 1250
|
||||
|
||||
def embed(
|
||||
self,
|
||||
texts: list[str] | None = None,
|
||||
|
||||
@ -42,6 +42,8 @@ class LlamaCppClient(GenAIClient):
|
||||
_supports_vision: bool
|
||||
_supports_audio: bool
|
||||
_supports_tools: bool
|
||||
_image_token_cache: dict[tuple[int, int], int]
|
||||
_text_baseline_tokens: int | None
|
||||
|
||||
def _init_provider(self) -> str | None:
|
||||
"""Initialize the client and query model metadata from the server."""
|
||||
@ -52,6 +54,8 @@ class LlamaCppClient(GenAIClient):
|
||||
self._supports_vision = False
|
||||
self._supports_audio = False
|
||||
self._supports_tools = False
|
||||
self._image_token_cache = {}
|
||||
self._text_baseline_tokens = None
|
||||
|
||||
base_url = (
|
||||
self.genai_config.base_url.rstrip("/")
|
||||
@ -272,6 +276,91 @@ class LlamaCppClient(GenAIClient):
|
||||
return self._context_size
|
||||
return 4096
|
||||
|
||||
def estimate_image_tokens(self, width: int, height: int) -> float:
    """Learn the model's image-token cost at these dimensions by probing the
    llama.cpp server.

    llama.cpp's image tokenization is a deterministic function of dimensions
    and the loaded mmproj, so the result is cached per (width, height) for
    the lifetime of the process. Falls back to the base pixel heuristic when
    the server is unreachable or returns something malformed.
    """
    if self.provider is None:
        # No server configured; use the inherited heuristic.
        return super().estimate_image_tokens(width, height)

    dims = (width, height)
    cached = self._image_token_cache.get(dims)
    if cached is not None:
        return cached

    try:
        text_only = self._probe_baseline_tokens()
        with_image = self._probe_image_prompt_tokens(width, height)
    except Exception as e:
        logger.debug(
            "llama.cpp image-token probe failed for %dx%d (%s); using heuristic",
            width,
            height,
            e,
        )
        return super().estimate_image_tokens(width, height)

    # The image cost is the delta over a text-only prompt, floored at 1.
    tokens = max(1, with_image - text_only)
    self._image_token_cache[dims] = tokens
    logger.debug(
        "llama.cpp model '%s' uses ~%d tokens for %dx%d images",
        self.genai_config.model,
        tokens,
        width,
        height,
    )
    return tokens
|
||||
|
||||
def _probe_baseline_tokens(self) -> int:
|
||||
"""Return prompt_tokens for a minimal text-only request. Cached after first call."""
|
||||
if self._text_baseline_tokens is not None:
|
||||
return self._text_baseline_tokens
|
||||
|
||||
self._text_baseline_tokens = self._probe_prompt_tokens(
|
||||
[{"type": "text", "text": "."}]
|
||||
)
|
||||
return self._text_baseline_tokens
|
||||
|
||||
def _probe_image_prompt_tokens(self, width: int, height: int) -> int:
|
||||
"""Return prompt_tokens for a single synthetic image plus minimal text."""
|
||||
img = Image.new("RGB", (width, height), (128, 128, 128))
|
||||
buf = io.BytesIO()
|
||||
img.save(buf, format="JPEG", quality=60)
|
||||
encoded = base64.b64encode(buf.getvalue()).decode("utf-8")
|
||||
return self._probe_prompt_tokens(
|
||||
[
|
||||
{"type": "text", "text": "."},
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {"url": f"data:image/jpeg;base64,{encoded}"},
|
||||
},
|
||||
]
|
||||
)
|
||||
|
||||
def _probe_prompt_tokens(self, content: list[dict[str, Any]]) -> int:
|
||||
"""POST a 1-token chat completion and return reported prompt_tokens.
|
||||
|
||||
Uses a generous timeout to absorb a cold model load on the first probe
|
||||
when the server lazily loads models on demand (e.g. llama-swap).
|
||||
"""
|
||||
payload = {
|
||||
"model": self.genai_config.model,
|
||||
"messages": [{"role": "user", "content": content}],
|
||||
"max_tokens": 1,
|
||||
}
|
||||
response = requests.post(
|
||||
f"{self.provider}/v1/chat/completions",
|
||||
json=payload,
|
||||
timeout=60,
|
||||
)
|
||||
response.raise_for_status()
|
||||
return int(response.json()["usage"]["prompt_tokens"])
|
||||
|
||||
def _build_payload(
|
||||
self,
|
||||
messages: list[dict[str, Any]],
|
||||
|
||||
@ -123,6 +123,15 @@ def get_detector_temperature(
|
||||
if index < len(hailo_device_names):
|
||||
device_name = hailo_device_names[index]
|
||||
return hailo_temps[device_name]
|
||||
elif detector_type == "hailo10h":
|
||||
# Get temperatures for Hailo devices
|
||||
hailo_temps = get_hailo_temps()
|
||||
if hailo_temps:
|
||||
hailo_device_names = sorted(hailo_temps.keys())
|
||||
index = detector_index_by_type.get("hailo10h", 0)
|
||||
if index < len(hailo_device_names):
|
||||
device_name = hailo_device_names[index]
|
||||
return hailo_temps[device_name]
|
||||
elif detector_type == "rknn":
|
||||
# Rockchip temperatures are handled by the GPU / NPU stats
|
||||
# as there are not detector specific temperatures
|
||||
|
||||
@ -257,6 +257,7 @@
|
||||
"export": "Export",
|
||||
"actions": "Actions",
|
||||
"uiPlayground": "UI Playground",
|
||||
"features": "Features",
|
||||
"faceLibrary": "Face Library",
|
||||
"classification": "Classification",
|
||||
"chat": "Chat",
|
||||
|
||||
@ -397,6 +397,14 @@
|
||||
"description": "The device to use for Hailo inference (e.g. 'PCIe', 'M.2')."
|
||||
}
|
||||
},
|
||||
"hailo10h": {
|
||||
"label": "Hailo-10H",
|
||||
"description": "Hailo-10H detector using HEF models and the HailoRT SDK for inference on Hailo hardware.",
|
||||
"device": {
|
||||
"label": "Device Type",
|
||||
"description": "The device to use for Hailo inference (e.g. 'PCIe', 'M.2')."
|
||||
}
|
||||
},
|
||||
"memryx": {
|
||||
"label": "MemryX",
|
||||
"description": "MemryX MX3 detector that runs compiled DFP models on MemryX accelerators.",
|
||||
|
||||
@ -161,13 +161,13 @@ export function AnimatedEventCard({
|
||||
<TooltipTrigger asChild>
|
||||
<Button
|
||||
className={cn(
|
||||
"absolute left-2 top-1 z-40 transition-opacity",
|
||||
"absolute left-2 top-1 z-40 bg-gray-500 bg-gradient-to-br from-gray-400 to-gray-500 transition-opacity",
|
||||
threatLevel === ThreatLevel.SECURITY_CONCERN &&
|
||||
"pointer-events-auto bg-severity_alert opacity-100 hover:bg-severity_alert",
|
||||
"pointer-events-auto opacity-100",
|
||||
threatLevel === ThreatLevel.NEEDS_REVIEW &&
|
||||
"pointer-events-auto bg-severity_detection opacity-100 hover:bg-severity_detection",
|
||||
"pointer-events-auto opacity-100",
|
||||
threatLevel === ThreatLevel.NORMAL &&
|
||||
"pointer-events-none bg-gray-500 bg-gradient-to-br from-gray-400 to-gray-500 opacity-0 group-hover:pointer-events-auto group-hover:opacity-100",
|
||||
"pointer-events-none opacity-0 group-hover:pointer-events-auto group-hover:opacity-100",
|
||||
)}
|
||||
size="xs"
|
||||
aria-label={t("markAsReviewed")}
|
||||
|
||||
@ -155,14 +155,40 @@ export function MessageBubble({
|
||||
) : (
|
||||
<div
|
||||
className={cn(
|
||||
"[&>*:last-child]:inline",
|
||||
!isComplete &&
|
||||
"after:ml-0.5 after:inline-block after:h-4 after:w-2 after:animate-cursor-blink after:rounded-sm after:bg-foreground after:align-middle after:content-['']",
|
||||
"[&>p:last-child]:inline after:ml-0.5 after:inline-block after:h-4 after:w-2 after:animate-cursor-blink after:rounded-sm after:bg-foreground after:align-middle after:content-['']",
|
||||
)}
|
||||
>
|
||||
<ReactMarkdown
|
||||
remarkPlugins={[remarkGfm]}
|
||||
components={{
|
||||
p: ({ node: _n, ...props }) => (
|
||||
<p className="my-2 first:mt-0 last:mb-0" {...props} />
|
||||
),
|
||||
ul: ({ node: _n, ...props }) => (
|
||||
<ul
|
||||
className="my-2 list-disc space-y-1 pl-6 first:mt-0 last:mb-0"
|
||||
{...props}
|
||||
/>
|
||||
),
|
||||
ol: ({ node: _n, ...props }) => (
|
||||
<ol
|
||||
className="my-2 list-decimal space-y-1 pl-6 first:mt-0 last:mb-0"
|
||||
{...props}
|
||||
/>
|
||||
),
|
||||
li: ({ node: _n, ...props }) => (
|
||||
<li className="pl-1" {...props} />
|
||||
),
|
||||
code: ({ node: _n, className, ...props }) => (
|
||||
<code
|
||||
className={cn(
|
||||
"rounded bg-foreground/10 px-1 py-0.5 font-mono text-sm",
|
||||
className,
|
||||
)}
|
||||
{...props}
|
||||
/>
|
||||
),
|
||||
table: ({ node: _n, ...props }) => (
|
||||
<table
|
||||
className="my-2 w-full border-collapse border border-border"
|
||||
|
||||
@ -14,7 +14,6 @@ import Step3ChooseExamples, {
|
||||
Step3FormData,
|
||||
} from "./wizard/Step3ChooseExamples";
|
||||
import { cn } from "@/lib/utils";
|
||||
import { isDesktop } from "react-device-detect";
|
||||
import axios from "axios";
|
||||
|
||||
const OBJECT_STEPS = [
|
||||
@ -153,13 +152,9 @@ export default function ClassificationModelWizardDialog({
|
||||
>
|
||||
<DialogContent
|
||||
className={cn(
|
||||
"",
|
||||
isDesktop &&
|
||||
wizardState.currentStep == 0 &&
|
||||
"max-h-[90%] overflow-y-auto xl:max-h-[80%]",
|
||||
isDesktop &&
|
||||
wizardState.currentStep > 0 &&
|
||||
"max-h-[90%] max-w-[70%] overflow-y-auto xl:max-h-[80%]",
|
||||
"scrollbar-container max-h-[90%] overflow-y-auto",
|
||||
wizardState.currentStep == 0 && "xl:max-h-[80%]",
|
||||
wizardState.currentStep > 0 && "md:max-w-[70%] xl:max-h-[80%]",
|
||||
)}
|
||||
onInteractOutside={(e) => {
|
||||
e.preventDefault();
|
||||
|
||||
@ -6,6 +6,7 @@ import {
|
||||
LuLifeBuoy,
|
||||
LuList,
|
||||
LuLogOut,
|
||||
LuMessageSquare,
|
||||
LuMoon,
|
||||
LuSquarePen,
|
||||
LuScanFace,
|
||||
@ -482,21 +483,25 @@ export default function GeneralSettings({ className }: GeneralSettingsProps) {
|
||||
</Link>
|
||||
</>
|
||||
)}
|
||||
{isAdmin && isMobile && config?.face_recognition.enabled && (
|
||||
<>
|
||||
<Link to="/faces">
|
||||
<MenuItem
|
||||
className="flex w-full items-center p-2 text-sm"
|
||||
aria-label={t("menu.faceLibrary")}
|
||||
>
|
||||
<LuScanFace className="mr-2 size-4" />
|
||||
<span>{t("menu.faceLibrary")}</span>
|
||||
</MenuItem>
|
||||
</Link>
|
||||
</>
|
||||
)}
|
||||
{isAdmin && isMobile && (
|
||||
<>
|
||||
</DropdownMenuGroup>
|
||||
{isMobile && isAdmin && (
|
||||
<>
|
||||
<DropdownMenuLabel className="mt-1">
|
||||
{t("menu.features")}
|
||||
</DropdownMenuLabel>
|
||||
<DropdownMenuSeparator />
|
||||
<DropdownMenuGroup className="flex flex-col">
|
||||
{config?.face_recognition.enabled && (
|
||||
<Link to="/faces">
|
||||
<MenuItem
|
||||
className="flex w-full items-center p-2 text-sm"
|
||||
aria-label={t("menu.faceLibrary")}
|
||||
>
|
||||
<LuScanFace className="mr-2 size-4" />
|
||||
<span>{t("menu.faceLibrary")}</span>
|
||||
</MenuItem>
|
||||
</Link>
|
||||
)}
|
||||
<Link to="/classification">
|
||||
<MenuItem
|
||||
className="flex w-full items-center p-2 text-sm"
|
||||
@ -506,9 +511,20 @@ export default function GeneralSettings({ className }: GeneralSettingsProps) {
|
||||
<span>{t("menu.classification")}</span>
|
||||
</MenuItem>
|
||||
</Link>
|
||||
</>
|
||||
)}
|
||||
</DropdownMenuGroup>
|
||||
{config?.genai?.model !== "none" && (
|
||||
<Link to="/chat">
|
||||
<MenuItem
|
||||
className="flex w-full items-center p-2 text-sm"
|
||||
aria-label={t("menu.chat")}
|
||||
>
|
||||
<LuMessageSquare className="mr-2 size-4" />
|
||||
<span>{t("menu.chat")}</span>
|
||||
</MenuItem>
|
||||
</Link>
|
||||
)}
|
||||
</DropdownMenuGroup>
|
||||
</>
|
||||
)}
|
||||
<DropdownMenuLabel className={isDesktop ? "mt-3" : "mt-1"}>
|
||||
{t("menu.appearance")}
|
||||
</DropdownMenuLabel>
|
||||
|
||||
Loading…
Reference in New Issue
Block a user