mirror of
https://github.com/blakeblackshear/frigate.git
synced 2026-03-14 20:37:34 +03:00
* use react-jsonschema-form for UI config * don't use properties wrapper when generating config i18n json * configure for full i18n support * section fields * add descriptions to all fields for i18n * motion i18n * fix nullable fields * sanitize internal fields * add switches widgets and use friendly names * fix nullable schema entries * ensure update_topic is added to api calls this needs further backend implementation to work correctly * add global sections, camera config overrides, and reset button * i18n * add reset logic to global config view * tweaks * fix sections and live validation * fix validation for schema objects that can be null * generic and custom per-field validation * improve generic error validation messages * remove show advanced fields switch * tweaks * use shadcn theme * fix array field template * i18n tweaks * remove collapsible around root section * deep merge schema for advanced fields * add array field item template and fix ffmpeg section * add missing i18n keys * tweaks * comment out api call for testing * add config groups as a separate i18n namespace * add descriptions to all pydantic fields * make titles more concise * new titles as i18n * update i18n config generation script to use json schema * tweaks * tweaks * rebase * clean up * form tweaks * add wildcards and fix object filter fields * add field template for additionalproperties schema objects * improve typing * add section description from schema and clarify global vs camera level descriptions * separate and consolidate global and camera i18n namespaces * clean up now obsolete namespaces * tweaks * refactor sections and overrides * add ability to render components before and after fields * fix titles * chore(sections): remove legacy single-section components replaced by template * refactor configs to use individual files with a template * fix review description * apply hidden fields after ui schema * move util * remove unused i18n * clean up error messages * fix fast refresh * 
add custom validation and use it for ffmpeg input roles * update nav tree * remove unused * re-add override and modified indicators * mark pending changes and add confirmation dialog for resets * fix red unsaved dot * tweaks * add docs links, readonly keys, and restart required per field * add special case and comments for global motion section * add section form special cases * combine review sections * tweaks * add audio labels endpoint * add audio label switches and input to filter list * fix type * remove key from config when resetting to default/global * don't show description for new key/val fields * tweaks * spacing tweaks * add activity indicator and scrollbar tweaks * add docs to filter fields * wording changes * fix global ffmpeg section * add review classification zones to review form * add backend endpoint and frontend widget for ffmpeg presets and manual args * improve wording * hide descriptions for additional properties arrays * add warning log about incorrectly nested model config * spacing and language tweaks * fix i18n keys * networking section docs and description * small wording tweaks * add layout grid field * refactor with shared utilities * field order * add individual detectors to schema add detector titles and descriptions (docstrings in pydantic are used for descriptions) and add i18n keys to globals * clean up detectors section and i18n * don't save model config back to yaml when saving detectors * add full detectors config to api model dump works around the way we use detector plugins so we can have the full detector config for the frontend * add restart button to toast when restart is required * add ui option to remove inner cards * fix buttons * section tweaks * don't zoom into text on mobile * make buttons sticky at bottom of sections * small tweaks * highlight label of changed fields * add null to enum list when unwrapping * refactor to shared utils and add save all button * add undo all button * add RJSF to dictionary * consolidate 
utils * preserve form data when changing cameras * add mono fonts * add popover to show what fields will be saved * fix mobile menu not re-rendering with unsaved dots * tweaks * fix logger and env vars config section saving use escaped periods in keys to retain them in the config file (eg "frigate.embeddings") * add timezone widget * role map field with validation * fix validation for model section * add another hidden field * add footer message for required restart * use rjsf for notifications view * fix config saving * add replace rules field * default column layout and add field sizing * clean up field template * refactor profile settings to match rjsf forms * tweaks * refactor frigate+ view and make tweaks to sections * show frigate+ model info in detection model settings when using a frigate+ model * update restartRequired for all fields * fix restart fields * tweaks and add ability enable disabled cameras more backend changes required * require restart when enabling camera that is disabled in config * disable save when form is invalid * refactor ffmpeg section for readability * change label * clean up camera inputs fields * misc tweaks to ffmpeg section - add raw paths endpoint to ensure credentials get saved - restart required tooltip * maintenance settings tweaks * don't mutate with lodash * fix description re-rendering for nullable object fields * hide reindex field * update rjsf * add frigate+ description to settings pane * disable save all when any section is invalid * show translated field name in validation error pane * clean up * remove unused * fix genai merge * fix genai
355 lines
13 KiB
Python
355 lines
13 KiB
Python
import ctypes
|
|
import logging
|
|
import platform
|
|
|
|
import numpy as np
|
|
|
|
try:
|
|
import tensorrt as trt
|
|
from cuda import cuda
|
|
|
|
TRT_VERSION = int(trt.__version__[0 : trt.__version__.find(".")])
|
|
|
|
TRT_SUPPORT = True
|
|
except ModuleNotFoundError:
|
|
TRT_SUPPORT = False
|
|
|
|
from pydantic import ConfigDict, Field
|
|
from typing_extensions import Literal
|
|
|
|
from frigate.detectors.detection_api import DetectionApi
|
|
from frigate.detectors.detector_config import BaseDetectorConfig
|
|
|
|
# Module-level logger; every message emitted by this plugin goes through it.
logger = logging.getLogger(__name__)

# Value of the ``type`` field that selects this detector implementation.
DETECTOR_KEY = "tensorrt"
|
|
|
|
if TRT_SUPPORT:

    class TrtLogger(trt.ILogger):
        """Route TensorRT log messages into this module's Python ``logger``."""

        def log(self, severity, msg):
            """Emit ``msg`` at the Python logging level mapped from ``severity``."""
            level = self.getSeverity(severity)
            logger.log(level, msg)

        def getSeverity(self, sev: trt.ILogger.Severity) -> int:
            """Translate a TensorRT severity into a ``logging`` level.

            Any severity that is not listed in the table falls back to
            ``logging.DEBUG``.
            """
            severity_levels = (
                (trt.ILogger.VERBOSE, logging.DEBUG),
                (trt.ILogger.INFO, logging.INFO),
                (trt.ILogger.WARNING, logging.WARNING),
                (trt.ILogger.ERROR, logging.ERROR),
                (trt.ILogger.INTERNAL_ERROR, logging.CRITICAL),
            )
            # Checked in order with ``==``, first match wins.
            for trt_severity, level in severity_levels:
                if sev == trt_severity:
                    return level
            return logging.DEBUG
|
|
|
|
|
|
class TensorRTDetectorConfig(BaseDetectorConfig):
    """TensorRT detector for Nvidia Jetson devices using serialized TensorRT engines for accelerated inference."""

    # Title attached to the model through pydantic's ConfigDict.
    model_config = ConfigDict(title="TensorRT")

    # Only the literal DETECTOR_KEY value is accepted for this field.
    type: Literal[DETECTOR_KEY]

    # Passed to cuCtxCreate by TensorRtDetector.__init__ as the CUDA device.
    device: int = Field(
        default=0,
        title="GPU Device Index",
        description="The GPU device index to use.",
    )
|
|
|
|
|
|
class HostDeviceMem:
    """Pair of a pinned host buffer and a device buffer that owns both allocations.

    Attributes:
        host: Host-side source/destination used for copies. Callers may rebind
            it (``detect_raw`` assigns a numpy array to it).
        host_dev: Device pointer returned by ``cuMemHostGetDevicePointer`` for
            the pinned host buffer.
        device: Device allocation handle.
        nbytes: Size of each allocation in bytes.
        size: Number of elements in the buffer.
    """

    def __init__(self, host_mem, device_mem, nbytes, size):
        # Plain attributes are assigned before any CUDA call so that __del__
        # always finds them, even if the call below raises.
        self.host = host_mem
        # ``host`` can be rebound by callers; keep the original pinned
        # allocation separately so it is the one released in __del__.
        self._pinned_host = host_mem
        self.device = device_mem
        self.nbytes = nbytes
        self.size = size
        err, self.host_dev = cuda.cuMemHostGetDevicePointer(host_mem, 0)
        if err != cuda.CUresult.CUDA_SUCCESS:
            logger.warning("cuMemHostGetDevicePointer returned %s", err)

    def __str__(self):
        return f"Host:\n{self.host}\nDevice:\n{self.device}"

    def __repr__(self):
        return self.__str__()

    def __del__(self):
        """Release the pinned host allocation and the device allocation."""
        # getattr guards against instances whose __init__ did not complete.
        pinned_host = getattr(self, "_pinned_host", None)
        device_mem = getattr(self, "device", None)
        try:
            if pinned_host is not None:
                cuda.cuMemFreeHost(pinned_host)
        finally:
            # Free device memory even when releasing the host buffer fails.
            if device_mem is not None:
                cuda.cuMemFree(device_mem)
|
|
|
|
|
|
class TensorRtDetector(DetectionApi):
    """Detector that runs a serialized TensorRT engine through the CUDA driver API.

    The engine output is treated as rows of 7 float32 values
    ``[x, y, w, h, box_confidence, class_id, class_prob]`` and converted by
    ``detect_raw`` into a fixed ``(20, 6)`` array.
    """

    type_key = DETECTOR_KEY

    @staticmethod
    def _check_cuda(err, call_name):
        """Raise ``RuntimeError`` when a CUDA driver call did not succeed."""
        if err != cuda.CUresult.CUDA_SUCCESS:
            raise RuntimeError(f"{call_name} returned {err}")

    def _load_engine(self, model_path):
        """Load the plugin libraries and deserialize the engine at ``model_path``.

        Raises:
            RuntimeError: if TensorRT could not deserialize the engine file.
        """
        try:
            trt.init_libnvinfer_plugins(self.trt_logger, "")

            ctypes.cdll.LoadLibrary("/usr/local/lib/libyolo_layer.so")
        except OSError as e:
            # Best effort: the failure is logged and deserialization is still
            # attempted.
            logger.error(
                "ERROR: failed to load libraries. %s",
                e,
            )

        with open(model_path, "rb") as f, trt.Runtime(self.trt_logger) as runtime:
            engine = runtime.deserialize_cuda_engine(f.read())
            if engine is None:
                # Fail here with a clear message instead of a TypeError later
                # when the engine is first indexed.
                raise RuntimeError(
                    f"failed to deserialize TensorRT engine from {model_path}"
                )
            return engine

    def _binding_is_input(self, binding):
        """Return whether ``binding`` is an input tensor of the engine."""
        if TRT_VERSION < 10:
            return self.engine.binding_is_input(binding)
        # Ask the engine for the tensor's I/O mode rather than relying on the
        # tensor being named "input".
        return self.engine.get_tensor_mode(binding) == trt.TensorIOMode.INPUT

    def _get_binding_dims(self, binding):
        """Return the shape of ``binding`` using the API of the installed TensorRT."""
        if TRT_VERSION < 10:
            return self.engine.get_binding_shape(binding)
        return self.engine.get_tensor_shape(binding)

    def _get_binding_dtype(self, binding):
        """Return the TensorRT data type of ``binding``."""
        if TRT_VERSION < 10:
            return self.engine.get_binding_dtype(binding)
        return self.engine.get_tensor_dtype(binding)

    def _execute(self):
        """Run the execution context over ``self.bindings`` and return its result."""
        if TRT_VERSION < 10:
            return self.context.execute_async_v2(
                bindings=self.bindings, stream_handle=self.stream
            )
        return self.context.execute_v2(self.bindings)

    def _get_input_shape(self):
        """Get input shape of the TensorRT YOLO engine.

        Returns:
            A ``(dims, numpy_dtype)`` tuple, where ``dims`` holds the trailing
            two dimensions of the first binding.

        Raises:
            ValueError: if the first binding has neither 3 nor 4 dimensions.
        """
        binding = self.engine[0]
        assert self._binding_is_input(binding)
        binding_dims = self._get_binding_dims(binding)
        if len(binding_dims) == 4:
            spatial_dims = tuple(binding_dims[2:])
        elif len(binding_dims) == 3:
            spatial_dims = tuple(binding_dims[1:])
        else:
            raise ValueError(
                "bad dims of binding %s: %s" % (binding, str(binding_dims))
            )
        return spatial_dims, trt.nptype(self._get_binding_dtype(binding))

    def _allocate_buffers(self):
        """Allocates all host/device in/out buffers required for an engine.

        Returns:
            ``(inputs, outputs, bindings)``: two lists of ``HostDeviceMem`` and
            the list of device addresses, in engine binding order.

        Raises:
            ValueError: if a binding has neither 3 nor 4 dimensions.
        """
        inputs = []
        outputs = []
        bindings = []
        for binding in self.engine:
            binding_dims = self._get_binding_dims(binding)
            if len(binding_dims) == 4:
                # explicit batch case (TensorRT 7+)
                size = trt.volume(binding_dims)
            elif len(binding_dims) == 3:
                # implicit batch case (TensorRT 6 or older)
                size = trt.volume(binding_dims) * self.engine.max_batch_size
            else:
                raise ValueError(
                    "bad dims of binding %s: %s" % (binding, str(binding_dims))
                )
            binding_dtype = self._get_binding_dtype(binding)
            nbytes = size * binding_dtype.itemsize
            # Allocate host and device buffers
            err, host_mem = cuda.cuMemHostAlloc(
                nbytes, Flags=cuda.CU_MEMHOSTALLOC_DEVICEMAP
            )
            assert err is cuda.CUresult.CUDA_SUCCESS, f"cuMemHostAlloc returned {err}"
            logger.debug(
                "Allocated Tensor Binding %s Memory %s Bytes (%s * %s)",
                binding,
                nbytes,
                size,
                binding_dtype,
            )
            err, device_mem = cuda.cuMemAlloc(nbytes)
            assert err is cuda.CUresult.CUDA_SUCCESS, f"cuMemAlloc returned {err}"
            # Append the device buffer to device bindings.
            bindings.append(int(device_mem))
            # Append to the appropriate list.
            if self._binding_is_input(binding):
                logger.debug("Input has Shape %s", binding_dims)
                inputs.append(HostDeviceMem(host_mem, device_mem, nbytes, size))
            else:
                # each grid has 3 anchors, each anchor generates a detection
                # output of 7 float32 values
                assert size % 7 == 0, f"output size was {size}"
                logger.debug("Output has Shape %s", binding_dims)
                outputs.append(HostDeviceMem(host_mem, device_mem, nbytes, size))
        assert len(inputs) == 1, f"inputs len was {len(inputs)}"
        assert len(outputs) == 1, f"output len was {len(outputs)}"
        return inputs, outputs, bindings

    def _do_inference(self):
        """do_inference (for TensorRT 7.0+)

        Copies every input buffer to the device, executes the engine, copies
        every output buffer back and waits for the stream to finish.

        Returns:
            One float32 numpy array per entry of ``self.outputs``, built from
            that entry's host buffer.
        """
        # Push CUDA Context
        cuda.cuCtxPushCurrent(self.cu_ctx)
        try:
            # Transfer input data to the GPU.
            for inp in self.inputs:
                cuda.cuMemcpyHtoDAsync(inp.device, inp.host, inp.nbytes, self.stream)

            # Run inference.
            if not self._execute():
                logger.warning("Execute returned false")

            # Transfer predictions back from the GPU.
            for out in self.outputs:
                cuda.cuMemcpyDtoHAsync(out.host, out.device, out.nbytes, self.stream)

            # Synchronize the stream
            cuda.cuStreamSynchronize(self.stream)
        finally:
            # Pop CUDA Context, also when a step above raised, so the push
            # is always balanced.
            cuda.cuCtxPopCurrent()

        # Return only the host outputs.
        return [
            np.array(
                (ctypes.c_float * out.size).from_address(out.host), dtype=np.float32
            )
            for out in self.outputs
        ]

    def __init__(self, detector_config: TensorRTDetectorConfig):
        """Create the CUDA context and stream, load the engine and allocate buffers.

        Raises:
            ImportError: when running on an ``x86_64`` machine.
            AssertionError: when the TensorRT libraries are missing, CUDA
                cannot be initialized or the configured device index is not
                below the device count.
            RuntimeError: when a CUDA call reports an error, the engine cannot
                be deserialized, or the execution context / buffers cannot be
                created.
        """
        if platform.machine() == "x86_64":
            unsupported_msg = "TensorRT detector is no longer supported on amd64 system. Please use ONNX detector instead, see https://docs.frigate.video/configuration/object_detectors#onnx for more information."
            logger.error(unsupported_msg)
            raise ImportError(unsupported_msg)

        assert TRT_SUPPORT, (
            f"TensorRT libraries not found, {DETECTOR_KEY} detector not present"
        )

        (cuda_err,) = cuda.cuInit(0)
        assert cuda_err == cuda.CUresult.CUDA_SUCCESS, (
            f"Failed to initialize cuda {cuda_err}"
        )
        err, dev_count = cuda.cuDeviceGetCount()
        self._check_cuda(err, "cuDeviceGetCount")
        logger.debug(f"Num Available Devices: {dev_count}")
        assert detector_config.device < dev_count, (
            f"Invalid TensorRT Device Config. Device {detector_config.device} Invalid."
        )
        # Handles are stored on self only after the call succeeded, so __del__
        # never tries to destroy an invalid context or stream.
        err, cu_ctx = cuda.cuCtxCreate(
            cuda.CUctx_flags.CU_CTX_MAP_HOST, detector_config.device
        )
        self._check_cuda(err, "cuCtxCreate")
        self.cu_ctx = cu_ctx

        self.conf_th = 0.4  # TODO: model config parameter
        self.nms_threshold = 0.4
        err, stream = cuda.cuStreamCreate(0)
        self._check_cuda(err, "cuStreamCreate")
        self.stream = stream
        self.trt_logger = TrtLogger()
        self.engine = self._load_engine(detector_config.model.path)
        self.input_shape = self._get_input_shape()

        try:
            self.context = self.engine.create_execution_context()
            (
                self.inputs,
                self.outputs,
                self.bindings,
            ) = self._allocate_buffers()
        except Exception as e:
            logger.error(e)
            raise RuntimeError("fail to allocate CUDA resources") from e

        logger.debug("TensorRT loaded. Input shape is %s", self.input_shape)
        logger.debug("TensorRT version is %s", TRT_VERSION)

    def __del__(self):
        """Free CUDA memories."""
        # __init__ can raise before any attribute exists, so every attribute
        # is probed instead of assumed.
        # No local references to the buffers are kept here, so dropping the
        # attribute releases them before the context is destroyed.
        if getattr(self, "outputs", None) is not None:
            del self.outputs
        if getattr(self, "inputs", None) is not None:
            del self.inputs
        stream = getattr(self, "stream", None)
        if stream is not None:
            cuda.cuStreamDestroy(stream)
            del self.stream
        for attr_name in ("engine", "context", "trt_logger"):
            if hasattr(self, attr_name):
                delattr(self, attr_name)
        cu_ctx = getattr(self, "cu_ctx", None)
        if cu_ctx is not None:
            cuda.cuCtxDestroy(cu_ctx)

    def _postprocess_yolo(self, trt_outputs, conf_th):
        """Postprocess TensorRT outputs.

        # Args
            trt_outputs: a list of tensors, where each tensor
                         contains a multiple of 7 float32 numbers in
                         the order of [x, y, w, h, box_confidence, class_id, class_prob]
            conf_th: confidence threshold

        # Returns
            An Nx7 array with the rows of all tensors for which
            box_confidence * class_prob >= conf_th
        """
        # filter low-conf detections and concatenate results of all yolo layers
        kept = []
        for output in trt_outputs:
            rows = output.reshape((-1, 7))
            kept.append(rows[rows[:, 4] * rows[:, 6] >= conf_th])
        return np.concatenate(kept, axis=0)

    def detect_raw(self, tensor_input):
        """Run inference on ``tensor_input`` and return the top 20 detections.

        Returns:
            A float32 array of shape [20, 6], zero padded, where each row is
            [class id, score, top, left, bottom, right].
        """
        # Input tensor has the shape of the [height, width, 3]
        # Output tensor of float32 of shape [20, 6] where:
        # 0 - class id
        # 1 - score
        # 2..5 - a value between 0 and 1 of the box: [top, left, bottom, right]

        # normalize
        # input_shape[-1] is the numpy type returned by trt.nptype, so it has
        # to be compared with a numpy dtype; int8 inputs are passed through
        # without scaling.
        input_dtype = self.input_shape[-1]
        tensor_input = tensor_input.astype(input_dtype)
        if np.dtype(input_dtype) != np.int8:
            tensor_input /= 255.0

        self.inputs[0].host = np.ascontiguousarray(tensor_input)
        trt_outputs = self._do_inference()

        raw_detections = self._postprocess_yolo(trt_outputs, self.conf_th)

        if len(raw_detections) == 0:
            return np.zeros((20, 6), np.float32)

        # raw_detections: Nx7 numpy arrays of
        #             [[x, y, w, h, box_confidence, class_id, class_prob],

        # Calculate score as box_confidence x class_prob
        raw_detections[:, 4] = raw_detections[:, 4] * raw_detections[:, 6]
        # Reorder elements by the score, best on top, remove class_prob
        ordered = raw_detections[raw_detections[:, 4].argsort()[::-1]][:, 0:6]
        # transform width to right with clamp to 0..1
        ordered[:, 2] = np.clip(ordered[:, 2] + ordered[:, 0], 0, 1)
        # transform height to bottom with clamp to 0..1
        ordered[:, 3] = np.clip(ordered[:, 3] + ordered[:, 1], 0, 1)
        # put result into the correct order and limit to top 20
        detections = ordered[:, [5, 4, 1, 0, 3, 2]][:20]

        # pad to 20x6 shape
        append_cnt = 20 - len(detections)
        if append_cnt > 0:
            detections = np.append(
                detections, np.zeros((append_cnt, 6), np.float32), axis=0
            )

        return detections
|