Mirror of https://github.com/blakeblackshear/frigate.git (synced 2026-02-02 01:05:20 +03:00)
Make TensorRT work (and break edgetpu)
parent 47f3d7c460
commit 76d46fe5ed

Makefile: 29 lines changed
@@ -15,7 +15,10 @@ amd64_ffmpeg:
 	docker build --no-cache --pull --tag blakeblackshear/frigate-ffmpeg:1.2.0-amd64 --file docker/Dockerfile.ffmpeg.amd64 .
 
 nginx_frigate:
-	docker buildx build --push --platform linux/arm/v7,linux/arm64/v8,linux/amd64 --tag blakeblackshear/frigate-nginx:1.0.2 --file docker/Dockerfile.nginx .
+	docker build --tag blakeblackshear/frigate-nginx:1.0.2 --file docker/Dockerfile.nginx .
+
+nginx_frigate_l4t:
+	docker build --tag blakeblackshear/frigate-nginx-l4t:1.0.2 --file docker/Dockerfile.l4t.nginx .
 
 amd64_frigate: version web
 	docker build --no-cache --tag frigate-base --build-arg ARCH=amd64 --build-arg FFMPEG_VERSION=1.1.0 --build-arg WHEELS_VERSION=1.0.3 --build-arg NGINX_VERSION=1.0.2 --file docker/Dockerfile.base .
@@ -41,17 +44,35 @@ aarch64_wheels:
 aarch64_ffmpeg:
 	docker build --no-cache --pull --tag blakeblackshear/frigate-ffmpeg:1.3.0-aarch64 --file docker/Dockerfile.ffmpeg.aarch64 .
 
-aarch64_frigate: version web
-	docker build --no-cache --tag frigate-base --build-arg ARCH=aarch64 --build-arg FFMPEG_VERSION=1.0.0 --build-arg WHEELS_VERSION=1.0.3 --build-arg NGINX_VERSION=1.0.2 --file docker/Dockerfile.base .
+aarch64_frigate:
+	docker build --no-cache --tag frigate-base --build-arg ARCH=aarch64 --build-arg FFMPEG_VERSION=1.3.0 --build-arg WHEELS_VERSION=1.0.3 --build-arg NGINX_VERSION=1.0.2 --file docker/Dockerfile.base .
 	docker build --no-cache --tag frigate --file docker/Dockerfile.aarch64 .
 
+aarch64_dev:
+	docker build --tag frigate --file docker/Dockerfile.aarch64 .
+
 aarch64_all: aarch64_wheels aarch64_ffmpeg aarch64_frigate
 
+l4t_assets_yolo4:
+	mkdir -p $$(pwd)/.l4t_assets
+	cp ./converters/yolo4/plugin/* .l4t_assets/
+	cp ./converters/yolo4/model/yolov4-tiny-416.trt .l4t_assets/yolov4-tiny-416.trt
+	cp ./converters/yolo4/model/yolov4-tiny-288.trt .l4t_assets/yolov4-tiny-288.trt
+	# cp ./converters/yolo4/model/yolov4-416.trt .l4t_assets/yolov4-416.trt
+	# cp ./converters/yolo4/model/yolov4-288.trt .l4t_assets/yolov4-288.trt
+
+l4t_dev: # l4t_assets_yolo4
+	nvidia-docker build --tag frigate.l4t --build-arg NGINX_VERSION=1.0.2 --file docker/Dockerfile.l4t.base .
+
+l4t_dev_test:
+	nvidia-docker build --tag frigate.l4t.onnx --build-arg NGINX_VERSION=1.0.2 --file docker/Dockerfile.l4t.onnx ./onnx_test/
+
+
 armv7_wheels:
 	docker build --tag blakeblackshear/frigate-wheels:1.0.3-armv7 --file docker/Dockerfile.wheels .
 
 armv7_ffmpeg:
-	docker build --no-cache --pull --tag blakeblackshear/frigate-ffmpeg:1.2.0-armv7 --file docker/Dockerfile.ffmpeg.armv7 .
+	docker build --pull --tag blakeblackshear/frigate-ffmpeg:1.2.0-armv7 --file docker/Dockerfile.ffmpeg.armv7 .
 
 armv7_frigate: version web
 	docker build --no-cache --tag frigate-base --build-arg ARCH=armv7 --build-arg FFMPEG_VERSION=1.0.0 --build-arg WHEELS_VERSION=1.0.3 --build-arg NGINX_VERSION=1.0.2 --file docker/Dockerfile.base .
@@ -2,6 +2,7 @@
 
 set -xe
 cd /tensorrt_demos/plugins && make
+cp /tensorrt_demos/plugins/libyolo_layer.so /plugin/libyolo_layer.so
 
 cd /tensorrt_demos/yolo
 for model in yolov4-tiny-288 \
@@ -1,12 +1,14 @@
 #!/bin/bash
 
 mkdir -p $(pwd)/model
+mkdir -p $(pwd)/plugin
 
 docker build --tag models.yolo4 --file ./Dockerfile.l4t.tf15 ./assets/
 
 sudo docker run --rm -it --name models.yolo4 \
     --mount type=tmpfs,target=/tmp/cache,tmpfs-size=1000000000 \
     -v $(pwd)/model:/model:rw \
+    -v $(pwd)/plugin:/plugin:rw \
     -v /tmp/argus_socket:/tmp/argus_socket \
     -e NVIDIA_VISIBLE_DEVICES=all \
     -e NVIDIA_DRIVER_CAPABILITIES=compute,utility,video \
@@ -1,21 +1,24 @@
 ARG NGINX_VERSION
 FROM blakeblackshear/frigate-nginx-l4t:${NGINX_VERSION} as nginx
 FROM frigate-web as web
-FROM nvcr.io/nvidia/l4t-tensorflow:r32.6.1-tf2.5-py3 as wheels
+FROM nvcr.io/nvidia/l4t-tensorrt:r8.0.1-runtime as wheels
 
 ENV DEBIAN_FRONTEND=noninteractive
 
 RUN apt-get -qq update \
     && apt-get -qq install -y \
+    python3.8 \
+    python3.8-dev \
+    wget \
     # opencv dependencies
-    build-essential cmake git pkg-config libgtk-3-dev \
+    build-essential cmake git pkg-config libgtk-3-dev
+
+RUN apt-get -qq install -y \
     libavcodec-dev libavformat-dev libswscale-dev libv4l-dev \
-    libxvidcore-dev libx264-dev libjpeg-dev libpng-dev libtiff-dev \
+    libxvidcore-dev libx264-dev libjpeg-dev libpng-dev libtiff-dev
+RUN apt-get -qq install -y \
     gfortran openexr libatlas-base-dev libssl-dev\
-    libtbb2 libtbb-dev libdc1394-22-dev libopenexr-dev \
+    libtbb2 libtbb-dev libdc1394-22-dev libopenexr-dev
+RUN apt-get -qq install -y \
     libgstreamer-plugins-base1.0-dev libgstreamer1.0-dev \
     # scipy dependencies
     gcc gfortran libopenblas-dev liblapack-dev cython
@@ -39,7 +42,7 @@ RUN pip3 wheel --wheel-dir=/wheels \
     setproctitle \
     peewee
 
-FROM nvcr.io/nvidia/l4t-tensorflow:r32.6.1-tf2.5-py3
+FROM nvcr.io/nvidia/l4t-tensorrt:r8.0.1-runtime
 ENV DEBIAN_FRONTEND=noninteractive
 RUN \
     apt-get update && apt-get install -y gnupg
@@ -78,20 +81,11 @@ RUN pip3 install \
     peewee_migrate \
     pydantic \
     zeroconf \
-    ws4py \
-    # Python 3.6
-    shared-memory38
+    ws4py
 
 
 # setup gstreamer
 RUN \
-    apt-get update && apt-get install -y software-properties-common && \
-    add-apt-repository universe && \
-    add-apt-repository multiverse && \
-    apt-get update
-
-RUN \
-    apt-get install -y gstreamer1.0-tools gstreamer1.0-alsa \
+    apt-get update && apt-get install -y gstreamer1.0-tools gstreamer1.0-alsa \
     gstreamer1.0-plugins-base gstreamer1.0-plugins-good \
     gstreamer1.0-plugins-bad gstreamer1.0-plugins-ugly
@@ -110,24 +104,35 @@ RUN wget -q https://github.com/google-coral/test_data/raw/release-frogfish/ssdli
 COPY --from=nginx /usr/local/nginx/ /usr/local/nginx/
 COPY --from=web /opt/frigate/build /opt/frigate/web/
 
+# install TRT dependencies
+RUN apt-get update && apt-get install -y git sudo
+ADD docker/l4t/ /l4t/
+RUN /l4t/install_pycuda.sh
+
 # s6-overlay
 COPY docker/rootfs/ /
 ADD https://github.com/just-containers/s6-overlay/releases/download/v2.2.0.3/s6-overlay-aarch64-installer /tmp/
 RUN chmod +x /tmp/s6-overlay-aarch64-installer && /tmp/s6-overlay-aarch64-installer /
 
 
+WORKDIR /opt/frigate/
+ADD frigate frigate/
+ADD migrations migrations/
 COPY labelmap.txt /labelmap.txt
 COPY detect.tflite /detect.tflite
 
 
+# edgetpu experiments
+RUN wget -q https://github.com/Azure/Azure-AI-Camp/releases/download/v1.0/yolov4-tiny.tflite -O /yolov4-tiny.tflite
+
+
+
+# TRT Yolo4 Plugin
+ADD .l4t_assets /yolo4/
+
 EXPOSE 5000
 EXPOSE 1935
 
-WORKDIR /opt/frigate/
-
-ADD frigate frigate/
-ADD migrations migrations/
 ENTRYPOINT ["/init"]
 CMD ["python3", "-u", "-m", "frigate"]
-COPY frigate frigate/
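The image now carries the TensorRT runtime, pycuda, and the compiled YOLO plugin. A hypothetical smoke test (not part of this commit) that could be run inside the container to confirm the pieces load:

    # smoke_test.py -- illustrative only; checks the TRT/CUDA stack in the image
    import ctypes
    import tensorrt as trt
    import pycuda.autoinit  # creates a CUDA context on import
    import pycuda.driver as cuda

    ctypes.cdll.LoadLibrary("/yolo4/libyolo_layer.so")  # the plugin ADDed above
    print("TensorRT", trt.__version__)
    print("CUDA device:", cuda.Device(0).name())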
@@ -1,4 +1,4 @@
-FROM nvcr.io/nvidia/l4t-tensorflow:r32.6.1-tf2.5-py3 AS base
+FROM nvcr.io/nvidia/l4t-tensorrt:r8.0.1-runtime AS base
 
 ENV DEBIAN_FRONTEND=noninteractive
 RUN apt-get -yqq update && \
@@ -36,6 +36,7 @@ class FrigateBaseModel(BaseModel):
 class DetectorTypeEnum(str, Enum):
     edgetpu = "edgetpu"
     cpu = "cpu"
+    tensorrt = "tensorrt"
 
 
 class DetectorConfig(FrigateBaseModel):
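With the new enum member, a TensorRT detector can be selected through the existing config model. A minimal sketch, assuming DetectorConfig keeps default values for its other fields (device, num_threads):

    from frigate.config import DetectorConfig, DetectorTypeEnum

    # Hypothetical: the only change needed to route detection through tensorrt.py
    cfg = DetectorConfig(type=DetectorTypeEnum.tensorrt)
    assert cfg.type == DetectorTypeEnum.tensorrt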
@@ -1,12 +1,113 @@
+import datetime
 import logging
-import os
+import multiprocessing as mp
+import os
+import queue
+import signal
+import threading
 import numpy as np
-import multiprocessing as mp
-from frigate.util import EventsPerSecond
+from frigate.util import EventsPerSecond, SharedMemoryFrameManager, listen
+from frigate.config import DetectorConfig, DetectorTypeEnum
+from frigate.detection.object_detector import ObjectDetector
+import importlib
+from setproctitle import setproctitle
+from typing import Dict, Callable
 
 
 logger = logging.getLogger(__name__)
 
 
+DETECTORS = {
+    DetectorTypeEnum.cpu: "edgetpu",
+    DetectorTypeEnum.edgetpu: "edgetpu",
+    DetectorTypeEnum.tensorrt: "tensorrt",
+}
+
+
+def get_object_detector_factory(
+    detector_config: DetectorConfig, model_path: str
+) -> Callable[[], ObjectDetector]:
+    """
+    Return an object detector factory.
+    Since resource initialization might be performed on python import,
+    delay module load until the thread has started.
+    """
+    detector_module = DETECTORS.get(detector_config.type)
+    if detector_module is None:
+        logger.error(f"Unsupported detector type '{detector_config.type}'.")
+        return None
+
+    def _detector_factory() -> ObjectDetector:
+        path = os.path.join(os.path.dirname(__file__), f"{detector_module}.py")
+        spec = importlib.util.spec_from_file_location(
+            f"frigate.detection.{detector_module}", path
+        )
+        module = importlib.util.module_from_spec(spec)
+        spec.loader.exec_module(module)
+        object_detector = module.object_detector_factory(detector_config, model_path)
+        return object_detector
+
+    return _detector_factory
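The docstring's point is that importing edgetpu.py or tensorrt.py initializes hardware, so the module is loaded lazily inside the factory closure. A standalone sketch of the same pattern (module path and name are illustrative):

    import importlib.util

    def lazy_load(path: str, name: str):
        # Nothing is imported until this function is actually called, so
        # CUDA/TFLite setup happens in the detector process, not the parent.
        spec = importlib.util.spec_from_file_location(name, path)
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)
        return module

    # detector_module = lazy_load("frigate/detection/tensorrt.py", "frigate.detection.tensorrt")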
+
+
+def run_detector(
+    name: str,
+    detection_queue: mp.Queue,
+    out_events: Dict[str, mp.Event],
+    avg_speed,
+    start,
+    model_shape,
+    object_detector_factory: Callable[[], ObjectDetector],
+):
+    threading.current_thread().name = f"detector:{name}"
+    logger = logging.getLogger(f"detector.{name}")
+    logger.info(f"Starting detection process: {os.getpid()}")
+    setproctitle(f"frigate.detector.{name}")
+    listen()
+
+    stop_event = mp.Event()
+
+    def receiveSignal(signalNumber, frame):
+        stop_event.set()
+
+    signal.signal(signal.SIGTERM, receiveSignal)
+    signal.signal(signal.SIGINT, receiveSignal)
+
+    frame_manager = SharedMemoryFrameManager()
+
+    outputs = {}
+    for name in out_events.keys():
+        out_shm = mp.shared_memory.SharedMemory(name=f"out-{name}", create=False)
+        out_np = np.ndarray((20, 6), dtype=np.float32, buffer=out_shm.buf)
+        outputs[name] = {"shm": out_shm, "np": out_np}
+
+    object_detector = object_detector_factory()
+    while not stop_event.is_set():
+        try:
+            connection_id = detection_queue.get(timeout=5)
+        except queue.Empty:
+            continue
+        input_frame = frame_manager.get(
+            connection_id, (model_shape[0], model_shape[1], 3)
+        )
+
+        if input_frame is None:
+            continue
+
+        # detect and send the output
+        start.value = datetime.datetime.now().timestamp()
+        detections = object_detector.detect_raw(input_frame)
+        duration = datetime.datetime.now().timestamp() - start.value
+        outputs[connection_id]["np"][:] = detections[:]
+        out_events[connection_id].set()
+        start.value = 0.0
+
+        avg_speed.value = (avg_speed.value * 9 + duration) / 10
+    del object_detector
+
+
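run_detector keeps the existing shared-memory protocol: each client owns an "out-{name}" buffer of 20x6 float32, pushes its connection id onto the queue, and waits on an event. A minimal sketch of the caller's side of that handshake (names mirror the code above; error handling omitted):

    from multiprocessing import shared_memory
    import numpy as np

    def wait_for_result(name, detection_queue, out_event, timeout=10.0):
        # Attach to the result buffer the detector process writes into.
        shm = shared_memory.SharedMemory(name=f"out-{name}", create=False)
        result = np.ndarray((20, 6), dtype=np.float32, buffer=shm.buf)
        out_event.clear()
        detection_queue.put(name)      # wake the detector for this connection id
        if out_event.wait(timeout):    # set by the detector when results are ready
            return result.copy()
        return None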
 class DetectionProcess:
     def __init__(
         self,
@@ -27,18 +128,11 @@ class DetectionProcess:
         self.model_shape = model_shape
         self.detector_config = detector_config
 
-        self.detector_target = None
-        if (
-            detector_config.type == DetectorTypeEnum.cpu
-            or detector_config.type == DetectorTypeEnum.edgetpu
-        ):
-            from .edgetpu import run_detector as edgetpu_detector
-
-            self.detector_target = edgetpu_detector
-
-        assert self.detector_target, "Invalid detector configuration"
-
-        self.start_or_restart()
+        self.object_detector_factory = get_object_detector_factory(
+            detector_config, model_path
+        )
+        if self.object_detector_factory:
+            self.start_or_restart()
 
     def stop(self):
         self.detect_process.terminate()
@@ -54,7 +148,7 @@ class DetectionProcess:
         if (not self.detect_process is None) and self.detect_process.is_alive():
             self.stop()
         self.detect_process = mp.Process(
-            target=self.detector_target,
+            target=run_detector,
             name=f"detector:{self.name}",
             args=(
                 self.name,
@@ -62,9 +156,8 @@ class DetectionProcess:
                 self.out_events,
                 self.avg_inference_speed,
                 self.detection_start,
-                self.model_path,
                 self.model_shape,
-                self.detector_config,
+                self.object_detector_factory,
             ),
         )
         self.detect_process.daemon = True
@@ -103,9 +196,11 @@ class RemoteObjectDetector:
         for d in self.out_np_shm:
             if d[1] < threshold:
                 break
-            detections.append(
-                (self.labels[int(d[0])], float(d[1]), (d[2], d[3], d[4], d[5]))
-            )
+            label_key = int(d[0])
+            if label_key in self.labels:
+                detections.append(
+                    (self.labels[label_key], float(d[1]), (d[2], d[3], d[4], d[5]))
+                )
         self.fps.update()
         return detections
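The new guard matters because the label map can have gaps, and a TensorRT YOLO model may emit class ids that were never mapped; the old unguarded self.labels[int(d[0])] would raise KeyError. A tiny illustration with a hypothetical label map:

    labels = {0: "person", 2: "car"}  # id 1 intentionally absent
    for class_id in (0, 1, 2):
        if class_id in labels:        # same check as above
            print(class_id, labels[class_id])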
@@ -1,26 +1,39 @@
-import datetime
 import logging
-import multiprocessing as mp
-import os
-import queue
-import signal
-import threading
-from frigate.config import DetectorConfig
-from typing import Dict
+from frigate.config import DetectorConfig, DetectorTypeEnum
 
 import numpy as np
 
 # import tflite_runtime.interpreter as tflite
-from setproctitle import setproctitle
 
 
 # from tflite_runtime.interpreter import load_delegate
 
-from frigate.util import EventsPerSecond, SharedMemoryFrameManager, listen
+from frigate.util import EventsPerSecond
+from .object_detector import ObjectDetector
 
 logger = logging.getLogger(__name__)
 
 
+def object_detector_factory(detector_config: DetectorConfig, model_path: str):
+    if not (
+        detector_config.type == DetectorTypeEnum.cpu
+        or detector_config.type == DetectorTypeEnum.edgetpu
+    ):
+        return None
+    object_detector = LocalObjectDetector(
+        tf_device=detector_config.device,
+        model_path=model_path,
+        num_threads=detector_config.num_threads,
+    )
+    return object_detector
+
+
 class LocalObjectDetector(ObjectDetector):
     def __init__(self, tf_device=None, model_path=None, num_threads=3):
         self.fps = EventsPerSecond()
@@ -80,6 +93,11 @@ class LocalObjectDetector(ObjectDetector):
         return detections
 
     def detect_raw(self, tensor_input):
+        logger.error(">>>>>>>>>> detect raw")
+
+        # Expand dimensions of [height, width, 3] since the model expects images to have shape [1, height, width, 3]
+        tensor_input = np.expand_dims(tensor_input, axis=0)
+
         # self.interpreter.set_tensor(self.tensor_input_details[0]["index"], tensor_input)
         # self.interpreter.invoke()
 
@@ -105,63 +123,3 @@ class LocalObjectDetector(ObjectDetector):
     #     ]
 
         return detections
-
-
-def run_detector(
-    name: str,
-    detection_queue: mp.Queue,
-    out_events: Dict[str, mp.Event],
-    avg_speed,
-    start,
-    model_path,
-    model_shape,
-    detector_config: DetectorConfig,
-):
-    threading.current_thread().name = f"detector:{name}"
-    logger = logging.getLogger(f"detector.{name}")
-    logger.info(f"Starting detection process: {os.getpid()}")
-    setproctitle(f"frigate.detector.{name}")
-    listen()
-
-    stop_event = mp.Event()
-
-    def receiveSignal(signalNumber, frame):
-        stop_event.set()
-
-    signal.signal(signal.SIGTERM, receiveSignal)
-    signal.signal(signal.SIGINT, receiveSignal)
-
-    frame_manager = SharedMemoryFrameManager()
-    object_detector = LocalObjectDetector(
-        tf_device=detector_config.device,
-        model_path=model_path,
-        num_threads=detector_config.num_threads,
-    )
-
-    outputs = {}
-    for name in out_events.keys():
-        out_shm = mp.shared_memory.SharedMemory(name=f"out-{name}", create=False)
-        out_np = np.ndarray((20, 6), dtype=np.float32, buffer=out_shm.buf)
-        outputs[name] = {"shm": out_shm, "np": out_np}
-
-    while not stop_event.is_set():
-        try:
-            connection_id = detection_queue.get(timeout=5)
-        except queue.Empty:
-            continue
-        input_frame = frame_manager.get(
-            connection_id, (1, model_shape[0], model_shape[1], 3)
-        )
-
-        if input_frame is None:
-            continue
-
-        # detect and send the output
-        start.value = datetime.datetime.now().timestamp()
-        detections = object_detector.detect_raw(input_frame)
-        duration = datetime.datetime.now().timestamp() - start.value
-        outputs[connection_id]["np"][:] = detections[:]
-        out_events[connection_id].set()
-        start.value = 0.0
-
-        avg_speed.value = (avg_speed.value * 9 + duration) / 10
frigate/detection/tensorrt.py: 223 lines (new file)

@@ -0,0 +1,223 @@
+import logging
+from frigate.config import DetectorConfig, DetectorTypeEnum
+from frigate.util import EventsPerSecond
+import ctypes
+import numpy as np
+import tensorrt as trt
+import pycuda.driver as cuda
+from .object_detector import ObjectDetector
+import pycuda.autoinit  # This is needed for initializing the CUDA driver
+
+
+logger = logging.getLogger(__name__)
+
+
+def object_detector_factory(detector_config: DetectorConfig, model_path: str):
+    if detector_config.type != DetectorTypeEnum.tensorrt:
+        return None
+    try:
+        ctypes.cdll.LoadLibrary("/yolo4/libyolo_layer.so")
+    except OSError as e:
+        logger.error("ERROR: failed to load /yolo4/libyolo_layer.so. %s", e)
+    return LocalObjectDetector(detector_config, model_path)
+
+
+class HostDeviceMem(object):
+    """Simple helper data class that's a little nicer to use than a 2-tuple."""
+
+    def __init__(self, host_mem, device_mem):
+        self.host = host_mem
+        self.device = device_mem
+
+    def __str__(self):
+        return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device)
+
+    def __repr__(self):
+        return self.__str__()
+
+
+class LocalObjectDetector(ObjectDetector):
+    def _load_engine(self, model_path):
+        with open(model_path, "rb") as f, trt.Runtime(self.trt_logger) as runtime:
+            return runtime.deserialize_cuda_engine(f.read())
+
+    def _get_input_shape(self):
+        """Get input shape of the TensorRT YOLO engine."""
+        binding = self.engine[0]
+        assert self.engine.binding_is_input(binding)
+        binding_dims = self.engine.get_binding_shape(binding)
+        if len(binding_dims) == 4:
+            return tuple(binding_dims[2:])
+        elif len(binding_dims) == 3:
+            return tuple(binding_dims[1:])
+        else:
+            raise ValueError(
+                "bad dims of binding %s: %s" % (binding, str(binding_dims))
+            )
+
+    def _allocate_buffers(self):
+        """Allocates all host/device in/out buffers required for an engine."""
+        inputs = []
+        outputs = []
+        bindings = []
+        output_idx = 0
+        stream = cuda.Stream()
+        for binding in self.engine:
+            binding_dims = self.engine.get_binding_shape(binding)
+            if len(binding_dims) == 4:
+                # explicit batch case (TensorRT 7+)
+                size = trt.volume(binding_dims)
+            elif len(binding_dims) == 3:
+                # implicit batch case (TensorRT 6 or older)
+                size = trt.volume(binding_dims) * self.engine.max_batch_size
+            else:
+                raise ValueError(
+                    "bad dims of binding %s: %s" % (binding, str(binding_dims))
+                )
+            dtype = trt.nptype(self.engine.get_binding_dtype(binding))
+            # Allocate host and device buffers
+            host_mem = cuda.pagelocked_empty(size, dtype)
+            device_mem = cuda.mem_alloc(host_mem.nbytes)
+            # Append the device buffer to device bindings.
+            bindings.append(int(device_mem))
+            # Append to the appropriate list.
+            if self.engine.binding_is_input(binding):
+                inputs.append(HostDeviceMem(host_mem, device_mem))
+            else:
+                # each grid has 3 anchors, each anchor generates a detection
+                # output of 7 float32 values
+                assert size % 7 == 0
+                outputs.append(HostDeviceMem(host_mem, device_mem))
+                output_idx += 1
+        assert len(inputs) == 1
+        assert len(outputs) == 1
+        return inputs, outputs, bindings, stream
+
+    def _do_inference(self):
+        """do_inference (for TensorRT 7.0+)
+
+        This function is generalized for multiple inputs/outputs for full
+        dimension networks.
+        Inputs and outputs are expected to be lists of HostDeviceMem objects.
+        """
+        # Transfer input data to the GPU.
+        [
+            cuda.memcpy_htod_async(inp.device, inp.host, self.stream)
+            for inp in self.inputs
+        ]
+        # Run inference.
+        self.context.execute_async_v2(
+            bindings=self.bindings, stream_handle=self.stream.handle
+        )
+        # Transfer predictions back from the GPU.
+        [
+            cuda.memcpy_dtoh_async(out.host, out.device, self.stream)
+            for out in self.outputs
+        ]
+        # Synchronize the stream
+        self.stream.synchronize()
+        # Return only the host outputs.
+        return [out.host for out in self.outputs]
+
+    def __init__(self, detector_config: DetectorConfig, model_path: str):
+        self.fps = EventsPerSecond()
+        self.conf_th = 0.4  # TODO: model config parameter
+        self.nms_threshold = 0.4
+        self.trt_logger = trt.Logger(trt.Logger.INFO)
+        self.engine = self._load_engine(model_path)
+        self.input_shape = self._get_input_shape()
+
+        try:
+            self.context = self.engine.create_execution_context()
+            (
+                self.inputs,
+                self.outputs,
+                self.bindings,
+                self.stream,
+            ) = self._allocate_buffers()
+        except Exception as e:
+            logger.error(e)
+            raise RuntimeError("failed to allocate CUDA resources") from e
+
+        logger.debug("TensorRT loaded. Input shape is %s", self.input_shape)
+        logger.debug("TensorRT version is %s", trt.__version__[0])
+
+    def __del__(self):
+        """Free CUDA memories."""
+        del self.outputs
+        del self.inputs
+        del self.stream
+
+    def _postprocess_yolo(self, trt_outputs, img_w, img_h, conf_th, nms_threshold):
+        """Postprocess TensorRT outputs.
+
+        # Args
+            trt_outputs: a list of 2 or 3 tensors, where each tensor
+            contains a multiple of 7 float32 numbers in
+            the order of [x, y, w, h, box_confidence, class_id, class_prob]
+            conf_th: confidence threshold
+
+        # Returns
+            boxes, scores, classes
+        """
+        # filter low-conf detections and concatenate results of all yolo layers
+        detections = []
+        for o in trt_outputs:
+            dets = o.reshape((-1, 7))
+            dets = dets[dets[:, 4] * dets[:, 6] >= conf_th]
+            detections.append(dets)
+        detections = np.concatenate(detections, axis=0)
+
+        return detections
+
+    def detect(self, tensor_input, threshold=0.4):
+        pass
+
+    def detect_raw(self, tensor_input):
+        # Input tensor has the shape [height, width, 3]
+        # Output tensor of float32 of shape [20, 6] where:
+        #   0 - class id
+        #   1 - score
+        #   2..5 - a value between 0 and 1 of the box: [top, left, bottom, right]
+
+        # transform [height, width, 3] into (3, H, W)
+        tensor_input = tensor_input.transpose((2, 0, 1)).astype(np.float32)
+
+        # normalize
+        tensor_input /= 255.0
+
+        self.inputs[0].host = np.ascontiguousarray(tensor_input)
+        trt_outputs = self._do_inference()
+
+        raw_detections = self._postprocess_yolo(
+            trt_outputs,
+            tensor_input.shape[1],
+            tensor_input.shape[0],
+            self.conf_th,
+            nms_threshold=self.nms_threshold,
+        )
+
+        if len(raw_detections) == 0:
+            return np.zeros((20, 6), np.float32)
+
+        # raw_detections: Nx7 numpy arrays of
+        # [[x, y, w, h, box_confidence, class_id, class_prob],
+
+        # Calculate score as box_confidence x class_prob
+        raw_detections[:, 4] = raw_detections[:, 4] * raw_detections[:, 6]
+        # Reorder elements by the score, best on top, remove class_prob
+        ordered = raw_detections[raw_detections[:, 4].argsort()[::-1]][:, 0:6]
+        # transform width to right with clamp to 0..1
+        ordered[:, 2] = np.clip(ordered[:, 2] + ordered[:, 0], 0, 1)
+        # transform height to bottom with clamp to 0..1
+        ordered[:, 3] = np.clip(ordered[:, 3] + ordered[:, 1], 0, 1)
+        # put result into the correct order and limit to top 20
+        detections = ordered[:, [5, 4, 1, 0, 3, 2]][:20]
+        # pad to 20x6 shape
+        append_cnt = 20 - len(detections)
+        if append_cnt > 0:
+            detections = np.append(
+                detections, np.zeros((append_cnt, 6), np.float32), axis=0
+            )
+
+        self.fps.update()
+        return detections
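The detect_raw post-processing is easiest to follow on one concrete row. A worked example of the transform from YOLO's [x, y, w, h, box_confidence, class_id, class_prob] to Frigate's [class_id, score, top, left, bottom, right] (values invented):

    import numpy as np

    raw = np.array([[0.2, 0.3, 0.4, 0.5, 0.9, 1.0, 0.8]], dtype=np.float32)
    raw[:, 4] = raw[:, 4] * raw[:, 6]                 # score = 0.9 * 0.8 = 0.72
    ordered = raw[raw[:, 4].argsort()[::-1]][:, 0:6]  # sort by score, drop class_prob
    ordered[:, 2] = np.clip(ordered[:, 2] + ordered[:, 0], 0, 1)  # right = x + w = 0.6
    ordered[:, 3] = np.clip(ordered[:, 3] + ordered[:, 1], 0, 1)  # bottom = y + h = 0.8
    print(ordered[:, [5, 4, 1, 0, 3, 2]])
    # [[1.   0.72 0.3  0.2  0.8  0.6]] -> [class_id, score, top, left, bottom, right]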
@@ -77,14 +77,13 @@ def filtered(obj, objects_to_track, object_filters):
 def create_tensor_input(frame, model_shape, region):
     cropped_frame = yuv_region_2_rgb(frame, region)
 
-    # Resize to 300x300 if needed
+    # Resize to the model_shape if needed
     if cropped_frame.shape != (model_shape[0], model_shape[1], 3):
         cropped_frame = cv2.resize(
             cropped_frame, dsize=model_shape, interpolation=cv2.INTER_LINEAR
         )
 
-    # Expand dimensions since the model expects images to have shape: [1, height, width, 3]
-    return np.expand_dims(cropped_frame, axis=0)
+    # Return a tensor of shape: [height, width, 3] in RGB format
+    return cropped_frame
 
 
 def stop_ffmpeg(ffmpeg_process, logger):