diff --git a/docker/tensorrt/Dockerfile.base b/docker/tensorrt/Dockerfile.base
index 59ead46f5..3226c8887 100644
--- a/docker/tensorrt/Dockerfile.base
+++ b/docker/tensorrt/Dockerfile.base
@@ -11,8 +11,14 @@ FROM ${TRT_BASE} AS trt-deps
 ARG COMPUTE_LEVEL
 
 RUN apt-get update \
-    && apt-get install -y git build-essential cuda-nvcc-* cuda-nvtx-* libnvinfer-dev libnvinfer-plugin-dev libnvparsers-dev libnvonnxparsers-dev \
-    && rm -rf /var/lib/apt/lists/*
+    && apt-get install -y git build-essential cuda-nvcc-* cuda-nvtx-* libnvinfer-dev libnvinfer-plugin-dev libnvparsers-dev libnvonnxparsers-dev
+
+RUN wget https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64/cuda-keyring_1.1-1_all.deb \
+&& dpkg -i cuda-keyring_1.1-1_all.deb \
+&& apt-get update \
+&& apt-get -y install cuda-toolkit \
+&& rm -rf /var/lib/apt/lists/*
+
 
 RUN --mount=type=bind,source=docker/tensorrt/detector/tensorrt_libyolo.sh,target=/tensorrt_libyolo.sh \
     /tensorrt_libyolo.sh
@@ -24,6 +30,8 @@ ENV S6_CMD_WAIT_FOR_SERVICES_MAXTIME=0
 COPY --from=trt-deps /usr/local/lib/libyolo_layer.so /usr/local/lib/libyolo_layer.so
 COPY --from=trt-deps /usr/local/src/tensorrt_demos /usr/local/src/tensorrt_demos
+COPY --from=trt-deps /usr/local/cuda-12.6 /usr/local/cuda
+COPY --from=trt-deps /etc/alternatives /etc/alternatives
 COPY docker/tensorrt/detector/rootfs/ /
 
 ENV YOLO_MODELS="yolov7-320"
diff --git a/docker/tensorrt/requirements-amd64.txt b/docker/tensorrt/requirements-amd64.txt
index 6cbfacd40..e6f1a913d 100644
--- a/docker/tensorrt/requirements-amd64.txt
+++ b/docker/tensorrt/requirements-amd64.txt
@@ -7,7 +7,7 @@ cython == 0.29.*; platform_machine == 'x86_64'
 nvidia-cuda-runtime-cu12 == 12.1.*; platform_machine == 'x86_64'
 nvidia-cuda-runtime-cu11 == 11.8.*; platform_machine == 'x86_64'
 nvidia-cublas-cu11 == 11.11.3.6; platform_machine == 'x86_64'
-nvidia-cudnn-cu11 == 8.6.0.*; platform_machine == 'x86_64'
+nvidia-cudnn-cu11 == 8.5.0.*; platform_machine == 'x86_64'
 onnx==1.14.0; platform_machine == 'x86_64'
-onnxruntime-gpu==1.18.0; platform_machine == 'x86_64'
+onnxruntime-gpu==1.17.*; platform_machine == 'x86_64'
 protobuf==3.20.3; platform_machine == 'x86_64'
diff --git a/docker/tensorrt/trt.mk b/docker/tensorrt/trt.mk
index ad2425c81..f2137b3bc 100644
--- a/docker/tensorrt/trt.mk
+++ b/docker/tensorrt/trt.mk
@@ -20,7 +20,5 @@ build-trt:
 	$(JETPACK4_ARGS) docker buildx bake --file=docker/tensorrt/trt.hcl --set tensorrt.tags=$(IMAGE_REPO):${GITHUB_REF_NAME}-$(COMMIT_HASH)-tensorrt-jp4 tensorrt
 	$(JETPACK5_ARGS) docker buildx bake --file=docker/tensorrt/trt.hcl --set tensorrt.tags=$(IMAGE_REPO):${GITHUB_REF_NAME}-$(COMMIT_HASH)-tensorrt-jp5 tensorrt
 
-push-trt: build-trt
-	$(X86_DGPU_ARGS) docker buildx bake --push --file=docker/tensorrt/trt.hcl --set tensorrt.tags=$(IMAGE_REPO):${GITHUB_REF_NAME}-$(COMMIT_HASH)-tensorrt tensorrt
-	$(JETPACK4_ARGS) docker buildx bake --push --file=docker/tensorrt/trt.hcl --set tensorrt.tags=$(IMAGE_REPO):${GITHUB_REF_NAME}-$(COMMIT_HASH)-tensorrt-jp4 tensorrt
-	$(JETPACK5_ARGS) docker buildx bake --push --file=docker/tensorrt/trt.hcl --set tensorrt.tags=$(IMAGE_REPO):${GITHUB_REF_NAME}-$(COMMIT_HASH)-tensorrt-jp5 tensorrt
+push-trt:
+	$(X86_DGPU_ARGS) docker buildx bake --push --file=docker/tensorrt/trt.hcl --set tensorrt.tags=crzynik/trt tensorrt
diff --git a/frigate/detectors/plugins/onnx.py b/frigate/detectors/plugins/onnx.py
index 6b124f6c9..1939b7323 100644
--- a/frigate/detectors/plugins/onnx.py
+++ b/frigate/detectors/plugins/onnx.py
@@ -1,11 +1,15 @@
 import logging
 
+import cv2
 import numpy as np
 from typing_extensions import Literal
 
 from frigate.detectors.detection_api import DetectionApi
-from frigate.detectors.detector_config import BaseDetectorConfig
-from frigate.detectors.util import preprocess
+from frigate.detectors.detector_config import (
+    BaseDetectorConfig,
+    ModelTypeEnum,
+    PixelFormatEnum,
+)
 
 logger = logging.getLogger(__name__)
 
@@ -21,7 +25,7 @@ class ONNXDetector(DetectionApi):
 
     def __init__(self, detector_config: ONNXDetectorConfig):
         try:
-            import onnxruntime
+            import onnxruntime as ort
 
             logger.info("ONNX: loaded onnxruntime module")
         except ModuleNotFoundError:
@@ -32,16 +36,54 @@ class ONNXDetector(DetectionApi):
         path = detector_config.model.path
         logger.info(f"ONNX: loading {detector_config.model.path}")
 
-        self.model = onnxruntime.InferenceSession(path)
+        self.model = ort.InferenceSession(path, providers=ort.get_available_providers())
+
+        self.h = detector_config.model.height
+        self.w = detector_config.model.width
+        self.onnx_model_type = detector_config.model.model_type
+        self.onnx_model_px = detector_config.model.input_pixel_format
+        path = detector_config.model.path
 
         logger.info(f"ONNX: {path} loaded")
 
     def detect_raw(self, tensor_input):
         model_input_name = self.model.get_inputs()[0].name
         model_input_shape = self.model.get_inputs()[0].shape
-        tensor_input = preprocess(tensor_input, model_input_shape, np.float32)
-        # ruff: noqa: F841
-        tensor_output = self.model.run(None, {model_input_name: tensor_input})[0]
-        raise Exception(
-            "No models are currently supported via onnx. See the docs for more info."
-        )
+
+        # adjust input shape
+        if self.onnx_model_type == ModelTypeEnum.yolonas:
+            tensor_input = cv2.dnn.blobFromImage(
+                tensor_input[0],
+                1.0,
+                (model_input_shape[3], model_input_shape[2]),
+                None,
+                swapRB=self.onnx_model_px == PixelFormatEnum.bgr,
+            ).astype(np.uint8)
+
+        tensor_output = self.model.run(None, {model_input_name: tensor_input})
+
+        if self.onnx_model_type == ModelTypeEnum.yolonas:
+            predictions = tensor_output[0]
+
+            detections = np.zeros((20, 6), np.float32)
+
+            for i, prediction in enumerate(predictions):
+                if i == 20:
+                    break
+                (_, x_min, y_min, x_max, y_max, confidence, class_id) = prediction
+                # when running in GPU mode, empty predictions in the output have class_id of -1
+                if class_id < 0:
+                    break
+                detections[i] = [
+                    class_id,
+                    confidence,
+                    y_min / self.h,
+                    x_min / self.w,
+                    y_max / self.h,
+                    x_max / self.w,
+                ]
+            return detections
+        else:
+            raise Exception(
+                f"{self.onnx_model_type} is currently not supported for rocm. See the docs for more info on supported models."
+            )
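For reviewers reading the onnx.py hunk: the yolonas branch of detect_raw maps each raw output row of the form (batch_index, x_min, y_min, x_max, y_max, confidence, class_id) into Frigate's fixed-size (20, 6) detections array, with box coordinates normalized by the model's input height and width. Below is a minimal standalone sketch of that conversion for reference only; the postprocess_yolonas name and its width/height parameters are illustrative and not part of the patch.

```python
import numpy as np


def postprocess_yolonas(predictions: np.ndarray, width: int, height: int) -> np.ndarray:
    """Illustrative sketch: map raw yolonas rows to Frigate's (20, 6) detection layout."""
    detections = np.zeros((20, 6), np.float32)
    for i, prediction in enumerate(predictions):
        if i == 20:  # only the first 20 detections per frame are kept
            break
        (_, x_min, y_min, x_max, y_max, confidence, class_id) = prediction
        # GPU execution providers pad unused output rows with class_id == -1
        if class_id < 0:
            break
        detections[i] = [
            class_id,
            confidence,
            y_min / height,  # coordinates are normalized to the model input size
            x_min / width,
            y_max / height,
            x_max / width,
        ]
    return detections
```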