Add support for yolonas in onnx

Nicolas Mowen 2024-09-16 07:54:01 -06:00
parent e4ea35e626
commit 9dc6d4658b
4 changed files with 66 additions and 18 deletions

@@ -11,8 +11,14 @@ FROM ${TRT_BASE} AS trt-deps
 ARG COMPUTE_LEVEL

 RUN apt-get update \
-    && apt-get install -y git build-essential cuda-nvcc-* cuda-nvtx-* libnvinfer-dev libnvinfer-plugin-dev libnvparsers-dev libnvonnxparsers-dev \
-    && rm -rf /var/lib/apt/lists/*
+    && apt-get install -y git build-essential cuda-nvcc-* cuda-nvtx-* libnvinfer-dev libnvinfer-plugin-dev libnvparsers-dev libnvonnxparsers-dev
+
+RUN wget https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64/cuda-keyring_1.1-1_all.deb \
+    && dpkg -i cuda-keyring_1.1-1_all.deb \
+    && apt-get update \
+    && apt-get -y install cuda-toolkit \
+    && rm -rf /var/lib/apt/lists/*

 RUN --mount=type=bind,source=docker/tensorrt/detector/tensorrt_libyolo.sh,target=/tensorrt_libyolo.sh \
     /tensorrt_libyolo.sh
@@ -24,6 +30,8 @@ ENV S6_CMD_WAIT_FOR_SERVICES_MAXTIME=0
 COPY --from=trt-deps /usr/local/lib/libyolo_layer.so /usr/local/lib/libyolo_layer.so
 COPY --from=trt-deps /usr/local/src/tensorrt_demos /usr/local/src/tensorrt_demos
+COPY --from=trt-deps /usr/local/cuda-12.6 /usr/local/cuda
+COPY --from=trt-deps /etc/alternatives /etc/alternatives
 COPY docker/tensorrt/detector/rootfs/ /
 ENV YOLO_MODELS="yolov7-320"

@@ -7,7 +7,7 @@ cython == 0.29.*; platform_machine == 'x86_64'
 nvidia-cuda-runtime-cu12 == 12.1.*; platform_machine == 'x86_64'
 nvidia-cuda-runtime-cu11 == 11.8.*; platform_machine == 'x86_64'
 nvidia-cublas-cu11 == 11.11.3.6; platform_machine == 'x86_64'
-nvidia-cudnn-cu11 == 8.6.0.*; platform_machine == 'x86_64'
+nvidia-cudnn-cu11 == 8.5.0.*; platform_machine == 'x86_64'
 onnx==1.14.0; platform_machine == 'x86_64'
-onnxruntime-gpu==1.18.0; platform_machine == 'x86_64'
+onnxruntime-gpu==1.17.*; platform_machine == 'x86_64'
 protobuf==3.20.3; platform_machine == 'x86_64'
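
A quick way to confirm the pinned onnxruntime-gpu wheel can actually see the GPU is to list its available execution providers; a minimal check, assuming the wheel and the CUDA/cuDNN versions pinned above are installed (this snippet is illustrative, not part of the commit):

# Minimal sanity check: list the execution providers onnxruntime was able to load.
import onnxruntime as ort

providers = ort.get_available_providers()
print(providers)

# If "CUDAExecutionProvider" is missing, InferenceSession will quietly fall back
# to CPU, which usually points at a CUDA/cuDNN runtime mismatch with the pins above.
if "CUDAExecutionProvider" not in providers:
    raise SystemExit("CUDA execution provider not available")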

@@ -20,7 +20,5 @@ build-trt:
 	$(JETPACK4_ARGS) docker buildx bake --file=docker/tensorrt/trt.hcl --set tensorrt.tags=$(IMAGE_REPO):${GITHUB_REF_NAME}-$(COMMIT_HASH)-tensorrt-jp4 tensorrt
 	$(JETPACK5_ARGS) docker buildx bake --file=docker/tensorrt/trt.hcl --set tensorrt.tags=$(IMAGE_REPO):${GITHUB_REF_NAME}-$(COMMIT_HASH)-tensorrt-jp5 tensorrt

-push-trt: build-trt
-	$(X86_DGPU_ARGS) docker buildx bake --push --file=docker/tensorrt/trt.hcl --set tensorrt.tags=$(IMAGE_REPO):${GITHUB_REF_NAME}-$(COMMIT_HASH)-tensorrt tensorrt
-	$(JETPACK4_ARGS) docker buildx bake --push --file=docker/tensorrt/trt.hcl --set tensorrt.tags=$(IMAGE_REPO):${GITHUB_REF_NAME}-$(COMMIT_HASH)-tensorrt-jp4 tensorrt
-	$(JETPACK5_ARGS) docker buildx bake --push --file=docker/tensorrt/trt.hcl --set tensorrt.tags=$(IMAGE_REPO):${GITHUB_REF_NAME}-$(COMMIT_HASH)-tensorrt-jp5 tensorrt
+push-trt:
+	$(X86_DGPU_ARGS) docker buildx bake --push --file=docker/tensorrt/trt.hcl --set tensorrt.tags=crzynik/trt tensorrt

@@ -1,11 +1,15 @@
 import logging

+import cv2
 import numpy as np
 from typing_extensions import Literal

 from frigate.detectors.detection_api import DetectionApi
-from frigate.detectors.detector_config import BaseDetectorConfig
-from frigate.detectors.util import preprocess
+from frigate.detectors.detector_config import (
+    BaseDetectorConfig,
+    ModelTypeEnum,
+    PixelFormatEnum,
+)

 logger = logging.getLogger(__name__)
@@ -21,7 +25,7 @@ class ONNXDetector(DetectionApi):
     def __init__(self, detector_config: ONNXDetectorConfig):
         try:
-            import onnxruntime
+            import onnxruntime as ort

             logger.info("ONNX: loaded onnxruntime module")
         except ModuleNotFoundError:
@@ -32,16 +36,54 @@ class ONNXDetector(DetectionApi):
         path = detector_config.model.path
         logger.info(f"ONNX: loading {detector_config.model.path}")

-        self.model = onnxruntime.InferenceSession(path)
+        self.model = ort.InferenceSession(path, providers=ort.get_available_providers())
+
+        self.h = detector_config.model.height
+        self.w = detector_config.model.width
+        self.onnx_model_type = detector_config.model.model_type
+        self.onnx_model_px = detector_config.model.input_pixel_format
+        path = detector_config.model.path

         logger.info(f"ONNX: {path} loaded")

     def detect_raw(self, tensor_input):
         model_input_name = self.model.get_inputs()[0].name
         model_input_shape = self.model.get_inputs()[0].shape
-        tensor_input = preprocess(tensor_input, model_input_shape, np.float32)
-
-        # ruff: noqa: F841
-        tensor_output = self.model.run(None, {model_input_name: tensor_input})[0]

-        raise Exception(
-            "No models are currently supported via onnx. See the docs for more info."
-        )
+        # adjust input shape
+        if self.onnx_model_type == ModelTypeEnum.yolonas:
+            tensor_input = cv2.dnn.blobFromImage(
+                tensor_input[0],
+                1.0,
+                (model_input_shape[3], model_input_shape[2]),
+                None,
+                swapRB=self.onnx_model_px == PixelFormatEnum.bgr,
+            ).astype(np.uint8)
+
+        tensor_output = self.model.run(None, {model_input_name: tensor_input})
+
+        if self.onnx_model_type == ModelTypeEnum.yolonas:
+            predictions = tensor_output[0]
+
+            detections = np.zeros((20, 6), np.float32)
+
+            for i, prediction in enumerate(predictions):
+                if i == 20:
+                    break
+                (_, x_min, y_min, x_max, y_max, confidence, class_id) = prediction
+                # when running in GPU mode, empty predictions in the output have class_id of -1
+                if class_id < 0:
+                    break
+                detections[i] = [
+                    class_id,
+                    confidence,
+                    y_min / self.h,
+                    x_min / self.w,
+                    y_max / self.h,
+                    x_max / self.w,
+                ]
+            return detections
+        else:
+            raise Exception(
+                f"{self.onnx_model_type} is currently not supported for onnx. See the docs for more info on supported models."
+            )
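
The yolonas branch in detect_raw assumes a model exported with flat, already-decoded output: each prediction row unpacks as (batch_index, x_min, y_min, x_max, y_max, confidence, class_id) in input-pixel coordinates, and the detector keeps at most 20 boxes while normalizing coordinates into Frigate's [class_id, confidence, y_min, x_min, y_max, x_max] layout. A rough standalone sketch of the same pre/post-processing, useful for testing an exported model outside Frigate (the model path, test image, 320x320 input size, and swapRB choice are assumptions, not part of the commit):

# Standalone sketch of the detector's YOLO-NAS path, under the assumptions above.
import cv2
import numpy as np
import onnxruntime as ort

MODEL_PATH = "yolo_nas_s.onnx"  # assumed: a YOLO-NAS model exported to ONNX with flat output
IMAGE_PATH = "frame.jpg"        # assumed: any test image
WIDTH, HEIGHT = 320, 320        # assumed: the exported model's input size

session = ort.InferenceSession(MODEL_PATH, providers=ort.get_available_providers())
input_name = session.get_inputs()[0].name

# Preprocess like the diff: build an NCHW uint8 blob resized to the model input.
# swapRB=True assumes the export expects RGB while cv2.imread returns BGR; flip if needed.
frame = cv2.imread(IMAGE_PATH)
blob = cv2.dnn.blobFromImage(frame, 1.0, (WIDTH, HEIGHT), None, swapRB=True).astype(np.uint8)

# First output is expected to be an (N, 7) array of decoded predictions.
predictions = session.run(None, {input_name: blob})[0]

# Post-process into Frigate's fixed (20, 6) detections array with normalized coordinates.
detections = np.zeros((20, 6), np.float32)
for i, prediction in enumerate(predictions):
    if i == 20:
        break
    _, x_min, y_min, x_max, y_max, confidence, class_id = prediction
    if class_id < 0:  # empty slots report class_id == -1 when running on GPU
        break
    detections[i] = [
        class_id,
        confidence,
        y_min / HEIGHT,
        x_min / WIDTH,
        y_max / HEIGHT,
        x_max / WIDTH,
    ]

print(detections)

If the printed rows stay all zero on a busy test image, the export format likely does not match the flat (batch_index, x_min, ...) layout that this sketch and the detector both expect.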