Add yolov8 support to tensorrt

alexyao2015 2024-02-21 01:15:29 -06:00
parent e4d0e222e3
commit 4ea2e16ecd
4 changed files with 50 additions and 29 deletions


@@ -12,16 +12,17 @@ services:
     build:
       context: .
       dockerfile: docker/main/Dockerfile
+      # Use target devcontainer-trt for TensorRT dev
       target: devcontainer
-    ## Uncomment this block for nvidia gpu support
+    # Run this command to build the devcontainer with tensorrt support, then uncomment the image and deploy blocks and comment the build block above
+    # ARCH=amd64 docker buildx bake --load --file=docker/tensorrt/trt.hcl --set tensorrt.tags=frigate:devcontainer-trt devcontainer-trt
+    # image: frigate:devcontainer-trt
     # deploy:
     #   resources:
     #     reservations:
     #       devices:
     #         - driver: nvidia
     #           count: 1
     #           capabilities: [gpu]
     environment:
       YOLO_MODELS: yolov7-320
     devices:


@@ -95,4 +95,5 @@ target "devcontainer-trt" {
   }
   platforms = ["linux/amd64"]
   target = "devcontainer-trt"
+  tags = ["frigate:devcontainer-trt"]
 }


@@ -1,4 +1,5 @@
 import ctypes
+import json
 import logging

 import numpy as np
@@ -15,7 +16,8 @@ from pydantic import Field
 from typing_extensions import Literal

 from frigate.detectors.detection_api import DetectionApi
-from frigate.detectors.detector_config import BaseDetectorConfig
+from frigate.detectors.detector_config import BaseDetectorConfig, ModelTypeEnum
+from frigate.detectors.util import yolov8_postprocess

 logger = logging.getLogger(__name__)
@@ -85,28 +87,34 @@ class TensorRtDetector(DetectionApi):
                 e,
             )

+        if self.model_type == ModelTypeEnum.yolov8:
+            with open(model_path, "rb") as f, trt.Runtime(self.trt_logger) as runtime:
+                meta_len = int.from_bytes(
+                    f.read(4), byteorder="little"
+                )  # read metadata length
+                metadata = json.loads(f.read(meta_len).decode("utf-8"))  # read metadata
+                model = runtime.deserialize_cuda_engine(f.read())  # read engine
+                return model
+
         with open(model_path, "rb") as f, trt.Runtime(self.trt_logger) as runtime:
             return runtime.deserialize_cuda_engine(f.read())

-    def _get_input_shape(self):
+    def _get_input_output_shape(self):
         """Get input shape of the TensorRT YOLO engine."""
-        binding = self.engine[0]
-        assert self.engine.binding_is_input(binding)
-        binding_dims = self.engine.get_binding_shape(binding)
-        if len(binding_dims) == 4:
-            return (
-                tuple(binding_dims[2:]),
-                trt.nptype(self.engine.get_binding_dtype(binding)),
-            )
-        elif len(binding_dims) == 3:
-            return (
-                tuple(binding_dims[1:]),
-                trt.nptype(self.engine.get_binding_dtype(binding)),
-            )
-        else:
-            raise ValueError(
-                "bad dims of binding %s: %s" % (binding, str(binding_dims))
-            )
+        input_shape = None
+        output_shape = None
+        for i in range(self.engine.num_bindings):
+            name = self.engine.get_tensor_name(i)
+            shape = (
+                tuple(self.engine.get_binding_shape(name)),
+                trt.nptype(self.engine.get_binding_dtype(name)),
+            )
+            if self.engine.get_tensor_mode(name) == trt.TensorIOMode.INPUT:
+                input_shape = shape
+            else:
+                output_shape = shape
+        assert output_shape is not None and input_shape is not None
+        return input_shape, output_shape

     def _allocate_buffers(self):
         """Allocates all host/device in/out buffers required for an engine."""
@@ -217,8 +225,9 @@ class TensorRtDetector(DetectionApi):
         self.nms_threshold = 0.4
         err, self.stream = cuda.cuStreamCreate(0)
         self.trt_logger = TrtLogger()
+        self.model_type = detector_config.model.model_type
         self.engine = self._load_engine(detector_config.model.path)
-        self.input_shape = self._get_input_shape()
+        self.input_shape, self.output_shape = self._get_input_output_shape()

         try:
             self.context = self.engine.create_execution_context()
@@ -261,7 +270,9 @@ class TensorRtDetector(DetectionApi):
         # filter low-conf detections and concatenate results of all yolo layers
         detections = []
         for o in trt_outputs:
+            # group outputs into arrs of 7
             dets = o.reshape((-1, 7))
+            # box_confidence x class_prob >= conf_th
             dets = dets[dets[:, 4] * dets[:, 6] >= conf_th]
             detections.append(dets)
         detections = np.concatenate(detections, axis=0)
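For reference, each seven-element row follows the column layout this file uses downstream: [x, y, w, h, box_confidence, class_id, class_prob]. A toy numpy run of the same confidence filter, with made-up values:

import numpy as np

# rows: [x, y, w, h, box_confidence, class_id, class_prob]
o = np.array(
    [0.10, 0.20, 0.05, 0.08, 0.90, 1.0, 0.80,   # kept: 0.90 * 0.80 = 0.72
     0.50, 0.50, 0.10, 0.10, 0.30, 2.0, 0.40],  # dropped: 0.30 * 0.40 = 0.12
    dtype=np.float32,
)

conf_th = 0.4
dets = o.reshape((-1, 7))                        # group outputs into rows of 7
dets = dets[dets[:, 4] * dets[:, 6] >= conf_th]  # same mask as above
print(dets.shape)  # (1, 7)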
@@ -284,6 +295,10 @@ class TensorRtDetector(DetectionApi):
             tensor_input.astype(self.input_shape[-1])
         )

         trt_outputs = self._do_inference()

+        if self.model_type == ModelTypeEnum.yolov8:
+            return yolov8_postprocess(
+                self.input_shape[0], trt_outputs[0].reshape(self.output_shape[0])
+            )
         raw_detections = self._postprocess_yolo(trt_outputs, self.conf_th)
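A note on the reshape in the yolov8 branch: self.input_shape[0] and self.output_shape[0] are the dims halves of the (dims, dtype) pairs returned by _get_input_output_shape, so the flat output buffer is restored to the engine's output layout before postprocessing. The exact shapes below are an assumption for a stock 80-class YOLOv8 export at 320x320, not values from the commit:

import numpy as np

# Assumed output binding shape: 84 rows = 4 box coordinates + 80 class scores;
# 2100 columns = 40*40 + 20*20 + 10*10 grid cells across the three strides.
output_dims = (1, 84, 2100)
flat = np.zeros(int(np.prod(output_dims)), dtype=np.float32)  # stand-in for trt_outputs[0]
pred = flat.reshape(output_dims)  # same reshape as the yolov8 branch above
print(pred.shape)  # (1, 84, 2100)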
@@ -298,10 +313,13 @@ class TensorRtDetector(DetectionApi):
         # Reorder elements by the score, best on top, remove class_prob
         ordered = raw_detections[raw_detections[:, 4].argsort()[::-1]][:, 0:6]
         # transform width to right with clamp to 0..1
+        # right of box
         ordered[:, 2] = np.clip(ordered[:, 2] + ordered[:, 0], 0, 1)
         # transform height to bottom with clamp to 0..1
+        # bottom of box
         ordered[:, 3] = np.clip(ordered[:, 3] + ordered[:, 1], 0, 1)
         # put result into the correct order and limit to top 20
+        # [class_id, box_confidence, y_min/h, x_min/w, y_max/h, x_max/w]
         detections = ordered[:, [5, 4, 1, 0, 3, 2]][:20]

         # pad to 20x6 shape
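A worked numpy example of the two clamps and the column reorder on a single made-up row:

import numpy as np

# one filtered row: [x_min, y_min, w, h, box_confidence, class_id], normalized 0..1
ordered = np.array([[0.10, 0.20, 0.30, 0.95, 0.72, 1.0]], dtype=np.float32)

ordered[:, 2] = np.clip(ordered[:, 2] + ordered[:, 0], 0, 1)  # x_max = 0.10 + 0.30 = 0.40
ordered[:, 3] = np.clip(ordered[:, 3] + ordered[:, 1], 0, 1)  # y_max = 1.15, clamped to 1.0
detections = ordered[:, [5, 4, 1, 0, 3, 2]][:20]
print(detections)  # [[1.   0.72 0.2  0.1  1.   0.4 ]]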


@@ -79,5 +79,6 @@ def yolov8_postprocess(
             np.argpartition(detections[:, 1], -box_count)[-box_count:]
         ]
     detections = detections.copy()
-    detections.resize((box_count, 6))
-    return detections
+    # sort detections by confidence
+    detections = detections[detections[:, 1].argsort()[::-1]]
+    return np.resize(detections, (box_count, 6))
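One behavioral detail of the last change worth noting: ndarray.resize pads in place with zeros when enlarging, while np.resize returns a new array that repeats the data. A quick illustration of both, plus the descending confidence sort added above (values are made up):

import numpy as np

dets = np.array([[7.0, 0.5], [3.0, 0.9]])

a = dets.copy()
a.resize((3, 2))             # in-place, zero-pads: last row is [0., 0.]
b = np.resize(dets, (3, 2))  # new array, repeats data: last row is [7., 0.5]

# descending sort on column 1 (confidence), as in the fix above
print(dets[dets[:, 1].argsort()[::-1]])  # [[3.  0.9] [7.  0.5]]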