mirror of
https://github.com/blakeblackshear/frigate.git
synced 2026-02-09 04:35:25 +03:00
Add yolov8 support to tensorrt
This commit is contained in:
parent
e4d0e222e3
commit
4ea2e16ecd
@ -12,16 +12,17 @@ services:
|
|||||||
build:
|
build:
|
||||||
context: .
|
context: .
|
||||||
dockerfile: docker/main/Dockerfile
|
dockerfile: docker/main/Dockerfile
|
||||||
# Use target devcontainer-trt for TensorRT dev
|
|
||||||
target: devcontainer
|
target: devcontainer
|
||||||
## Uncomment this block for nvidia gpu support
|
# Run this command to build the devcontainer with TensorRT support, then uncomment the image and deploy blocks below and comment out the build block above
|
||||||
|
# ARCH=amd64 docker buildx bake --load --file=docker/tensorrt/trt.hcl --set tensorrt.tags=frigate:devcontainer-trt devcontainer-trt
|
||||||
|
# image: frigate:devcontainer-trt
|
||||||
# deploy:
|
# deploy:
|
||||||
# resources:
|
# resources:
|
||||||
# reservations:
|
# reservations:
|
||||||
# devices:
|
# devices:
|
||||||
# - driver: nvidia
|
# - driver: nvidia
|
||||||
# count: 1
|
# count: 1
|
||||||
# capabilities: [gpu]
|
# capabilities: [gpu]
|
||||||
environment:
|
environment:
|
||||||
YOLO_MODELS: yolov7-320
|
YOLO_MODELS: yolov7-320
|
||||||
devices:
|
devices:
|
||||||
|
|||||||
@ -95,4 +95,5 @@ target "devcontainer-trt" {
|
|||||||
}
|
}
|
||||||
platforms = ["linux/amd64"]
|
platforms = ["linux/amd64"]
|
||||||
target = "devcontainer-trt"
|
target = "devcontainer-trt"
|
||||||
|
tags = ["frigate:devcontainer-trt"]
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,4 +1,5 @@
|
|||||||
import ctypes
|
import ctypes
|
||||||
|
import json
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
@ -15,7 +16,8 @@ from pydantic import Field
|
|||||||
from typing_extensions import Literal
|
from typing_extensions import Literal
|
||||||
|
|
||||||
from frigate.detectors.detection_api import DetectionApi
|
from frigate.detectors.detection_api import DetectionApi
|
||||||
from frigate.detectors.detector_config import BaseDetectorConfig
|
from frigate.detectors.detector_config import BaseDetectorConfig, ModelTypeEnum
|
||||||
|
from frigate.detectors.util import yolov8_postprocess
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@ -85,28 +87,34 @@ class TensorRtDetector(DetectionApi):
|
|||||||
e,
|
e,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if self.model_type == ModelTypeEnum.yolov8:
|
||||||
|
with open(model_path, "rb") as f, trt.Runtime(self.trt_logger) as runtime:
|
||||||
|
meta_len = int.from_bytes(
|
||||||
|
f.read(4), byteorder="little"
|
||||||
|
) # read metadata length
|
||||||
|
metadata = json.loads(f.read(meta_len).decode("utf-8")) # read metadata
|
||||||
|
model = runtime.deserialize_cuda_engine(f.read()) # read engine
|
||||||
|
return model
|
||||||
|
|
||||||
with open(model_path, "rb") as f, trt.Runtime(self.trt_logger) as runtime:
|
with open(model_path, "rb") as f, trt.Runtime(self.trt_logger) as runtime:
|
||||||
return runtime.deserialize_cuda_engine(f.read())
|
return runtime.deserialize_cuda_engine(f.read())
|
||||||
|
|
||||||
def _get_input_shape(self):
|
def _get_input_output_shape(self):
|
||||||
"""Get input shape of the TensorRT YOLO engine."""
|
"""Get input and output shapes of the TensorRT YOLO engine."""
|
||||||
binding = self.engine[0]
|
input_shape = None
|
||||||
assert self.engine.binding_is_input(binding)
|
output_shape = None
|
||||||
binding_dims = self.engine.get_binding_shape(binding)
|
for i in range(self.engine.num_bindings):
|
||||||
if len(binding_dims) == 4:
|
name = self.engine.get_tensor_name(i)
|
||||||
return (
|
shape = (
|
||||||
tuple(binding_dims[2:]),
|
tuple(self.engine.get_binding_shape(name)),
|
||||||
trt.nptype(self.engine.get_binding_dtype(binding)),
|
trt.nptype(self.engine.get_binding_dtype(name)),
|
||||||
)
|
|
||||||
elif len(binding_dims) == 3:
|
|
||||||
return (
|
|
||||||
tuple(binding_dims[1:]),
|
|
||||||
trt.nptype(self.engine.get_binding_dtype(binding)),
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
raise ValueError(
|
|
||||||
"bad dims of binding %s: %s" % (binding, str(binding_dims))
|
|
||||||
)
|
)
|
||||||
|
if self.engine.get_tensor_mode(name) == trt.TensorIOMode.INPUT:
|
||||||
|
input_shape = shape
|
||||||
|
else:
|
||||||
|
output_shape = shape
|
||||||
|
assert output_shape is not None and input_shape is not None
|
||||||
|
return input_shape, output_shape
|
||||||
|
|
||||||
def _allocate_buffers(self):
|
def _allocate_buffers(self):
|
||||||
"""Allocates all host/device in/out buffers required for an engine."""
|
"""Allocates all host/device in/out buffers required for an engine."""
|
||||||
@ -217,8 +225,9 @@ class TensorRtDetector(DetectionApi):
|
|||||||
self.nms_threshold = 0.4
|
self.nms_threshold = 0.4
|
||||||
err, self.stream = cuda.cuStreamCreate(0)
|
err, self.stream = cuda.cuStreamCreate(0)
|
||||||
self.trt_logger = TrtLogger()
|
self.trt_logger = TrtLogger()
|
||||||
|
self.model_type = detector_config.model.model_type
|
||||||
self.engine = self._load_engine(detector_config.model.path)
|
self.engine = self._load_engine(detector_config.model.path)
|
||||||
self.input_shape = self._get_input_shape()
|
self.input_shape, self.output_shape = self._get_input_output_shape()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
self.context = self.engine.create_execution_context()
|
self.context = self.engine.create_execution_context()
|
||||||
@ -261,7 +270,9 @@ class TensorRtDetector(DetectionApi):
|
|||||||
# filter low-conf detections and concatenate results of all yolo layers
|
# filter low-conf detections and concatenate results of all yolo layers
|
||||||
detections = []
|
detections = []
|
||||||
for o in trt_outputs:
|
for o in trt_outputs:
|
||||||
|
# group outputs into rows of 7 values
|
||||||
dets = o.reshape((-1, 7))
|
dets = o.reshape((-1, 7))
|
||||||
|
# box_confidence x class_prob >= conf_th
|
||||||
dets = dets[dets[:, 4] * dets[:, 6] >= conf_th]
|
dets = dets[dets[:, 4] * dets[:, 6] >= conf_th]
|
||||||
detections.append(dets)
|
detections.append(dets)
|
||||||
detections = np.concatenate(detections, axis=0)
|
detections = np.concatenate(detections, axis=0)
|
||||||
@ -284,6 +295,10 @@ class TensorRtDetector(DetectionApi):
|
|||||||
tensor_input.astype(self.input_shape[-1])
|
tensor_input.astype(self.input_shape[-1])
|
||||||
)
|
)
|
||||||
trt_outputs = self._do_inference()
|
trt_outputs = self._do_inference()
|
||||||
|
if self.model_type == ModelTypeEnum.yolov8:
|
||||||
|
return yolov8_postprocess(
|
||||||
|
self.input_shape[0], trt_outputs[0].reshape(self.output_shape[0])
|
||||||
|
)
|
||||||
|
|
||||||
raw_detections = self._postprocess_yolo(trt_outputs, self.conf_th)
|
raw_detections = self._postprocess_yolo(trt_outputs, self.conf_th)
|
||||||
|
|
||||||
@ -298,10 +313,13 @@ class TensorRtDetector(DetectionApi):
|
|||||||
# Reorder elements by the score, best on top, remove class_prob
|
# Reorder elements by the score, best on top, remove class_prob
|
||||||
ordered = raw_detections[raw_detections[:, 4].argsort()[::-1]][:, 0:6]
|
ordered = raw_detections[raw_detections[:, 4].argsort()[::-1]][:, 0:6]
|
||||||
# transform width to right with clamp to 0..1
|
# transform width to right with clamp to 0..1
|
||||||
|
# right of box
|
||||||
ordered[:, 2] = np.clip(ordered[:, 2] + ordered[:, 0], 0, 1)
|
ordered[:, 2] = np.clip(ordered[:, 2] + ordered[:, 0], 0, 1)
|
||||||
# transform height to bottom with clamp to 0..1
|
# transform height to bottom with clamp to 0..1
|
||||||
|
# bottom of box
|
||||||
ordered[:, 3] = np.clip(ordered[:, 3] + ordered[:, 1], 0, 1)
|
ordered[:, 3] = np.clip(ordered[:, 3] + ordered[:, 1], 0, 1)
|
||||||
# put result into the correct order and limit to top 20
|
# put result into the correct order and limit to top 20
|
||||||
|
# [class_id, box_confidence, y_min/h, x_min/w, y_max/h, x_max/w]
|
||||||
detections = ordered[:, [5, 4, 1, 0, 3, 2]][:20]
|
detections = ordered[:, [5, 4, 1, 0, 3, 2]][:20]
|
||||||
|
|
||||||
# pad to 20x6 shape
|
# pad to 20x6 shape
|
||||||
|
|||||||
@ -79,5 +79,6 @@ def yolov8_postprocess(
|
|||||||
np.argpartition(detections[:, 1], -box_count)[-box_count:]
|
np.argpartition(detections[:, 1], -box_count)[-box_count:]
|
||||||
]
|
]
|
||||||
detections = detections.copy()
|
detections = detections.copy()
|
||||||
detections.resize((box_count, 6))
|
# sort detections by confidence
|
||||||
return detections
|
detections = detections[detections[:, 1].argsort()[::-1]]
|
||||||
|
return np.resize(detections, (box_count, 6))
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user