integrate yolov5 and yolov5_pytorch

This commit is contained in:
Andreas Franzén 2022-04-13 19:25:18 +02:00
parent a292f272e9
commit 112820826f
9 changed files with 951 additions and 26 deletions

View File

@@ -11,10 +11,11 @@ services:
volumes:
- /etc/localtime:/etc/localtime:ro
- .:/lab/frigate:cached
- ./config/config.yml:/config/config.yml:ro
- ./config/config.yml:/config/config.yml:rw
- ./debug:/media/frigate
- /dev/bus/usb:/dev/bus/usb
- /dev/dri:/dev/dri # for intel hwaccel, needs to be updated for your hardware
- ./frigate:/opt/frigate/frigate
#- /dev/bus/usb:/dev/bus/usb
#- /dev/dri:/dev/dri # for intel hwaccel, needs to be updated for your hardware
ports:
- "1935:1935"
- "5000:5000"

View File

@@ -26,3 +26,40 @@ Models for both CPU and EdgeTPU (Coral) are bundled in the image. You can use yo
- Labels: `/labelmap.txt`
You also need to update the [model config](/configuration/advanced#model) if they differ from the defaults.
You can also try improving speed by using a YOLOv3-tiny model quantized to run on the Edge TPU.
A compiled model is available [here](https://github.com/guichristmann/edge-tpu-tiny-yolo/tree/master/models).
Add it as a volume mount in your docker-compose file:
```yaml
volumes:
- /path/to/quant_coco-tiny-v3-relu_edgetpu.tflite:/edgetpu_model.tflite
```
And then set the configuration for the model in config.yml:
```yaml
model:
  # Required: height of the trained model
  height: 416
  # Required: width of the trained model
  width: 416
  # Required: type of model (ssd, yolov3, yolov5, or yolov5_pytorch)
  type: 'yolov3'
  # Required: path of label map
  labelmap_path: '/labelmap.txt'
  # Optional (but required for yolov3): anchors, comma-separated
  anchors: '10,14, 23,27, 37,58, 81,82, 135,169, 344,319'
```
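This commit also adds `yolov5` and `yolov5_pytorch` model types (see the `ModelConfig` and `LocalObjectDetector` changes below). A minimal sketch of what a YOLOv5 Edge TPU config might look like, assuming a compiled model mounted at `/edgetpu_model.tflite`; the width and height must match the model:
```yaml
model:
  path: /edgetpu_model.tflite
  type: 'yolov5'
  width: 320
  height: 320
  labelmap_path: '/labelmap.txt'
```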
### Customizing the Labelmap
The labelmap can be customized to your needs. A common reason to do this is to combine object types that are easily confused when you don't need that level of granularity, such as car/truck. You must retain the same number of labels, but you can change the names. To change:
- Download the [COCO labelmap](https://dl.google.com/coral/canned_models/coco_labels.txt)
- Modify the label names as desired. For example, change `7 truck` to `7 car`
- Mount the new file at `/labelmap.txt` in the container with an additional volume
```
-v ./config/labelmap.txt:/labelmap.txt
```
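If you use docker-compose (as in the examples above), the equivalent mount is:
```yaml
volumes:
  - ./config/labelmap.txt:/labelmap.txt
```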

View File

@@ -2,6 +2,7 @@ import json
import logging
import multiprocessing as mp
import os
import pprint
import signal
import sys
import threading
@@ -158,8 +159,7 @@ class FrigateApp:
self.mqtt_relay.start()
def start_detectors(self):
model_path = self.config.model.path
model_shape = (self.config.model.height, self.config.model.width)
for name in self.config.cameras.keys():
self.detection_out_events[name] = mp.Event()
@@ -188,8 +188,7 @@ class FrigateApp:
name,
self.detection_queue,
self.detection_out_events,
model_path,
model_shape,
self.config.model,
"cpu",
detector.num_threads,
)
@@ -198,8 +197,7 @@ class FrigateApp:
name,
self.detection_queue,
self.detection_out_events,
model_path,
model_shape,
self.config.model,
detector.device,
detector.num_threads,
)
@@ -310,6 +308,7 @@ class FrigateApp:
try:
try:
self.init_config()
pprint.pprint(self.config)
except Exception as e:
print("*************************************************************")
print("*************************************************************")

View File

@@ -653,6 +653,8 @@ class DatabaseConfig(FrigateBaseModel):
class ModelConfig(FrigateBaseModel):
path: Optional[str] = Field(title="Custom Object detection model path.")
type: str = Field(default="ssd", title="Model type")
anchors: Optional[str] = Field(default="", title="Optional, but required for yolov3")
labelmap_path: Optional[str] = Field(title="Label map for custom object detector.")
width: int = Field(default=320, title="Object detection model input width.")
height: int = Field(default=320, title="Object detection model input height.")

View File

@@ -14,10 +14,33 @@ from setproctitle import setproctitle
from tflite_runtime.interpreter import load_delegate
from frigate.util import EventsPerSecond, SharedMemoryFrameManager, listen  # load_labels is defined locally below
from frigate.yolov5.edgetpumodel import EdgeTPUModel
logger = logging.getLogger(__name__)
def load_labels(path, encoding='utf-8'):
"""Loads labels from file (with or without index numbers).
Args:
path: path to label file.
encoding: label file encoding.
Returns:
Dictionary mapping indices to labels.
"""
logger.warn(f"Loaded labels from {path}")
with open(path, 'r', encoding=encoding) as f:
lines = f.readlines()
if not lines:
return {}
if lines[0].split(' ', maxsplit=1)[0].isdigit():
pairs = [line.split(' ', maxsplit=1) for line in lines]
return {int(index): label.strip() for index, label in pairs}
else:
return {index: line.strip() for index, line in enumerate(lines)}
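# Example of the two supported label file formats (a sketch):
#   "0 person\n1 bicycle" -> {0: 'person', 1: 'bicycle'}   (indexed lines)
#   "person\nbicycle"     -> {0: 'person', 1: 'bicycle'}   (plain lines, enumerated)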
class ObjectDetector(ABC):
@abstractmethod
def detect(self, tensor_input, threshold=0.4):
@@ -25,13 +48,22 @@ class ObjectDetector(ABC):
class LocalObjectDetector(ObjectDetector):
def __init__(self, tf_device=None, model_path=None, num_threads=3, labels=None):
def __init__(self, model_config, tf_device=None, num_threads=3):
self.fps = EventsPerSecond()
if labels is None:
self.labels = {}
else:
self.labels = load_labels(labels)
if model_config.labelmap_path:
self.labels = load_labels(model_config.labelmap_path)
self.model_config = model_config
if self.model_config.type == 'yolov5':
model = EdgeTPUModel(model_config.path, None)
input_size = model.get_image_size()
# warm up the interpreter once with a random frame
x = (255 * np.random.random((3, *input_size))).astype(np.uint8)
model.forward(x)
self.yolov5Model = model
if self.model_config.type == 'yolov5_pytorch':
from frigate.yolov5_pytorch import ObjectDetection as Yolov5ObjectDetector
self.yolov5ObjectDetector = Yolov5ObjectDetector()
device_config = {"device": "usb"}
if tf_device is not None:
device_config = {"device": tf_device}
@@ -44,7 +76,7 @@ class LocalObjectDetector(ObjectDetector):
edge_tpu_delegate = load_delegate("libedgetpu.so.1.0", device_config)
logger.info("TPU found")
self.interpreter = tflite.Interpreter(
model_path=model_path or "/edgetpu_model.tflite",
model_path=model_config.path or "/edgetpu_model.tflite",
experimental_delegates=[edge_tpu_delegate],
)
except ValueError:
@@ -57,7 +89,7 @@ class LocalObjectDetector(ObjectDetector):
"CPU detectors are not recommended and should only be used for testing or for trial purposes."
)
self.interpreter = tflite.Interpreter(
model_path=model_path or "/cpu_model.tflite", num_threads=num_threads
model_path=model_config.path or "/cpu_model.tflite", num_threads=num_threads
)
self.interpreter.allocate_tensors()
@@ -65,6 +97,11 @@ class LocalObjectDetector(ObjectDetector):
self.tensor_input_details = self.interpreter.get_input_details()
self.tensor_output_details = self.interpreter.get_output_details()
if model_config.anchors != "":
anchors = [float(x) for x in model_config.anchors.split(',')]
self.anchors = np.array(anchors).reshape(-1, 2)
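# Example: the documented anchor string '10,14, 23,27, 37,58, 81,82, 135,169, 344,319'
# parses to a (6, 2) float array of (width, height) pairs, consumed by detect_yolov3 below:
# [[10. 14.] [23. 27.] [37. 58.] [81. 82.] [135. 169.] [344. 319.]]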
def detect(self, tensor_input, threshold=0.4):
detections = []
@@ -79,7 +116,104 @@ class LocalObjectDetector(ObjectDetector):
self.fps.update()
return detections
def sigmoid(self, x):
return 1. / (1 + np.exp(-x))
def detect_raw(self, tensor_input):
if self.model_config.type == "ssd":
raw_detections = self.detect_ssd(tensor_input)
elif self.model_config.type == "yolov3":
raw_detections = self.detect_yolov3(tensor_input)
elif self.model_config.type == "yolov5":
raw_detections = self.detect_yolov5(tensor_input)
elif self.model_config.type == "yolov5_pytorch":
raw_detections = self.detect_yolov5_pytorch(tensor_input)
else:
logger.error(f"Unsupported model type {self.model_config.type}")
raw_detections = []
return raw_detections
def get_interpreter_details(self):
# Get input and output tensor details
input_details = self.interpreter.get_input_details()
output_details = self.interpreter.get_output_details()
input_shape = input_details[0]["shape"]
return input_details, output_details, input_shape
# from util.py in https://github.com/guichristmann/edge-tpu-tiny-yolo
def featuresToBoxes(self, outputs, anchors, n_classes, net_input_shape):
grid_shape = outputs.shape[1:3]
n_anchors = len(anchors)
# NumPy vectorization to compute the boxes in a reasonable amount of time
grid_y = np.tile(np.arange(grid_shape[0]).reshape(-1, 1), grid_shape[0]).reshape(1, grid_shape[0], grid_shape[0], 1).astype(np.float32)
grid_x = grid_y.copy().T.reshape(1, grid_shape[0], grid_shape[1], 1).astype(np.float32)
outputs = outputs.reshape(1, grid_shape[0], grid_shape[1], n_anchors, -1)
_anchors = anchors.reshape(1, 1, 3, 2).astype(np.float32)
# Get box parameters from network output and apply transformations
bx = (self.sigmoid(outputs[..., 0]) + grid_x) / grid_shape[0]
by = (self.sigmoid(outputs[..., 1]) + grid_y) / grid_shape[1]
# Should these be inverted?
bw = np.multiply(_anchors[..., 0] / net_input_shape[1], np.exp(outputs[..., 2]))
bh = np.multiply(_anchors[..., 1] / net_input_shape[2], np.exp(outputs[..., 3]))
# Get the scores
scores = self.sigmoid(np.expand_dims(outputs[..., 4], -1)) * \
self.sigmoid(outputs[..., 5:])
scores = scores.reshape(-1, n_classes)
# TODO: some of these are probably not needed but I don't understand numpy magic well enough
bx = bx.flatten()
by = (by.flatten()) * 1
bw = bw.flatten()
bh = bh.flatten() * 1
half_bw = bw / 2.
half_bh = bh / 2.
tl_x = np.multiply(bx - half_bw, 1)
tl_y = np.multiply(by - half_bh, 1)
br_x = np.multiply(bx + half_bw, 1)
br_y = np.multiply(by + half_bh, 1)
# Get indices of boxes with score higher than threshold
indices = np.argwhere(scores >= 0.5)
selected_boxes = []
selected_scores = []
for i in indices:
i = tuple(i)
selected_boxes.append( ((tl_x[i[0]], tl_y[i[0]]), (br_x[i[0]], br_y[i[0]])) )
selected_scores.append(scores[i])
selected_boxes = np.array(selected_boxes)
selected_scores = np.array(selected_scores)
selected_classes = indices[:, 1]
return selected_boxes, selected_scores, selected_classes
def detect_yolov5(self, tensor_input):
tensor_input = np.squeeze(tensor_input, axis=0)
results = self.yolov5Model.forward(tensor_input)
logger.debug(f"yolov5 inference/NMS time: {self.yolov5Model.get_last_inference_time()}")
det = results[0]
detections = np.zeros((20, 6), np.float32)
i = 0
for *xyxy, conf, cls in reversed(det):
if i >= 20: break  # frigate consumes at most 20 detections per frame
detections[i] = [
int(cls) + 1,
float(conf),
# swap xyxy -> [y_min, x_min, y_max, x_max], the box order frigate expects
xyxy[1],
xyxy[0],
xyxy[3],
xyxy[2],
]
i += 1
return detections
def detect_ssd(self, tensor_input):
self.interpreter.set_tensor(self.tensor_input_details[0]["index"], tensor_input)
self.interpreter.invoke()
@@ -106,6 +240,69 @@ class LocalObjectDetector(ObjectDetector):
return detections
def detect_yolov5_pytorch(self, tensor_input):
tensor_input = np.squeeze(tensor_input, axis=0)
results = self.yolov5ObjectDetector.score_frame(tensor_input)
labels, cord = results
n = len(labels)
detections = np.zeros((20, 6), np.float32)
if n > 0:
print(f"Total Targets: {n}")
print(f"Labels: {set([self.yolov5ObjectDetector.class_to_label(label) for label in labels])}")
for i in range(n):
if i < 20:
row = cord[i]
score = float(row[4])
if score < 0.4:
break
x1, y1, x2, y2 = row[0], row[1], row[2], row[3]
label = self.yolov5ObjectDetector.class_to_label(labels[i])
# swap to [y_min, x_min, y_max, x_max], the box order frigate expects
detections[i] = [labels[i] + 1, score, y1, x1, y2, x2]
logger.debug(detections[i])
return detections
def detect_yolov3(self, tensor_input):
input_details, output_details, net_input_shape = \
self.get_interpreter_details()
self.interpreter.set_tensor(self.tensor_input_details[0]['index'], tensor_input)
self.interpreter.invoke()
# for yolo, the output handling is a little different
out1 = self.interpreter.get_tensor(self.tensor_output_details[0]['index'])
out2 = self.interpreter.get_tensor(self.tensor_output_details[1]['index'])
# Dequantize output (tpu only)
o1_scale, o1_zero = self.tensor_output_details[0]['quantization']
out1 = (out1.astype(np.float32) - o1_zero) * o1_scale
o2_scale, o2_zero = self.tensor_output_details[1]['quantization']
out2 = (out2.astype(np.float32) - o2_zero) * o2_scale
num_classes = len(self.labels)
# tiny-YOLOv3 anchor masks: [3, 4, 5] for the coarse grid, [0, 1, 2] for the fine grid
_boxes1, _scores1, _classes1 = self.featuresToBoxes(out1, self.anchors[[3, 4, 5]], num_classes, net_input_shape)
_boxes2, _scores2, _classes2 = self.featuresToBoxes(out2, self.anchors[[0, 1, 2]], num_classes, net_input_shape)
if _boxes1.shape[0] == 0:
_boxes1 = np.empty([0, 2, 2])
_scores1 = np.empty([0,])
_classes1 = np.empty([0,])
if _boxes2.shape[0] == 0:
_boxes2 = np.empty([0, 2, 2])
_scores2 = np.empty([0,])
_classes2 = np.empty([0,])
boxes = np.append(_boxes1, _boxes2, axis=0)
scores = np.append(_scores1, _scores2, axis=0)
label_codes = np.append(_classes1, _classes2, axis=0)
detections = np.zeros((20,6), np.float32)
for i, score in enumerate(scores):
if i < 20:
detections[i] = [label_codes[i], score, boxes[i][0][1], boxes[i][0][0], boxes[i][1][1], boxes[i][1][0]]
return detections
def run_detector(
name: str,
@@ -113,8 +310,7 @@
out_events: Dict[str, mp.Event],
avg_speed,
start,
model_path,
model_shape,
model_config,
tf_device,
num_threads,
):
@@ -134,7 +330,7 @@
frame_manager = SharedMemoryFrameManager()
object_detector = LocalObjectDetector(
tf_device=tf_device, model_path=model_path, num_threads=num_threads
model_config, tf_device=tf_device, num_threads=num_threads
)
outputs = {}
@@ -149,7 +345,7 @@
except queue.Empty:
continue
input_frame = frame_manager.get(
connection_id, (1, model_shape[0], model_shape[1], 3)
connection_id, (1, model_config.height, model_config.width, 3)
)
if input_frame is None:
@@ -172,8 +368,7 @@ class EdgeTPUProcess:
name,
detection_queue,
out_events,
model_path,
model_shape,
model_config,
tf_device=None,
num_threads=3,
):
@@ -183,10 +378,11 @@
self.avg_inference_speed = mp.Value("d", 0.01)
self.detection_start = mp.Value("d", 0.0)
self.detect_process = None
self.model_path = model_path
self.model_shape = model_shape
self.model_path = model_config.path
self.model_shape = (model_config.height, model_config.width)
self.tf_device = tf_device
self.num_threads = num_threads
self.model_config = model_config
self.start_or_restart()
def stop(self):
@@ -211,8 +407,7 @@ class EdgeTPUProcess:
self.out_events,
self.avg_inference_speed,
self.detection_start,
self.model_path,
self.model_shape,
self.model_config,
self.tf_device,
self.num_threads,
),

View File

@@ -0,0 +1,318 @@
import time
import os
import sys
import logging
import yaml
import numpy as np
import pycoral.utils.edgetpu as etpu
from pycoral.adapters import common
from frigate.yolov5.nms import non_max_suppression
import cv2
import json
import tflite_runtime.interpreter as tflite
from frigate.yolov5.utils import plot_one_box, Colors, get_image_tensor
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("EdgeTPUModel")
class EdgeTPUModel:
def __init__(self, model_file, names_file, conf_thresh=0.25, iou_thresh=0.45, desktop=True, filter_classes=None,
agnostic_nms=False, max_det=1000):
"""
Creates an object for running a YOLOv5 model on an EdgeTPU or on the desktop CPU
Inputs:
- model_file: path to edgetpu-compiled tflite file
- names_file: yaml names file (yolov5 format)
- conf_thresh: detection threshold
- iou_thresh: NMS threshold
- desktop: run with the plain TFLite interpreter (CPU) instead of the EdgeTPU delegate
- filter_classes: only output certain classes
- agnostic_nms: use class-agnostic NMS
- max_det: max number of detections
"""
model_file = os.path.abspath(model_file)
if not model_file.endswith('tflite'):
model_file += ".tflite"
self.model_file = model_file
self.conf_thresh = conf_thresh
self.iou_thresh = iou_thresh
self.desktop = desktop
self.filter_classes = filter_classes
self.agnostic_nms = agnostic_nms
self.max_det = max_det
logger.info("Confidence threshold: {}".format(conf_thresh))
logger.info("IOU threshold: {}".format(iou_thresh))
self.inference_time = None
self.nms_time = None
self.interpreter = None
self.colors = Colors() # create instance for 'from utils.plots import colors'
#self.get_names(names_file)
self.names = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
'hair drier', 'toothbrush']
self.make_interpreter()
self.get_image_size()
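# Example usage (a minimal sketch; the model path is hypothetical, and names_file
# may be None since the COCO names are hard-coded above):
#   model = EdgeTPUModel("/edgetpu_model.tflite", None, desktop=False)
#   input_size = model.get_image_size()
#   pred = model.forward(np.zeros((3, *input_size), dtype=np.uint8))
#   inference_time, nms_time = model.get_last_inference_time()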
def get_names(self, path):
"""
Load a names file
Inputs:
- path: path to names file in yaml format
"""
with open(path, 'r') as f:
cfg = yaml.load(f, Loader=yaml.SafeLoader)
names = cfg['names']
logger.info("Loaded {} classes".format(len(names)))
self.names = names
def make_interpreter(self):
"""
Internal function that loads the tflite file and creates
the interpreter that deals with the EdgeTPU hardware.
"""
# Load the model and allocate
# Choose desktop (plain TFLite) or EdgeTPU interpreter
if self.desktop:
self.interpreter = tflite.Interpreter(self.model_file)
else:
self.interpreter = etpu.make_interpreter(self.model_file)
self.interpreter.allocate_tensors()
self.input_details = self.interpreter.get_input_details()
self.output_details = self.interpreter.get_output_details()
logger.debug(self.input_details)
logger.debug(self.output_details)
self.input_zero = self.input_details[0]['quantization'][1]
self.input_scale = self.input_details[0]['quantization'][0]
self.output_zero = self.output_details[0]['quantization'][1]
self.output_scale = self.output_details[0]['quantization'][0]
# If the model isn't quantized then these should be zero
# Check against small epsilon to avoid comparing float/int
if self.input_scale < 1e-9:
self.input_scale = 1.0
if self.output_scale < 1e-9:
self.output_scale = 1.0
logger.debug("Input scale: {}".format(self.input_scale))
logger.debug("Input zero: {}".format(self.input_zero))
logger.debug("Output scale: {}".format(self.output_scale))
logger.debug("Output zero: {}".format(self.output_zero))
logger.info("Successfully loaded {}".format(self.model_file))
def get_image_size(self):
"""
Returns the expected size of the input image tensor
"""
if self.interpreter is not None:
self.input_size = common.input_size(self.interpreter)
logger.debug("Expecting input shape: {}".format(self.input_size))
return self.input_size
else:
logger.warn("Interpreter is not yet loaded")
def predict(self, image_path, save_img=True, save_txt=True):
logger.info("Attempting to load {}".format(image_path))
full_image, net_image, pad = get_image_tensor(image_path, self.input_size[0])
pred = self.forward(net_image)
logger.info("Inference time: {}".format(self.inference_time))
base, ext = os.path.splitext(image_path)
output_path = base + "_detect" + ext
det = self.process_predictions(pred[0], full_image, pad, output_path, save_img=save_img, save_txt=save_txt)
return det
def forward(self, x: np.ndarray, with_nms=True) -> np.ndarray:
"""
Predict function using the EdgeTPU
Inputs:
x: (C, H, W) image tensor
with_nms: apply NMS on output
Returns:
prediction array (with or without NMS applied)
"""
tstart = time.time()
# Transpose if C, H, W
if x.shape[0] == 3:
x = x.transpose((1, 2, 0))
x = x.astype('float32')
# Scale input, conversion is: real = (int_8 - zero)*scale
x = (x / self.input_scale) + self.input_zero
x = x[np.newaxis].astype(np.uint8)
self.interpreter.set_tensor(self.input_details[0]['index'], x)
self.interpreter.invoke()
# Scale output
result = (common.output_tensor(self.interpreter, 0).astype('float32') - self.output_zero) * self.output_scale
self.inference_time = time.time() - tstart
if with_nms:
tstart = time.time()
nms_result = non_max_suppression(result, self.conf_thresh, self.iou_thresh, self.filter_classes,
self.agnostic_nms, max_det=self.max_det)
self.nms_time = time.time() - tstart
return nms_result
else:
return result
def get_last_inference_time(self, with_nms=True):
"""
Returns a tuple containing most recent inference and NMS time
"""
res = [self.inference_time]
if with_nms:
res.append(self.nms_time)
return res
def get_scaled_coords(self, xyxy, output_image, pad):
"""
Converts raw prediction bounding boxes to original
image coordinates.
Args:
xyxy: array of boxes
output_image: np array
pad: padding due to image resizing (pad_w, pad_h)
"""
pad_w, pad_h = pad
in_h, in_w = self.input_size
out_h, out_w, _ = output_image.shape
ratio_w = out_w / (in_w - pad_w)
ratio_h = out_h / (in_h - pad_h)
out = []
for coord in xyxy:
x1, y1, x2, y2 = coord
x1 *= in_w * ratio_w
x2 *= in_w * ratio_w
y1 *= in_h * ratio_h
y2 *= in_h * ratio_h
x1 = max(0, x1)
x2 = min(out_w, x2)
y1 = max(0, y1)
y2 = min(out_h, y2)
out.append((x1, y1, x2, y2))
return np.array(out).astype(int)
def process_predictions2(self, det):
"""
Process predictions into a dict; unlike process_predictions below, no image is written
"""
if len(det):
# Rescale boxes from img_size to im0 size
# x1, y1, x2, y2=
#det[:, :4] = self.get_scaled_coords(det[:, :4], output_image, pad)
output = {}
#base, ext = os.path.splitext(output_path)
s = ""
# Print results
for c in np.unique(det[:, -1]):
n = (det[:, -1] == c).sum() # detections per class
s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, " # add to string
if s != "":
s = s.strip()
s = s[:-1]
logger.info("Detected: {}".format(s))
for *xyxy, conf, cls in reversed(det):
output = {}
output['box'] = xyxy
output['conf'] = conf
output['cls'] = cls
output['cls_name'] = self.names[int(cls)]
# note: reversed(det) ends at det[0], so the highest-confidence detection is returned
return output
def process_predictions(self, det, output_image=None, pad=(0, 0), output_path="detection.jpg", save_img=False, save_txt=False,
hide_labels=False, hide_conf=False):
"""
Process predictions and optionally output an image with annotations
"""
if len(det):
# Rescale boxes from img_size to im0 size
# x1, y1, x2, y2=
det[:, :4] = self.get_scaled_coords(det[:, :4], output_image, pad)
output = {}
base, ext = os.path.splitext(output_path)
s = ""
# Print results
for c in np.unique(det[:, -1]):
n = (det[:, -1] == c).sum() # detections per class
s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, " # add to string
if s != "":
s = s.strip()
s = s[:-1]
logger.info("Detected: {}".format(s))
# Write results
for *xyxy, conf, cls in reversed(det):
if save_img: # Add bbox to image
c = int(cls) # integer class
label = None if hide_labels else (self.names[c] if hide_conf else f'{self.names[c]} {conf:.2f}')
output_image = plot_one_box(xyxy, output_image, label=label, color=self.colors(c, True))
if save_txt:
output[base] = {}
output[base]['box'] = xyxy
output[base]['conf'] = conf
output[base]['cls'] = cls
output[base]['cls_name'] = self.names[int(cls)]  # note: each detection overwrites output[base]
if save_txt:
output_txt = base + ".txt"
with open(output_txt, 'w') as f:
json.dump(output, f, indent=1)
if save_img:
cv2.imwrite(output_path, output_image)
return det

frigate/yolov5/nms.py Normal file
View File

@@ -0,0 +1,142 @@
import numpy as np
import time
def xywh2xyxy(x):
# Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
y = np.copy(x)
y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x
y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y
y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x
y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y
return y
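# Example: a 10x10 box centered at (50, 50):
#   xywh2xyxy(np.array([[50., 50., 10., 10.]])) -> [[45. 45. 55. 55.]]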
def nms(dets, scores, thresh):
'''
dets is a numpy array: (num_dets, 4)
scores is a numpy array: (num_dets,)
'''
x1 = dets[:, 0]
y1 = dets[:, 1]
x2 = dets[:, 2]
y2 = dets[:, 3]
areas = (x2 - x1 + 1e-9) * (y2 - y1 + 1e-9)
order = scores.argsort()[::-1]  # box indices sorted by score, highest first
keep = []
while order.size > 0:
i = order[0]  # pick the remaining box with the highest score
other_box_ids = order[1:]
keep.append(i)
xx1 = np.maximum(x1[i], x1[other_box_ids])
yy1 = np.maximum(y1[i], y1[other_box_ids])
xx2 = np.minimum(x2[i], x2[other_box_ids])
yy2 = np.minimum(y2[i], y2[other_box_ids])
# print(list(zip(xx1, yy1, xx2, yy2)))
w = np.maximum(0.0, xx2 - xx1 + 1e-9)  # intersection width
h = np.maximum(0.0, yy2 - yy1 + 1e-9)  # intersection height
inter = w * h
ovr = inter / (areas[i] + areas[other_box_ids] - inter)
inds = np.where(ovr <= thresh)[0]
order = order[inds + 1]
return np.array(keep)
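# Example (a sketch): two heavily overlapping boxes and one separate box;
# with thresh=0.5 the lower-scoring overlapping box is suppressed:
#   dets = np.array([[0., 0., 10., 10.], [1., 1., 11., 11.], [50., 50., 60., 60.]])
#   scores = np.array([0.9, 0.8, 0.7])
#   nms(dets, scores, 0.5) -> array([0, 2])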
def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False,
labels=(), max_det=300):
nc = prediction.shape[2] - 5 # number of classes
xc = prediction[..., 4] > conf_thres # candidates
# Checks
assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'
# Settings
min_wh, max_wh = 2, 4096 # (pixels) minimum and maximum box width and height
max_nms = 30000  # maximum number of boxes passed into nms()
time_limit = 10.0 # seconds to quit after
redundant = True # require redundant detections
multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img)
merge = False # use merge-NMS
t = time.time()
output = [np.zeros((0, 6))] * prediction.shape[0]
for xi, x in enumerate(prediction): # image index, image inference
# Apply constraints
# x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height
x = x[xc[xi]] # confidence
# Cat apriori labels if autolabelling
if labels and len(labels[xi]):
l = labels[xi]
v = np.zeros((len(l), nc + 5))
v[:, :4] = l[:, 1:5] # box
v[:, 4] = 1.0 # conf
v[range(len(l)), l[:, 0].astype(int) + 5] = 1.0  # cls
x = np.concatenate((x, v), 0)
# If none remain process next image
if not x.shape[0]:
continue
# Compute conf
x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf
# Box (center x, center y, width, height) to (x1, y1, x2, y2)
box = xywh2xyxy(x[:, :4])
# Detections matrix nx6 (xyxy, conf, cls)
if multi_label:
i, j = (x[:, 5:] > conf_thres).nonzero()
x = np.concatenate((box[i], x[i, j + 5, None], j[:, None].astype(float)), axis=1)
else: # best class only
conf = np.amax(x[:, 5:], axis=1, keepdims=True)
j = np.argmax(x[:, 5:], axis=1).reshape(conf.shape)
x = np.concatenate((box, conf, j.astype(float)), axis=1)[conf.flatten() > conf_thres]
# Filter by class
if classes is not None:
x = x[(x[:, 5:6] == np.array(classes)).any(1)]
# Apply finite constraint
# if not torch.isfinite(x).all():
# x = x[torch.isfinite(x).all(1)]
# Check shape
n = x.shape[0] # number of boxes
if not n: # no boxes
continue
elif n > max_nms: # excess boxes
x = x[x[:, 4].argsort()[::-1][:max_nms]]  # sort by confidence, descending
# Batched NMS
c = x[:, 5:6] * (0 if agnostic else max_wh) # classes
boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores
i = nms(boxes, scores, iou_thres) # NMS
if i.shape[0] > max_det: # limit detections
i = i[:max_det]
if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean)
# update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
# note: box_iou is not implemented in this NumPy port, so merge must remain False
iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
weights = iou * scores[None] # box weights
x[i, :4] = np.dot(weights, x[:, :4]).astype(float) / weights.sum(1, keepdims=True)  # merged boxes
if redundant:
i = i[iou.sum(1) > 1] # require redundancy
output[xi] = x[i]
if (time.time() - t) > time_limit:
print(f'WARNING: NMS time limit {time_limit}s exceeded')
break # time limit exceeded
return output
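# Example (a sketch): `prediction` is a raw YOLOv5 head output of shape
# (batch, num_boxes, 5 + num_classes) in xywh order; each element of the returned
# list is an (n, 6) array of [x1, y1, x2, y2, conf, cls] for one image:
#   pred = np.random.random((1, 25200, 85)).astype(np.float32)
#   dets = non_max_suppression(pred, conf_thres=0.25, iou_thres=0.45)[0]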

frigate/yolov5/utils.py Normal file
View File

@@ -0,0 +1,120 @@
import os
import sys
import argparse
import logging
import time
from pathlib import Path
import numpy as np
import cv2
class Colors:
# Ultralytics color palette https://ultralytics.com/
def __init__(self):
# hex = matplotlib.colors.TABLEAU_COLORS.values()
hex = ('FF3838', 'FF9D97', 'FF701F', 'FFB21D', 'CFD231', '48F90A', '92CC17', '3DDB86', '1A9334', '00D4BB',
'2C99A8', '00C2FF', '344593', '6473FF', '0018EC', '8438FF', '520085', 'CB38FF', 'FF95C8', 'FF37C7')
self.palette = [self.hex2rgb('#' + c) for c in hex]
self.n = len(self.palette)
def __call__(self, i, bgr=False):
c = self.palette[int(i) % self.n]
return (c[2], c[1], c[0]) if bgr else c
@staticmethod
def hex2rgb(h): # rgb order (PIL)
return tuple(int(h[1 + i:1 + i + 2], 16) for i in (0, 2, 4))
def plot_one_box(box, im, color=(128, 128, 128), txt_color=(255, 255, 255), label=None, line_width=3):
# Plots one xyxy box on image im with label
assert im.data.contiguous, 'Image not contiguous. Apply np.ascontiguousarray(im) to plot_on_box() input image.'
lw = line_width or max(int(min(im.shape[0:2]) / 200), 2)  # line width
c1, c2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3]))
cv2.rectangle(im, c1, c2, color, thickness=lw, lineType=cv2.LINE_AA)
if label:
tf = max(lw - 1, 1) # font thickness
txt_width, txt_height = cv2.getTextSize(label, 0, fontScale=lw / 3, thickness=tf)[0]
c2 = c1[0] + txt_width, c1[1] - txt_height - 3
cv2.rectangle(im, c1, c2, color, -1, cv2.LINE_AA) # filled
cv2.putText(im, label, (c1[0], c1[1] - 2), 0, lw / 3, txt_color, thickness=tf, lineType=cv2.LINE_AA)
return im
def resize_and_pad(image, desired_size):
old_size = image.shape[:2]
ratio = float(desired_size / max(old_size))
new_size = tuple([int(x * ratio) for x in old_size])
# new_size is (height, width); cv2.resize expects (width, height)
image = cv2.resize(image, (new_size[1], new_size[0]))
delta_w = desired_size - new_size[1]
delta_h = desired_size - new_size[0]
pad = (delta_w, delta_h)
color = [100, 100, 100]
new_im = cv2.copyMakeBorder(image, 0, delta_h, 0, delta_w, cv2.BORDER_CONSTANT,
value=color)
return new_im, pad
def get_image_tensor(img, max_size, debug=False):
"""
Reshapes an input image into a square with sides max_size
"""
if isinstance(img, str):
img = cv2.imread(img)
resized, pad = resize_and_pad(img, max_size)
resized = resized.astype(np.float32)
if debug:
cv2.imwrite("intermediate.png", resized)
# Normalise!
resized /= 255.0
return img, resized, pad
def xyxy2xywh(x):
# Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right
y = np.copy(x)
y[:, 0] = (x[:, 0] + x[:, 2]) / 2 # x center
y[:, 1] = (x[:, 1] + x[:, 3]) / 2 # y center
y[:, 2] = x[:, 2] - x[:, 0] # width
y[:, 3] = x[:, 3] - x[:, 1] # height
return y
def coco80_to_coco91_class(): # converts 80-index (val2014) to 91-index (paper)
# https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/
# a = np.loadtxt('data/coco.names', dtype='str', delimiter='\n')
# b = np.loadtxt('data/coco_paper.names', dtype='str', delimiter='\n')
# x1 = [list(a[i] == b).index(True) + 1 for i in range(80)] # darknet to coco
# x2 = [list(b[i] == a).index(True) if any(b[i] == a) else None for i in range(91)] # coco to darknet
x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34,
35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90]
return x
def save_one_json(predn, jdict, path, class_map):
# Save one JSON result {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}
image_id = int(path.stem) if path.stem.isnumeric() else path.stem
box = xyxy2xywh(predn[:, :4]) # xywh
box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner
for p, b in zip(predn.tolist(), box.tolist()):
jdict.append({'image_id': image_id,
'category_id': class_map[int(p[5])],
'bbox': [round(x, 3) for x in b],
'score': round(p[4], 5)})

frigate/yolov5_pytorch.py Normal file
View File

@@ -0,0 +1,111 @@
import torch
import numpy as np
import cv2
from time import time
import sys
class ObjectDetection:
"""
The class performs generic object detection on a video file.
It uses a pretrained YOLOv5 model to make inferences and OpenCV to manage frames.
Included Features:
1. Reading and writing of video files using OpenCV
2. Using a pretrained model to make inferences on frames.
3. Using the inferences to plot boxes on objects along with labels.
Upcoming Features:
"""
def __init__(self):
self.model = self.load_model()
self.model.conf = 0.4  # set inference confidence threshold to 0.4
self.model.iou = 0.3  # set inference IOU threshold to 0.3
#self.model.classes = [0] # set model to only detect "Person" class
#self.model.classes = self.model.names
self.classes = self.model.names
self.found_labels = set()  # labels seen so far
self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
def load_model(self):
"""
Loads the YOLOv5 model via torch.hub from a local checkout.
"""
#model = torch.hub.load('/media/frigate/yolov5', 'custom', path='/media/frigate/yolov5/yolov5l.pt', source='local')
model = torch.hub.load('/media/frigate/yolov5', 'custom', path='/media/frigate/yolov5/yolov5s.pt', source='local')
#model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)
#model = torch.hub.load('ultralytics/yolov3', 'yolov3', pretrained=True)
return model
def class_to_label(self, x):
"""
For a given label value, return corresponding string label.
:param x: numeric label
:return: corresponding string label
"""
return self.classes[int(x)]
def score_frame(self, frame):
"""
Scores a single frame and returns the results.
:param frame: frame to be inferred.
:return: labels and coordinates of objects found.
"""
self.model.to(self.device)
results = self.model(frame)
labels, cord = results.xyxyn[0][:, -1].to('cpu').numpy(), results.xyxyn[0][:, :-1].to('cpu').numpy()
return labels, cord
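# Example usage (a minimal sketch, assuming the local yolov5 checkout configured
# in load_model() exists and `frame` is an HxWx3 BGR frame from OpenCV):
#   detector = ObjectDetection()
#   labels, cord = detector.score_frame(frame)
#   # each row of cord is [x1, y1, x2, y2, conf], normalised to the frame size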
def plot_boxes(self, results, frame):
"""
plots boxes and labels on frame.
:param results: inferences made by model
:param frame: frame on which to make the plots
:return: new frame with boxes and labels plotted.
"""
labels, cord = results
n = len(labels)
if n > 0:
print(f"Total Targets: {n}")
print(f"Labels: {set([self.class_to_label(label) for label in labels])}")
x_shape, y_shape = frame.shape[1], frame.shape[0]
for i in range(n):
self.found_labels.add(self.class_to_label(labels[i]))
row = cord[i]
x1, y1, x2, y2 = int(row[0]*x_shape), int(row[1]*y_shape), int(row[2]*x_shape), int(row[3]*y_shape)
bgr = (0, 0, 255)
cv2.rectangle(frame, (x1, y1), (x2, y2), bgr, 1)
label = f"{int(row[4]*100)}"
cv2.putText(frame, self.class_to_label(labels[i]), (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 1)
cv2.putText(frame, f"Total Targets: {n}", (30, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
return frame
def __call__(self):
# note: get_video_from_file() and self.out_file are not defined in this class;
# they are expected to be supplied by a subclass or set by the caller
player = self.get_video_from_file()  # create streaming service for application
assert player.isOpened()
x_shape = int(player.get(cv2.CAP_PROP_FRAME_WIDTH))
y_shape = int(player.get(cv2.CAP_PROP_FRAME_HEIGHT))
four_cc = cv2.VideoWriter_fourcc(*"MJPG")
out = cv2.VideoWriter(self.out_file, four_cc, 20, (x_shape, y_shape))
fc = 0
fps = 0
tfc = int(player.get(cv2.CAP_PROP_FRAME_COUNT))
tfcc = 0
while True:
fc += 1
start_time = time()
ret, frame = player.read()
if not ret:
break
results = self.score_frame(frame)
frame = self.plot_boxes(results, frame)
end_time = time()
fps += 1/np.round(end_time - start_time, 3)
if fc == 10:
fps = int(fps / 10)
tfcc += fc
fc = 0
per_com = int(tfcc / tfc * 100)
print(f"Frames Per Second : {fps} || Percentage Parsed : {per_com}")
out.write(frame)
print(f"Found labels: {self.found_lables}")
player.release()