diff --git a/docker-compose.yml b/docker-compose.yml index 197d9e11e..f1060a05e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -11,10 +11,11 @@ services: volumes: - /etc/localtime:/etc/localtime:ro - .:/lab/frigate:cached - - ./config/config.yml:/config/config.yml:ro + - ./config/config.yml:/config/config.yml:rw - ./debug:/media/frigate - - /dev/bus/usb:/dev/bus/usb - - /dev/dri:/dev/dri # for intel hwaccel, needs to be updated for your hardware + - ./frigate:/opt/frigate/frigate + #- /dev/bus/usb:/dev/bus/usb + #- /dev/dri:/dev/dri # for intel hwaccel, needs to be updated for your hardware ports: - "1935:1935" - "5000:5000" diff --git a/docs/docs/configuration/objects.mdx b/docs/docs/configuration/objects.mdx index 1d236bf42..f6b65e47f 100644 --- a/docs/docs/configuration/objects.mdx +++ b/docs/docs/configuration/objects.mdx @@ -26,3 +26,40 @@ Models for both CPU and EdgeTPU (Coral) are bundled in the image. You can use yo - Labels: `/labelmap.txt` You also need to update the [model config](/configuration/advanced#model) if they differ from the defaults. + +You can also try improving the speed using a YOLOv3-tiny model, quantized to work on the edge TPU. + +A compiled model exists [here](https://github.com/guichristmann/edge-tpu-tiny-yolo/tree/master/models) + +Add it as a volume mount in your docker-compose file: +```yaml + volumes: + - /path/to/quant_coco-tiny-v3-relu_edgetpu.tflite:/edgetpu_model.tflite +``` + +And then set the configuration for the model in config.yml: + +```yaml +model: + # Required: height of the trained model + height: 416 + # Required: width of the trained model + width: 416 + # Required: type of model (ssd or yolo) + model_type: 'yolo' + # Required: path of label map + label_path: '/labelmap.txt' + # Optional: (but required for yolo) - anchors, comma separated + anchors: '10,14, 23,27, 37,58, 81,82, 135,169, 344,319' +``` + +### Customizing the Labelmap + +The labelmap can be customized to your needs. A common reason to do this is to combine multiple object types that are easily confused when you don't need to be as granular such as car/truck. You must retain the same number of labels, but you can change the names. To change: + +- Download the [COCO labelmap](https://dl.google.com/coral/canned_models/coco_labels.txt) +- Modify the label names as desired. 
For example, change `7 truck` to `7 car` +- Mount the new file at `/labelmap.txt` in the container with an additional volume + ``` + -v ./config/labelmap.txt:/labelmap.txt + ``` diff --git a/frigate/app.py b/frigate/app.py index bf593d6ac..690ab6a93 100644 --- a/frigate/app.py +++ b/frigate/app.py @@ -2,6 +2,7 @@ import json import logging import multiprocessing as mp import os +import pprint import signal import sys import threading @@ -158,8 +159,7 @@ class FrigateApp: self.mqtt_relay.start() def start_detectors(self): - model_path = self.config.model.path - model_shape = (self.config.model.height, self.config.model.width) + for name in self.config.cameras.keys(): self.detection_out_events[name] = mp.Event() @@ -188,8 +188,7 @@ class FrigateApp: name, self.detection_queue, self.detection_out_events, - model_path, - model_shape, + self.config.model, "cpu", detector.num_threads, ) @@ -198,8 +197,7 @@ class FrigateApp: name, self.detection_queue, self.detection_out_events, - model_path, - model_shape, + self.config.model, detector.device, detector.num_threads, ) @@ -310,6 +308,7 @@ class FrigateApp: try: try: self.init_config() + pprint.pprint(self.config) except Exception as e: print("*************************************************************") print("*************************************************************") diff --git a/frigate/config.py b/frigate/config.py index e89b43003..5fada12e6 100644 --- a/frigate/config.py +++ b/frigate/config.py @@ -653,6 +653,8 @@ class DatabaseConfig(FrigateBaseModel): class ModelConfig(FrigateBaseModel): path: Optional[str] = Field(title="Custom Object detection model path.") + type: str = Field(default="ssd", title="Model type") + anchors: Optional[str] = Field(default="", title="Optional but required for yolo3") labelmap_path: Optional[str] = Field(title="Label map for custom object detector.") width: int = Field(default=320, title="Object detection model input width.") height: int = Field(default=320, title="Object detection model input height.") diff --git a/frigate/edgetpu.py b/frigate/edgetpu.py index 9849f8afc..a6b9b8484 100644 --- a/frigate/edgetpu.py +++ b/frigate/edgetpu.py @@ -14,10 +14,33 @@ from setproctitle import setproctitle from tflite_runtime.interpreter import load_delegate from frigate.util import EventsPerSecond, SharedMemoryFrameManager, listen, load_labels +from frigate.yolov5.edgetpumodel import EdgeTPUModel logger = logging.getLogger(__name__) +def load_labels(path, encoding='utf-8'): + """Loads labels from file (with or without index numbers). + Args: + path: path to label file. + encoding: label file encoding. + Returns: + Dictionary mapping indices to labels. 
+ """ + logger.warn(f"Loaded labels from {path}") + with open(path, 'r', encoding=encoding) as f: + lines = f.readlines() + + if not lines: + return {} + + if lines[0].split(' ', maxsplit=1)[0].isdigit(): + pairs = [line.split(' ', maxsplit=1) for line in lines] + return {int(index): label.strip() for index, label in pairs} + else: + return {index: line.strip() for index, line in enumerate(lines)} + + class ObjectDetector(ABC): @abstractmethod def detect(self, tensor_input, threshold=0.4): @@ -25,13 +48,22 @@ class ObjectDetector(ABC): class LocalObjectDetector(ObjectDetector): - def __init__(self, tf_device=None, model_path=None, num_threads=3, labels=None): + def __init__(self, model_config, tf_device=None, num_threads=3): self.fps = EventsPerSecond() - if labels is None: - self.labels = {} - else: - self.labels = load_labels(labels) + if model_config.labelmap_path: + self.labels = load_labels(model_config.labelmap_path) + self.model_config = model_config + if self.model_config.type == 'yolov5': + model = EdgeTPUModel(model_config.path, None) + input_size = model.get_image_size() + x = (255 * np.random.random((3, *input_size))).astype(np.uint8) + model.forward(x) + self.yolov5Model = model + if self.model_config.type == 'yolov5_pytorch': + from frigate.yolov5_pytorch import ObjectDetection as Yolov5ObjectDetector + self.yolov5ObjectDetector = Yolov5ObjectDetector() + device_config = {"device": "usb"} if not tf_device is None: device_config = {"device": tf_device} @@ -44,7 +76,7 @@ class LocalObjectDetector(ObjectDetector): edge_tpu_delegate = load_delegate("libedgetpu.so.1.0", device_config) logger.info("TPU found") self.interpreter = tflite.Interpreter( - model_path=model_path or "/edgetpu_model.tflite", + model_path=model_config.path or "/edgetpu_model.tflite", experimental_delegates=[edge_tpu_delegate], ) except ValueError: @@ -57,7 +89,7 @@ class LocalObjectDetector(ObjectDetector): "CPU detectors are not recommended and should only be used for testing or for trial purposes." ) self.interpreter = tflite.Interpreter( - model_path=model_path or "/cpu_model.tflite", num_threads=num_threads + model_path=model_config.path or "/cpu_model.tflite", num_threads=num_threads ) self.interpreter.allocate_tensors() @@ -65,6 +97,11 @@ class LocalObjectDetector(ObjectDetector): self.tensor_input_details = self.interpreter.get_input_details() self.tensor_output_details = self.interpreter.get_output_details() + + if model_config.anchors != "": + anchors = [float(x) for x in model_config.anchors.split(',')] + self.anchors = np.array(anchors).reshape(-1, 2) + def detect(self, tensor_input, threshold=0.4): detections = [] @@ -79,7 +116,104 @@ class LocalObjectDetector(ObjectDetector): self.fps.update() return detections + def sigmoid(self, x): + return 1. 
/ (1 + np.exp(-x)) + def detect_raw(self, tensor_input): + if self.model_config.type == "ssd": + raw_detections = self.detect_ssd(tensor_input) + elif self.model_config.type == "yolov3": + raw_detections = self.detect_yolov3(tensor_input) + elif self.model_config.type == "yolov5": + raw_detections = self.detect_yolov5(tensor_input) + elif self.model_config.type == "yolov5_pytorch": + raw_detections = self.detect_yolov5_pytorch(tensor_input) + else: + logger.error(f"Unsupported model type {self.model_config.type}") + raw_detections = [] + return raw_detections + + + def get_interpreter_details(self): + # Get input and output tensor details + input_details = self.interpreter.get_input_details() + output_details = self.interpreter.get_output_details() + input_shape = input_details[0]["shape"] + return input_details, output_details, input_shape + + # from util.py in https://github.com/guichristmann/edge-tpu-tiny-yolo + def featuresToBoxes(self, outputs, anchors, n_classes, net_input_shape): + grid_shape = outputs.shape[1:3] + n_anchors = len(anchors) + + # Numpy screwaround to get the boxes in reasonable amount of time + grid_y = np.tile(np.arange(grid_shape[0]).reshape(-1, 1), grid_shape[0]).reshape(1, grid_shape[0], grid_shape[0], 1).astype(np.float32) + grid_x = grid_y.copy().T.reshape(1, grid_shape[0], grid_shape[1], 1).astype(np.float32) + outputs = outputs.reshape(1, grid_shape[0], grid_shape[1], n_anchors, -1) + _anchors = anchors.reshape(1, 1, 3, 2).astype(np.float32) + + # Get box parameters from network output and apply transformations + bx = (self.sigmoid(outputs[..., 0]) + grid_x) / grid_shape[0] + by = (self.sigmoid(outputs[..., 1]) + grid_y) / grid_shape[1] + # Should these be inverted? + bw = np.multiply(_anchors[..., 0] / net_input_shape[1], np.exp(outputs[..., 2])) + bh = np.multiply(_anchors[..., 1] / net_input_shape[2], np.exp(outputs[..., 3])) + + # Get the scores + scores = self.sigmoid(np.expand_dims(outputs[..., 4], -1)) * \ + self.sigmoid(outputs[..., 5:]) + scores = scores.reshape(-1, n_classes) + + # TODO: some of these are probably not needed but I don't understand numpy magic well enough + bx = bx.flatten() + by = (by.flatten()) * 1 + bw = bw.flatten() + bh = bh.flatten() * 1 + half_bw = bw / 2. + half_bh = bh / 2. 
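+ # convert box centres and half-extents into top-left / bottom-right corners (still normalised coordinates)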
+ + tl_x = np.multiply(bx - half_bw, 1) + tl_y = np.multiply(by - half_bh, 1) + br_x = np.multiply(bx + half_bw, 1) + br_y = np.multiply(by + half_bh, 1) + + # Get indices of boxes with score higher than threshold + indices = np.argwhere(scores >= 0.5) + selected_boxes = [] + selected_scores = [] + for i in indices: + i = tuple(i) + selected_boxes.append( ((tl_x[i[0]], tl_y[i[0]]), (br_x[i[0]], br_y[i[0]])) ) + selected_scores.append(scores[i]) + + selected_boxes = np.array(selected_boxes) + selected_scores = np.array(selected_scores) + selected_classes = indices[:, 1] + + return selected_boxes, selected_scores, selected_classes + + def detect_yolov5(self, tensor_input): + tensor_input = np.squeeze(tensor_input, axis=0) + results = self.yolov5Model.forward(tensor_input) + print(self.yolov5Model.get_last_inference_time()) + det = results[0] + + detections = np.zeros((20, 6), np.float32) + i = 0 + for *xyxy, conf, cls in reversed(det): + detections[i] = [ + int(cls)+1, + float(conf), + xyxy[1], + xyxy[0], + xyxy[3], + xyxy[2], + ] + i += 1 + + return detections + + def detect_ssd(self, tensor_input): self.interpreter.set_tensor(self.tensor_input_details[0]["index"], tensor_input) self.interpreter.invoke() @@ -106,6 +240,69 @@ class LocalObjectDetector(ObjectDetector): return detections + def detect_yolov5_pytorch(self, tensor_input): + tensor_input = np.squeeze(tensor_input, axis=0) + results = self.yolov5ObjectDetector.score_frame(tensor_input) + labels, cord = results + n = len(labels) + detections = np.zeros((20, 6), np.float32) + if n > 0: + print(f"Total Targets: {n}") + print(f"Labels: {set([self.yolov5ObjectDetector.class_to_label(label) for label in labels])}") + for i in range(n): + if i < 20: + row = cord[i] + score = float(row[4]) + if score < 0.4: + break + x1, y1, x2, y2 = row[0], row[1], row[2], row[3] + label = self.yolov5ObjectDetector.class_to_label(labels[i]) + #detections[i] = [labels[i]+1, score, x1, y1, x2, y2] + detections[i] = [labels[i]+1, score, y1, x1, y2, x2] + print(detections[i]) + + return detections + + + def detect_yolov3(self, tensor_input): + input_details, output_details, net_input_shape = \ + self.get_interpreter_details() + + self.interpreter.set_tensor(self.tensor_input_details[0]['index'], tensor_input) + self.interpreter.invoke() + + # for yolo, it's a little diffrent + out1 = self.interpreter.get_tensor(self.tensor_output_details[0]['index']) + out2 = self.interpreter.get_tensor(self.tensor_output_details[1]['index']) + + # Dequantize output (tpu only) + o1_scale, o1_zero = self.tensor_output_details[0]['quantization'] + out1 = (out1.astype(np.float32) - o1_zero) * o1_scale + o2_scale, o2_zero = self.tensor_output_details[1]['quantization'] + out2 = (out2.astype(np.float32) - o2_zero) * o2_scale + + num_classes = len(self.labels) + _boxes1, _scores1, _classes1 = self.featuresToBoxes(out1, self.anchors[[3, 4, 5]], len(self.labels), net_input_shape) + _boxes2, _scores2, _classes2 = self.featuresToBoxes(out2, self.anchors[[1, 2, 3]], len(self.labels), net_input_shape) + + if _boxes1.shape[0] == 0: + _boxes1 = np.empty([0, 2, 2]) + _scores1 = np.empty([0,]) + _classes1 = np.empty([0,]) + if _boxes2.shape[0] == 0: + _boxes2 = np.empty([0, 2, 2]) + _scores2 = np.empty([0,]) + _classes2 = np.empty([0,]) + boxes = np.append(_boxes1, _boxes2, axis=0) + scores = np.append(_scores1, _scores2, axis=0) + label_codes = np.append(_classes1, _classes2, axis=0) + + detections = np.zeros((20,6), np.float32) + for i, score in enumerate(scores): + if i < 20: + 
detections[i] = [label_codes[i], score, boxes[i][0][1], boxes[i][0][0], boxes[i][1][1], boxes[i][1][0]] + + return detections def run_detector( name: str, @@ -113,8 +310,7 @@ def run_detector( out_events: Dict[str, mp.Event], avg_speed, start, - model_path, - model_shape, + model_config, tf_device, num_threads, ): @@ -134,7 +330,7 @@ def run_detector( frame_manager = SharedMemoryFrameManager() object_detector = LocalObjectDetector( - tf_device=tf_device, model_path=model_path, num_threads=num_threads + model_config, tf_device=tf_device, num_threads=num_threads ) outputs = {} @@ -149,7 +345,7 @@ def run_detector( except queue.Empty: continue input_frame = frame_manager.get( - connection_id, (1, model_shape[0], model_shape[1], 3) + connection_id, (1, model_config.height, model_config.width, 3) ) if input_frame is None: @@ -172,8 +368,7 @@ class EdgeTPUProcess: name, detection_queue, out_events, - model_path, - model_shape, + model_config, tf_device=None, num_threads=3, ): @@ -183,10 +378,11 @@ class EdgeTPUProcess: self.avg_inference_speed = mp.Value("d", 0.01) self.detection_start = mp.Value("d", 0.0) self.detect_process = None - self.model_path = model_path - self.model_shape = model_shape + self.model_path = model_config.path + self.model_shape = (model_config.height, model_config.width) self.tf_device = tf_device self.num_threads = num_threads + self.model_config = model_config self.start_or_restart() def stop(self): @@ -211,8 +407,7 @@ class EdgeTPUProcess: self.out_events, self.avg_inference_speed, self.detection_start, - self.model_path, - self.model_shape, + self.model_config, self.tf_device, self.num_threads, ), diff --git a/frigate/yolov5/edgetpumodel.py b/frigate/yolov5/edgetpumodel.py new file mode 100644 index 000000000..c6c4966d2 --- /dev/null +++ b/frigate/yolov5/edgetpumodel.py @@ -0,0 +1,318 @@ +import time +import os +import sys +import logging + +import yaml +import numpy as np +import pycoral.utils.edgetpu as etpu +from pycoral.adapters import common +from frigate.yolov5.nms import non_max_suppression +import cv2 +import json +import tflite_runtime.interpreter as tflite +from frigate.yolov5.utils import plot_one_box, Colors, get_image_tensor + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger("EdgeTPUModel") + + +class EdgeTPUModel: + + def __init__(self, model_file, names_file, conf_thresh=0.25, iou_thresh=0.45, desktop=True, filter_classes=None, + agnostic_nms=False, max_det=1000): + """ + Creates an object for running a Yolov5 model on an EdgeTPU or a Desktop + + Inputs: + - model_file: path to edgetpu-compiled tflite file + - names_file: yaml names file (yolov5 format) + - conf_thresh: detection threshold + - iou_thresh: NMS threshold + - desktop: option to run model on a desktop + - filter_classes: only output certain classes + - agnostic_nms: use class-agnostic NMS + - max_det: max number of detections + """ + + model_file = os.path.abspath(model_file) + + if not model_file.endswith('tflite'): + model_file += ".tflite" + + self.model_file = model_file + self.conf_thresh = conf_thresh + self.iou_thresh = iou_thresh + self.desktop = desktop + self.filter_classes = filter_classes + self.agnostic_nms = agnostic_nms + self.max_det = 1000 + + logger.info("Confidence threshold: {}".format(conf_thresh)) + logger.info("IOU threshold: {}".format(iou_thresh)) + + self.inference_time = None + self.nms_time = None + self.interpreter = None + self.colors = Colors() # create instance for 'from utils.plots import colors' + + #self.get_names(names_file) + 
self.names = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', + 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', + 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', + 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', + 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', + 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', + 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', + 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', + 'hair drier', 'toothbrush'] + self.make_interpreter() + self.get_image_size() + + def get_names(self, path): + """ + Load a names file + + Inputs: + - path: path to names file in yaml format + """ + + with open(path, 'r') as f: + cfg = yaml.load(f, Loader=yaml.SafeLoader) + + names = cfg['names'] + logger.info("Loaded {} classes".format(len(names))) + + self.names = names + + def make_interpreter(self): + """ + Internal function that loads the tflite file and creates + the interpreter that deals with the EdgeTPU hardware. + """ + # Load the model and allocate + # Choose desktop or EdgTPU + if self.desktop: + self.interpreter = tflite.Interpreter(self.model_file) + else: + self.interpreter = etpu.make_interpreter(self.model_file) + self.interpreter.allocate_tensors() + + self.input_details = self.interpreter.get_input_details() + self.output_details = self.interpreter.get_output_details() + + logger.debug(self.input_details) + logger.debug(self.output_details) + + self.input_zero = self.input_details[0]['quantization'][1] + self.input_scale = self.input_details[0]['quantization'][0] + self.output_zero = self.output_details[0]['quantization'][1] + self.output_scale = self.output_details[0]['quantization'][0] + + # If the model isn't quantized then these should be zero + # Check against small epsilon to avoid comparing float/int + if self.input_scale < 1e-9: + self.input_scale = 1.0 + + if self.output_scale < 1e-9: + self.output_scale = 1.0 + + logger.debug("Input scale: {}".format(self.input_scale)) + logger.debug("Input zero: {}".format(self.input_zero)) + logger.debug("Output scale: {}".format(self.output_scale)) + logger.debug("Output zero: {}".format(self.output_zero)) + + logger.info("Successfully loaded {}".format(self.model_file)) + + def get_image_size(self): + """ + Returns the expected size of the input image tensor + """ + if self.interpreter is not None: + self.input_size = common.input_size(self.interpreter) + logger.debug("Expecting input shape: {}".format(self.input_size)) + return self.input_size + else: + logger.warn("Interpreter is not yet loaded") + + def predict(self, image_path, save_img=True, save_txt=True): + logger.info("Attempting to load {}".format(image_path)) + + full_image, net_image, pad = get_image_tensor(image_path, self.input_size[0]) + pred = self.forward(net_image) + logger.info("Inference time: {}".format(self.inference_time)) + + base, ext = os.path.splitext(image_path) + + output_path = base + "_detect" + ext + det = self.process_predictions(pred[0], full_image, pad, output_path, save_img=save_img, save_txt=save_txt) + + return det + + def forward(self, x: np.ndarray, with_nms=True) -> np.ndarray: + """ + 
Predict function using the EdgeTPU + + Inputs: + x: (C, H, W) image tensor + with_nms: apply NMS on output + + Returns: + prediction array (with or without NMS applied) + + """ + tstart = time.time() + # Transpose if C, H, W + if x.shape[0] == 3: + x = x.transpose((1, 2, 0)) + + x = x.astype('float32') + + # Scale input, conversion is: real = (int_8 - zero)*scale + x = (x / self.input_scale) + self.input_zero + x = x[np.newaxis].astype(np.uint8) + + self.interpreter.set_tensor(self.input_details[0]['index'], x) + self.interpreter.invoke() + + # Scale output + result = (common.output_tensor(self.interpreter, 0).astype('float32') - self.output_zero) * self.output_scale + self.inference_time = time.time() - tstart + + if with_nms: + + tstart = time.time() + nms_result = non_max_suppression(result, self.conf_thresh, self.iou_thresh, self.filter_classes, + self.agnostic_nms, max_det=self.max_det) + self.nms_time = time.time() - tstart + + return nms_result + + else: + return result + + def get_last_inference_time(self, with_nms=True): + """ + Returns a tuple containing most recent inference and NMS time + """ + res = [self.inference_time] + + if with_nms: + res.append(self.nms_time) + + return res + + def get_scaled_coords(self, xyxy, output_image, pad): + """ + Converts raw prediction bounding box to orginal + image coordinates. + + Args: + xyxy: array of boxes + output_image: np array + pad: padding due to image resizing (pad_w, pad_h) + """ + pad_w, pad_h = pad + in_h, in_w = self.input_size + out_h, out_w, _ = output_image.shape + + ratio_w = out_w / (in_w - pad_w) + ratio_h = out_h / (in_h - pad_h) + + out = [] + for coord in xyxy: + x1, y1, x2, y2 = coord + + x1 *= in_w * ratio_w + x2 *= in_w * ratio_w + y1 *= in_h * ratio_h + y2 *= in_h * ratio_h + + x1 = max(0, x1) + x2 = min(out_w, x2) + + y1 = max(0, y1) + y2 = min(out_h, y2) + + out.append((x1, y1, x2, y2)) + + return np.array(out).astype(int) + + def process_predictions2(self, det): + """ + Process predictions and optionally output an image with annotations + """ + if len(det): + # Rescale boxes from img_size to im0 size + # x1, y1, x2, y2= + #det[:, :4] = self.get_scaled_coords(det[:, :4], output_image, pad) + output = {} + #base, ext = os.path.splitext(output_path) + + s = "" + + # Print results + for c in np.unique(det[:, -1]): + n = (det[:, -1] == c).sum() # detections per class + s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, " # add to string + + if s != "": + s = s.strip() + s = s[:-1] + + logger.info("Detected: {}".format(s)) + + for *xyxy, conf, cls in reversed(det): + output = {} + output['box'] = xyxy + output['conf'] = conf + output['cls'] = cls + output['cls_name'] = self.names[c] + return output + + def process_predictions(self, det, output_image=None, pad=(0, 0), output_path="detection.jpg", save_img=False, save_txt=False, + hide_labels=False, hide_conf=False): + """ + Process predictions and optionally output an image with annotations + """ + if len(det): + # Rescale boxes from img_size to im0 size + # x1, y1, x2, y2= + det[:, :4] = self.get_scaled_coords(det[:, :4], output_image, pad) + output = {} + base, ext = os.path.splitext(output_path) + + s = "" + + # Print results + for c in np.unique(det[:, -1]): + n = (det[:, -1] == c).sum() # detections per class + s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, " # add to string + + if s != "": + s = s.strip() + s = s[:-1] + + logger.info("Detected: {}".format(s)) + + # Write results + for *xyxy, conf, cls in reversed(det): + if save_img: # Add bbox to image + c = 
int(cls) # integer class + label = None if hide_labels else (self.names[c] if hide_conf else f'{self.names[c]} {conf:.2f}') + output_image = plot_one_box(xyxy, output_image, label=label, color=self.colors(c, True)) + if save_txt: + output[base] = {} + output[base]['box'] = xyxy + output[base]['conf'] = conf + output[base]['cls'] = cls + output[base]['cls_name'] = self.names[c] + + if save_txt: + output_txt = base + "txt" + with open(output_txt, 'w') as f: + json.dump(output, f, indent=1) + if save_img: + cv2.imwrite(output_path, output_image) + + return det \ No newline at end of file diff --git a/frigate/yolov5/nms.py b/frigate/yolov5/nms.py new file mode 100644 index 000000000..4cb6a5428 --- /dev/null +++ b/frigate/yolov5/nms.py @@ -0,0 +1,142 @@ +import numpy as np +import time + + +def xywh2xyxy(x): + # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right + y = np.copy(x) + y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x + y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y + y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x + y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y + return y + + +def nms(dets, scores, thresh): + ''' + dets is a numpy array : num_dets, 4 + scores ia nump array : num_dets, + ''' + + x1 = dets[:, 0] + y1 = dets[:, 1] + x2 = dets[:, 2] + y2 = dets[:, 3] + + areas = (x2 - x1 + 1e-9) * (y2 - y1 + 1e-9) + order = scores.argsort()[::-1] # get boxes with more ious first + + keep = [] + while order.size > 0: + i = order[0] # pick maxmum iou box + other_box_ids = order[1:] + keep.append(i) + + xx1 = np.maximum(x1[i], x1[other_box_ids]) + yy1 = np.maximum(y1[i], y1[other_box_ids]) + xx2 = np.minimum(x2[i], x2[other_box_ids]) + yy2 = np.minimum(y2[i], y2[other_box_ids]) + + # print(list(zip(xx1, yy1, xx2, yy2))) + + w = np.maximum(0.0, xx2 - xx1 + 1e-9) # maximum width + h = np.maximum(0.0, yy2 - yy1 + 1e-9) # maxiumum height + inter = w * h + + ovr = inter / (areas[i] + areas[other_box_ids] - inter) + + inds = np.where(ovr <= thresh)[0] + order = order[inds + 1] + + return np.array(keep) + + +def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False, + labels=(), max_det=300): + nc = prediction.shape[2] - 5 # number of classes + xc = prediction[..., 4] > conf_thres # candidates + + # Checks + assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0' + assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0' + + # Settings + min_wh, max_wh = 2, 4096 # (pixels) minimum and maximum box width and height + max_nms = 30000 # maximum number of boxes into torchvision.ops.nms() + time_limit = 10.0 # seconds to quit after + redundant = True # require redundant detections + multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img) + merge = False # use merge-NMS + + t = time.time() + output = [np.zeros((0, 6))] * prediction.shape[0] + for xi, x in enumerate(prediction): # image index, image inference + # Apply constraints + # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height + x = x[xc[xi]] # confidence + + # Cat apriori labels if autolabelling + if labels and len(labels[xi]): + l = labels[xi] + v = np.zeros((len(l), nc + 5)) + v[:, :4] = l[:, 1:5] # box + v[:, 4] = 1.0 # conf + v[range(len(l)), l[:, 0].long() + 5] = 1.0 # cls + x = np.concatenate((x, v), 0) + + # If none remain process next image + if not x.shape[0]: + continue + + # Compute conf + x[:, 5:] *= x[:, 
4:5] # conf = obj_conf * cls_conf + + # Box (center x, center y, width, height) to (x1, y1, x2, y2) + box = xywh2xyxy(x[:, :4]) + + # Detections matrix nx6 (xyxy, conf, cls) + if multi_label: + i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T + x = np.concatenate((box[i], x[i, j + 5, None], j[:, None].astype(float)), axis=1) + else: # best class only + conf = np.amax(x[:, 5:], axis=1, keepdims=True) + j = np.argmax(x[:, 5:], axis=1).reshape(conf.shape) + x = np.concatenate((box, conf, j.astype(float)), axis=1)[conf.flatten() > conf_thres] + + # Filter by class + if classes is not None: + x = x[(x[:, 5:6] == np.array(classes)).any(1)] + + # Apply finite constraint + # if not torch.isfinite(x).all(): + # x = x[torch.isfinite(x).all(1)] + + # Check shape + n = x.shape[0] # number of boxes + if not n: # no boxes + continue + elif n > max_nms: # excess boxes + x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence + + # Batched NMS + c = x[:, 5:6] * (0 if agnostic else max_wh) # classes + boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores + + i = nms(boxes, scores, iou_thres) # NMS + + if i.shape[0] > max_det: # limit detections + i = i[:max_det] + if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean) + # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4) + iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix + weights = iou * scores[None] # box weights + x[i, :4] = np.dot(weights, x[:, :4]).astype(float) / weights.sum(1, keepdim=True) # merged boxes + if redundant: + i = i[iou.sum(1) > 1] # require redundancy + + output[xi] = x[i] + if (time.time() - t) > time_limit: + print(f'WARNING: NMS time limit {time_limit}s exceeded') + break # time limit exceeded + + return output \ No newline at end of file diff --git a/frigate/yolov5/utils.py b/frigate/yolov5/utils.py new file mode 100644 index 000000000..903eaf9f4 --- /dev/null +++ b/frigate/yolov5/utils.py @@ -0,0 +1,120 @@ +import os +import sys +import argparse +import logging +import time +from pathlib import Path + +import numpy as np +import cv2 + + +class Colors: + # Ultralytics color palette https://ultralytics.com/ + def __init__(self): + # hex = matplotlib.colors.TABLEAU_COLORS.values() + hex = ('FF3838', 'FF9D97', 'FF701F', 'FFB21D', 'CFD231', '48F90A', '92CC17', '3DDB86', '1A9334', '00D4BB', + '2C99A8', '00C2FF', '344593', '6473FF', '0018EC', '8438FF', '520085', 'CB38FF', 'FF95C8', 'FF37C7') + self.palette = [self.hex2rgb('#' + c) for c in hex] + self.n = len(self.palette) + + def __call__(self, i, bgr=False): + c = self.palette[int(i) % self.n] + return (c[2], c[1], c[0]) if bgr else c + + @staticmethod + def hex2rgb(h): # rgb order (PIL) + return tuple(int(h[1 + i:1 + i + 2], 16) for i in (0, 2, 4)) + + +def plot_one_box(box, im, color=(128, 128, 128), txt_color=(255, 255, 255), label=None, line_width=3): + # Plots one xyxy box on image im with label + assert im.data.contiguous, 'Image not contiguous. Apply np.ascontiguousarray(im) to plot_on_box() input image.' 
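+ # fall back to a line width scaled to the image size when none is supplied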
+ lw = line_width or max(int(min(im.size) / 200), 2) # line width + + c1, c2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3])) + + cv2.rectangle(im, c1, c2, color, thickness=lw, lineType=cv2.LINE_AA) + if label: + tf = max(lw - 1, 1) # font thickness + txt_width, txt_height = cv2.getTextSize(label, 0, fontScale=lw / 3, thickness=tf)[0] + c2 = c1[0] + txt_width, c1[1] - txt_height - 3 + cv2.rectangle(im, c1, c2, color, -1, cv2.LINE_AA) # filled + cv2.putText(im, label, (c1[0], c1[1] - 2), 0, lw / 3, txt_color, thickness=tf, lineType=cv2.LINE_AA) + return im + + +def resize_and_pad(image, desired_size): + old_size = image.shape[:2] + ratio = float(desired_size / max(old_size)) + new_size = tuple([int(x * ratio) for x in old_size]) + + # new_size should be in (width, height) format + + image = cv2.resize(image, (new_size[1], new_size[0])) + + delta_w = desired_size - new_size[1] + delta_h = desired_size - new_size[0] + + pad = (delta_w, delta_h) + + color = [100, 100, 100] + new_im = cv2.copyMakeBorder(image, 0, delta_h, 0, delta_w, cv2.BORDER_CONSTANT, + value=color) + + return new_im, pad + + +def get_image_tensor(img, max_size, debug=False): + """ + Reshapes an input image into a square with sides max_size + """ + if type(img) is str: + img = cv2.imread(img) + + resized, pad = resize_and_pad(img, max_size) + resized = resized.astype(np.float32) + + if debug: + cv2.imwrite("intermediate.png", resized) + + # Normalise! + resized /= 255.0 + + return img, resized, pad + + +def xyxy2xywh(x): + # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right + y = np.copy(x) + y[:, 0] = (x[:, 0] + x[:, 2]) / 2 # x center + y[:, 1] = (x[:, 1] + x[:, 3]) / 2 # y center + y[:, 2] = x[:, 2] - x[:, 0] # width + y[:, 3] = x[:, 3] - x[:, 1] # height + return y + + +def coco80_to_coco91_class(): # converts 80-index (val2014) to 91-index (paper) + # https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/ + # a = np.loadtxt('data/coco.names', dtype='str', delimiter='\n') + # b = np.loadtxt('data/coco_paper.names', dtype='str', delimiter='\n') + # x1 = [list(a[i] == b).index(True) + 1 for i in range(80)] # darknet to coco + # x2 = [list(b[i] == a).index(True) if any(b[i] == a) else None for i in range(91)] # coco to darknet + x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, + 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90] + return x + + +def save_one_json(predn, jdict, path, class_map): + # Save one JSON result {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236} + image_id = int(path.stem) if path.stem.isnumeric() else path.stem + + box = xyxy2xywh(predn[:, :4]) # xywh + box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner + + for p, b in zip(predn.tolist(), box.tolist()): + jdict.append({'image_id': image_id, + 'category_id': class_map[int(p[5])], + 'bbox': [round(x, 3) for x in b], + 'score': round(p[4], 5)}) diff --git a/frigate/yolov5_pytorch.py b/frigate/yolov5_pytorch.py new file mode 100644 index 000000000..d0e9c7fac --- /dev/null +++ b/frigate/yolov5_pytorch.py @@ -0,0 +1,111 @@ +import torch +import numpy as np +#import cv2 +from time import time +import sys + + +class ObjectDetection: + """ + The class performs generic object detection on a video file. 
+ It uses yolo5 pretrained model to make inferences and opencv2 to manage frames. + Included Features: + 1. Reading and writing of video file using Opencv2 + 2. Using pretrained model to make inferences on frames. + 3. Use the inferences to plot boxes on objects along with labels. + Upcoming Features: + """ + def __init__(self): + self.model = self.load_model() + self.model.conf = 0.4 # set inference threshold at 0.3 + self.model.iou = 0.3 # set inference IOU threshold at 0.3 + #self.model.classes = [0] # set model to only detect "Person" class + #self.model.classes = self.model.names + self.classes = self.model.names + self.found_lables = set() # set + self.device = 'cuda' if torch.cuda.is_available() else 'cpu' + + def load_model(self): + """ + Function loads the yolo5 model from PyTorch Hub. + """ + #model = torch.hub.load('/media/frigate/yolov5', 'custom', path='/media/frigate/yolov5/yolov5l.pt', source='local') + model = torch.hub.load('/media/frigate/yolov5', 'custom', path='/media/frigate/yolov5/yolov5s.pt', source='local') + #model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True) + #model = torch.hub.load('ultralytics/yolov3', 'yolov3', pretrained=True) + return model + + def class_to_label(self, x): + """ + For a given label value, return corresponding string label. + :param x: numeric label + :return: corresponding string label + """ + return self.classes[int(x)] + + def score_frame(self, frame): + """ + function scores each frame of the video and returns results. + :param frame: frame to be infered. + :return: labels and coordinates of objects found. + """ + self.model.to(self.device) + results = self.model(frame) + labels, cord = results.xyxyn[0][:, -1].to('cpu').numpy(), results.xyxyn[0][:, :-1].to('cpu').numpy() + return labels, cord + + def plot_boxes(self, results, frame): + """ + plots boxes and labels on frame. + :param results: inferences made by model + :param frame: frame on which to make the plots + :return: new frame with boxes and labels plotted. 
+ """ + labels, cord = results + n = len(labels) + if n > 0: + print(f"Total Targets: {n}") + print(f"Labels: {set([self.class_to_label(label) for label in labels])}") + x_shape, y_shape = frame.shape[1], frame.shape[0] + for i in range(n): + self.found_lables.add(self.class_to_label(labels[i])) + row = cord[i] + x1, y1, x2, y2 = int(row[0]*x_shape), int(row[1]*y_shape), int(row[2]*x_shape), int(row[3]*y_shape) + bgr = (0, 0, 255) + cv2.rectangle(frame, (x1, y1), (x2, y2), bgr, 1) + label = f"{int(row[4]*100)}" + cv2.putText(frame, self.class_to_label(labels[i]), (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 1) + cv2.putText(frame, f"Total Targets: {n}", (30, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2) + + return frame + + def __call__(self): + player = self.get_video_from_file() # create streaming service for application + assert player.isOpened() + x_shape = int(player.get(cv2.CAP_PROP_FRAME_WIDTH)) + y_shape = int(player.get(cv2.CAP_PROP_FRAME_HEIGHT)) + four_cc = cv2.VideoWriter_fourcc(*"MJPG") + out = cv2.VideoWriter(self.out_file, four_cc, 20, (x_shape, y_shape)) + fc = 0 + fps = 0 + tfc = int(player.get(cv2.CAP_PROP_FRAME_COUNT)) + tfcc = 0 + while True: + fc += 1 + start_time = time() + ret, frame = player.read() + if not ret: + break + results = self.score_frame(frame) + frame = self.plot_boxes(results, frame) + end_time = time() + fps += 1/np.round(end_time - start_time, 3) + if fc == 10: + fps = int(fps / 10) + tfcc += fc + fc = 0 + per_com = int(tfcc / tfc * 100) + print(f"Frames Per Second : {fps} || Percentage Parsed : {per_com}") + out.write(frame) + print(f"Found labels: {self.found_lables}") + player.release()