diff --git a/docs/docs/configuration/advanced.md b/docs/docs/configuration/advanced.md index b03b438a4..7889b7933 100644 --- a/docs/docs/configuration/advanced.md +++ b/docs/docs/configuration/advanced.md @@ -112,7 +112,13 @@ detectors: ```yaml model: # Required: height of the trained model - height: 320 + height: 416 # Required: width of the trained model - width: 320 + width: 416 + # Required: type of model (ssd or yolo) + model_type: 'yolo' + # Required: path of label map + label_path: '/labelmap.txt' + # Optional: (but required for yolo) - anchors, comma separated + anchors: '10,14, 23,27, 37,58, 81,82, 135,169, 344,319' ``` diff --git a/docs/docs/configuration/objects.mdx b/docs/docs/configuration/objects.mdx index 3e95f9e83..b385f3460 100644 --- a/docs/docs/configuration/objects.mdx +++ b/docs/docs/configuration/objects.mdx @@ -24,6 +24,32 @@ Models for both CPU and EdgeTPU (Coral) are bundled in the image. You can use yo You also need to update the model width/height in the config if they differ from the defaults. +You can also try improving the speed using a YOLOv3-tiny model, quantized to work on the edge TPU. + +A compiled model exists [here](https://github.com/guichristmann/edge-tpu-tiny-yolo/tree/master/models) + +Add it as a volume mount in your docker-compose file: +```yaml + volumes: + - /path/to/quant_coco-tiny-v3-relu_edgetpu.tflite:/edgetpu_model.tflite +``` + +And then set the configuration for the model in config.yml: + +```yaml +model: + # Required: height of the trained model + height: 416 + # Required: width of the trained model + width: 416 + # Required: type of model (ssd or yolo) + model_type: 'yolo' + # Required: path of label map + label_path: '/labelmap.txt' + # Optional: (but required for yolo) - anchors, comma separated + anchors: '10,14, 23,27, 37,58, 81,82, 135,169, 344,319' +``` + ### Customizing the Labelmap The labelmap can be customized to your needs. 
A common reason to do this is to combine multiple object types that are easily confused when you don't need to be as granular such as car/truck. You must retain the same number of labels, but you can change the names. To change: diff --git a/frigate/app.py b/frigate/app.py index 8e5178c79..4e24a81f3 100644 --- a/frigate/app.py +++ b/frigate/app.py @@ -31,6 +31,7 @@ from frigate.zeroconf import broadcast_zeroconf logger = logging.getLogger(__name__) + class FrigateApp(): def __init__(self): self.stop_event = mp.Event() @@ -153,9 +154,9 @@ class FrigateApp(): for name, detector in self.config.detectors.items(): if detector.type == 'cpu': - self.detectors[name] = EdgeTPUProcess(name, self.detection_queue, self.detection_out_events, model_shape, 'cpu', detector.num_threads) + self.detectors[name] = EdgeTPUProcess(name, self.detection_queue, self.detection_out_events, self.config.model, 'cpu', detector.num_threads) if detector.type == 'edgetpu': - self.detectors[name] = EdgeTPUProcess(name, self.detection_queue, self.detection_out_events, model_shape, detector.device, detector.num_threads) + self.detectors[name] = EdgeTPUProcess(name, self.detection_queue, self.detection_out_events, self.config.model, detector.device, detector.num_threads) def start_detected_frames_processor(self): self.detected_frames_processor = TrackedObjectProcessor(self.config, self.mqtt_client, self.config.mqtt.topic_prefix, diff --git a/frigate/config.py b/frigate/config.py index 0878f3c1f..87aa7d5d4 100644 --- a/frigate/config.py +++ b/frigate/config.py @@ -244,9 +244,12 @@ FRIGATE_CONFIG_SCHEMA = vol.Schema( vol.Optional('database', default={}): { vol.Optional('path', default=os.path.join(CLIPS_DIR, 'frigate.db')): str }, - vol.Optional('model', default={'width': 320, 'height': 320}): { + vol.Optional('model', default={'width': 320, 'height': 320, 'model_type': 'ssd', 'label_path': '/labelmap.txt'}): { vol.Required('width'): int, - vol.Required('height'): int + vol.Required('height'): int, + 
vol.Required('model_type') : vol.In(['ssd', 'yolo']), + vol.Required('label_path') : str, + vol.Optional('anchors', default="") : str }, vol.Optional('detectors', default=DEFAULT_DETECTORS): DETECTORS_SCHEMA, 'mqtt': MQTT_SCHEMA, @@ -288,6 +291,9 @@ class ModelConfig(): def __init__(self, config): self._width = config['width'] self._height = config['height'] + self._label_path = config['label_path'] + self._model_type = config['model_type'] + self._anchors = config['anchors'] @property def width(self): @@ -297,10 +303,25 @@ class ModelConfig(): def height(self): return self._height + @property + def label_path(self): + return self._label_path + + @property + def model_type(self): + return self._model_type + + @property + def anchors(self): + return self._anchors + def to_dict(self): return { 'width': self.width, - 'height': self.height + 'height': self.height, + 'label_path': self.label_path, + 'model_type': self.model_type, + 'anchors': self.anchors } class DetectorConfig(): diff --git a/frigate/edgetpu.py b/frigate/edgetpu.py index d65ce523b..85d0b6a45 100644 --- a/frigate/edgetpu.py +++ b/frigate/edgetpu.py @@ -27,8 +27,10 @@ def load_labels(path, encoding='utf-8'): Returns: Dictionary mapping indices to labels. 
""" + logger.warn(f"Loaded labels from {path}") with open(path, 'r', encoding=encoding) as f: lines = f.readlines() + if not lines: return {} @@ -44,13 +46,11 @@ class ObjectDetector(ABC): pass class LocalObjectDetector(ObjectDetector): - def __init__(self, tf_device=None, num_threads=3, labels=None): + def __init__(self, model_config, tf_device=None, num_threads=3): self.fps = EventsPerSecond() - if labels is None: - self.labels = {} - else: - self.labels = load_labels(labels) - + self.labels = load_labels(model_config.label_path) + self.model_config = model_config + device_config = {"device": "usb"} if not tf_device is None: device_config = {"device": tf_device} @@ -76,7 +76,11 @@ class LocalObjectDetector(ObjectDetector): self.tensor_input_details = self.interpreter.get_input_details() self.tensor_output_details = self.interpreter.get_output_details() - + + if model_config.anchors != "": + anchors = [float(x) for x in model_config.anchors.split(',')] + self.anchors = np.array(anchors).reshape(-1, 2) + def detect(self, tensor_input, threshold=.4): detections = [] @@ -93,20 +97,136 @@ class LocalObjectDetector(ObjectDetector): self.fps.update() return detections + def sigmoid(self, x): + return 1. 
/ (1 + np.exp(-x)) + def detect_raw(self, tensor_input): + if self.model_config.model_type == "ssd": + raw_detections = self.detect_ssd(tensor_input) + elif self.model_config.model_type == "yolo": + raw_detections = self.detect_yolo(tensor_input) + else: + logger.error(f"Unsupported model type {self.model_config.model_type}") + raw_detections = [] + return raw_detections + + + def get_interpreter_details(self): + # Get input and output tensor details + input_details = self.interpreter.get_input_details() + output_details = self.interpreter.get_output_details() + input_shape = input_details[0]["shape"] + return input_details, output_details, input_shape + + # from util.py in https://github.com/guichristmann/edge-tpu-tiny-yolo + def featuresToBoxes(self, outputs, anchors, n_classes, net_input_shape): + grid_shape = outputs.shape[1:3] + n_anchors = len(anchors) + + # Numpy screwaround to get the boxes in reasonable amount of time + grid_y = np.tile(np.arange(grid_shape[0]).reshape(-1, 1), grid_shape[0]).reshape(1, grid_shape[0], grid_shape[0], 1).astype(np.float32) + grid_x = grid_y.copy().T.reshape(1, grid_shape[0], grid_shape[1], 1).astype(np.float32) + outputs = outputs.reshape(1, grid_shape[0], grid_shape[1], n_anchors, -1) + _anchors = anchors.reshape(1, 1, 3, 2).astype(np.float32) + + # Get box parameters from network output and apply transformations + bx = (self.sigmoid(outputs[..., 0]) + grid_x) / grid_shape[0] + by = (self.sigmoid(outputs[..., 1]) + grid_y) / grid_shape[1] + # Should these be inverted? 
+ bw = np.multiply(_anchors[..., 0] / net_input_shape[1], np.exp(outputs[..., 2])) + bh = np.multiply(_anchors[..., 1] / net_input_shape[2], np.exp(outputs[..., 3])) + + # Get the scores + scores = self.sigmoid(np.expand_dims(outputs[..., 4], -1)) * \ + self.sigmoid(outputs[..., 5:]) + scores = scores.reshape(-1, n_classes) + + # TODO: some of these are probably not needed but I don't understand numpy magic well enough + bx = bx.flatten() + by = (by.flatten()) * 1 + bw = bw.flatten() + bh = bh.flatten() * 1 + half_bw = bw / 2. + half_bh = bh / 2. + + tl_x = np.multiply(bx - half_bw, 1) + tl_y = np.multiply(by - half_bh, 1) + br_x = np.multiply(bx + half_bw, 1) + br_y = np.multiply(by + half_bh, 1) + + # Get indices of boxes with score higher than threshold + indices = np.argwhere(scores >= 0.5) + selected_boxes = [] + selected_scores = [] + for i in indices: + i = tuple(i) + selected_boxes.append( ((tl_x[i[0]], tl_y[i[0]]), (br_x[i[0]], br_y[i[0]])) ) + selected_scores.append(scores[i]) + + selected_boxes = np.array(selected_boxes) + selected_scores = np.array(selected_scores) + selected_classes = indices[:, 1] + + return selected_boxes, selected_scores, selected_classes + + def detect_ssd(self, tensor_input): self.interpreter.set_tensor(self.tensor_input_details[0]['index'], tensor_input) self.interpreter.invoke() boxes = np.squeeze(self.interpreter.get_tensor(self.tensor_output_details[0]['index'])) label_codes = np.squeeze(self.interpreter.get_tensor(self.tensor_output_details[1]['index'])) scores = np.squeeze(self.interpreter.get_tensor(self.tensor_output_details[2]['index'])) + # TODO: there's definitely a bug here. 20 seems to be artificially chosen and you'll get an indexing error if there's + # more than 20 results. 
detections = np.zeros((20,6), np.float32) for i, score in enumerate(scores): detections[i] = [label_codes[i], score, boxes[i][0], boxes[i][1], boxes[i][2], boxes[i][3]] return detections -def run_detector(name: str, detection_queue: mp.Queue, out_events: Dict[str, mp.Event], avg_speed, start, model_shape, tf_device, num_threads): + def detect_yolo(self, tensor_input): + input_details, output_details, net_input_shape = \ + self.get_interpreter_details() + + self.interpreter.set_tensor(self.tensor_input_details[0]['index'], tensor_input) + self.interpreter.invoke() + + # for yolo, it's a little different + out1 = self.interpreter.get_tensor(self.tensor_output_details[0]['index']) + out2 = self.interpreter.get_tensor(self.tensor_output_details[1]['index']) + + # Dequantize output (tpu only) + o1_scale, o1_zero = self.tensor_output_details[0]['quantization'] + out1 = (out1.astype(np.float32) - o1_zero) * o1_scale + o2_scale, o2_zero = self.tensor_output_details[1]['quantization'] + out2 = (out2.astype(np.float32) - o2_zero) * o2_scale + + num_classes = len(self.labels) + _boxes1, _scores1, _classes1 = self.featuresToBoxes(out1, self.anchors[[3, 4, 5]], len(self.labels), net_input_shape) + _boxes2, _scores2, _classes2 = self.featuresToBoxes(out2, self.anchors[[1, 2, 3]], len(self.labels), net_input_shape) + + if _boxes1.shape[0] == 0: + _boxes1 = np.empty([0, 2, 2]) + _scores1 = np.empty([0,]) + _classes1 = np.empty([0,]) + if _boxes2.shape[0] == 0: + _boxes2 = np.empty([0, 2, 2]) + _scores2 = np.empty([0,]) + _classes2 = np.empty([0,]) + boxes = np.append(_boxes1, _boxes2, axis=0) + scores = np.append(_scores1, _scores2, axis=0) + label_codes = np.append(_classes1, _classes2, axis=0) + + # TODO: there's definitely a bug here. 20 seems to be artificially chosen and you'll get an indexing error if there's + # more than 20 results. 
+ detections = np.zeros((20,6), np.float32) + for i, score in enumerate(scores): + if i < 20: + detections[i] = [label_codes[i], score, boxes[i][0][1], boxes[i][0][0], boxes[i][1][1], boxes[i][1][0]] + + return detections + +def run_detector(name: str, detection_queue: mp.Queue, out_events: Dict[str, mp.Event], avg_speed, start, model_config, tf_device, num_threads): threading.current_thread().name = f"detector:{name}" logger = logging.getLogger(f"detector.{name}") logger.info(f"Starting detection process: {os.getpid()}") @@ -121,7 +241,7 @@ def run_detector(name: str, detection_queue: mp.Queue, out_events: Dict[str, mp. signal.signal(signal.SIGINT, receiveSignal) frame_manager = SharedMemoryFrameManager() - object_detector = LocalObjectDetector(tf_device=tf_device, num_threads=num_threads) + object_detector = LocalObjectDetector(model_config, tf_device=tf_device, num_threads=num_threads) outputs = {} for name in out_events.keys(): @@ -140,7 +260,7 @@ def run_detector(name: str, detection_queue: mp.Queue, out_events: Dict[str, mp. connection_id = detection_queue.get(timeout=5) except queue.Empty: continue - input_frame = frame_manager.get(connection_id, (1,model_shape[0],model_shape[1],3)) + input_frame = frame_manager.get(connection_id, (1, model_config.height, model_config.width,3)) if input_frame is None: continue @@ -156,16 +276,16 @@ def run_detector(name: str, detection_queue: mp.Queue, out_events: Dict[str, mp. 
avg_speed.value = (avg_speed.value*9 + duration)/10 class EdgeTPUProcess(): - def __init__(self, name, detection_queue, out_events, model_shape, tf_device=None, num_threads=3): + def __init__(self, name, detection_queue, out_events, model_config, tf_device=None, num_threads=3): self.name = name self.out_events = out_events self.detection_queue = detection_queue self.avg_inference_speed = mp.Value('d', 0.01) self.detection_start = mp.Value('d', 0.0) self.detect_process = None - self.model_shape = model_shape self.tf_device = tf_device self.num_threads = num_threads + self.model_config = model_config self.start_or_restart() def stop(self): @@ -181,7 +301,7 @@ class EdgeTPUProcess(): self.detection_start.value = 0.0 if (not self.detect_process is None) and self.detect_process.is_alive(): self.stop() - self.detect_process = mp.Process(target=run_detector, name=f"detector:{self.name}", args=(self.name, self.detection_queue, self.out_events, self.avg_inference_speed, self.detection_start, self.model_shape, self.tf_device, self.num_threads)) + self.detect_process = mp.Process(target=run_detector, name=f"detector:{self.name}", args=(self.name, self.detection_queue, self.out_events, self.avg_inference_speed, self.detection_start, self.model_config, self.tf_device, self.num_threads)) self.detect_process.daemon = True self.detect_process.start()