Initial support for YOLOv3-tiny

This commit is contained in:
Michael Wei 2021-04-24 23:48:42 +00:00
parent 42410a260c
commit 012e2ca00d
5 changed files with 194 additions and 20 deletions

View File

@ -112,7 +112,13 @@ detectors:
```yaml
model:
# Required: height of the trained model
height: 320
height: 416
# Required: width of the trained model
width: 320
width: 416
# Required: type of model (ssd or yolo)
model_type: 'yolo'
# Required: path of label map
label_path: '/labelmap.txt'
# Optional: (but required for yolo) - anchors, comma separated
anchors: '10,14, 23,27, 37,58, 81,82, 135,169, 344,319'
```

View File

@ -24,6 +24,32 @@ Models for both CPU and EdgeTPU (Coral) are bundled in the image. You can use yo
You also need to update the model width/height in the config if they differ from the defaults.
You can also try improving speed by using a YOLOv3-tiny model quantized to work on the Edge TPU.
A pre-compiled model is available [here](https://github.com/guichristmann/edge-tpu-tiny-yolo/tree/master/models).
Add it as a volume mount in your docker-compose file:
```yaml
volumes:
- /path/to/quant_coco-tiny-v3-relu_edgetpu.tflite:/edgetpu_model.tflite
```
And then set the configuration for the model in config.yml:
```yaml
model:
# Required: height of the trained model
height: 416
# Required: width of the trained model
width: 416
# Required: type of model (ssd or yolo)
model_type: 'yolo'
# Required: path of label map
label_path: '/labelmap.txt'
# Optional: (but required for yolo) - anchors, comma-separated
anchors: '10,14, 23,27, 37,58, 81,82, 135,169, 344,319'
```
### Customizing the Labelmap
The labelmap can be customized to your needs. A common reason to do this is to combine multiple object types that are easily confused, such as car/truck, when you don't need to be as granular. You must retain the same number of labels, but you can change the names. To change:

View File

@ -31,6 +31,7 @@ from frigate.zeroconf import broadcast_zeroconf
logger = logging.getLogger(__name__)
class FrigateApp():
def __init__(self):
self.stop_event = mp.Event()
@ -153,9 +154,9 @@ class FrigateApp():
for name, detector in self.config.detectors.items():
if detector.type == 'cpu':
self.detectors[name] = EdgeTPUProcess(name, self.detection_queue, self.detection_out_events, model_shape, 'cpu', detector.num_threads)
self.detectors[name] = EdgeTPUProcess(name, self.detection_queue, self.detection_out_events, self.config.model, 'cpu', detector.num_threads)
if detector.type == 'edgetpu':
self.detectors[name] = EdgeTPUProcess(name, self.detection_queue, self.detection_out_events, model_shape, detector.device, detector.num_threads)
self.detectors[name] = EdgeTPUProcess(name, self.detection_queue, self.detection_out_events, self.config.model, detector.device, detector.num_threads)
def start_detected_frames_processor(self):
self.detected_frames_processor = TrackedObjectProcessor(self.config, self.mqtt_client, self.config.mqtt.topic_prefix,

View File

@ -244,9 +244,12 @@ FRIGATE_CONFIG_SCHEMA = vol.Schema(
vol.Optional('database', default={}): {
vol.Optional('path', default=os.path.join(CLIPS_DIR, 'frigate.db')): str
},
vol.Optional('model', default={'width': 320, 'height': 320}): {
vol.Optional('model', default={'width': 320, 'height': 320, 'model_type': 'ssd', 'label_path': '/labelmap.txt'}): {
vol.Required('width'): int,
vol.Required('height'): int
vol.Required('height'): int,
vol.Required('model_type') : vol.In(['ssd', 'yolo']),
vol.Required('label_path') : str,
vol.Optional('anchors', default="") : str
},
vol.Optional('detectors', default=DEFAULT_DETECTORS): DETECTORS_SCHEMA,
'mqtt': MQTT_SCHEMA,
@ -288,6 +291,9 @@ class ModelConfig():
def __init__(self, config):
self._width = config['width']
self._height = config['height']
self._label_path = config['label_path']
self._model_type = config['model_type']
self._anchors = config['anchors']
@property
def width(self):
@ -297,10 +303,25 @@ class ModelConfig():
def height(self):
return self._height
@property
def label_path(self):
return self._label_path
@property
def model_type(self):
return self._model_type
@property
def anchors(self):
return self._anchors
def to_dict(self):
return {
'width': self.width,
'height': self.height
'height': self.height,
'label_path': self.label_path,
'model_type': self.model_type,
'anchors': self.anchors
}
class DetectorConfig():

View File

@ -27,8 +27,10 @@ def load_labels(path, encoding='utf-8'):
Returns:
Dictionary mapping indices to labels.
"""
logger.warn(f"Loaded labels from {path}")
with open(path, 'r', encoding=encoding) as f:
lines = f.readlines()
if not lines:
return {}
@ -44,13 +46,11 @@ class ObjectDetector(ABC):
pass
class LocalObjectDetector(ObjectDetector):
def __init__(self, tf_device=None, num_threads=3, labels=None):
def __init__(self, model_config, tf_device=None, num_threads=3):
self.fps = EventsPerSecond()
if labels is None:
self.labels = {}
else:
self.labels = load_labels(labels)
self.labels = load_labels(model_config.label_path)
self.model_config = model_config
device_config = {"device": "usb"}
if not tf_device is None:
device_config = {"device": tf_device}
@ -76,7 +76,11 @@ class LocalObjectDetector(ObjectDetector):
self.tensor_input_details = self.interpreter.get_input_details()
self.tensor_output_details = self.interpreter.get_output_details()
if model_config.anchors != "":
anchors = [float(x) for x in model_config.anchors.split(',')]
self.anchors = np.array(anchors).reshape(-1, 2)
def detect(self, tensor_input, threshold=.4):
detections = []
@ -93,20 +97,136 @@ class LocalObjectDetector(ObjectDetector):
self.fps.update()
return detections
def sigmoid(self, x):
    # Logistic function: squashes any real-valued input into (0, 1).
    return np.reciprocal(1.0 + np.exp(-x))
def detect_raw(self, tensor_input):
    """Run raw inference using the backend that matches the configured model type.

    Returns the backend's detection array, or an empty list when the
    configured model_type is neither 'ssd' nor 'yolo'.
    """
    model_type = self.model_config.model_type
    if model_type == "ssd":
        return self.detect_ssd(tensor_input)
    if model_type == "yolo":
        return self.detect_yolo(tensor_input)
    logger.error(f"Unsupported model type {model_type}")
    return []
def get_interpreter_details(self):
    """Return (input_details, output_details, input_shape) for the loaded interpreter.

    input_shape is the shape of the first input tensor.
    """
    in_details = self.interpreter.get_input_details()
    out_details = self.interpreter.get_output_details()
    return in_details, out_details, in_details[0]["shape"]
# Decoding adapted from util.py in https://github.com/guichristmann/edge-tpu-tiny-yolo
def featuresToBoxes(self, outputs, anchors, n_classes, net_input_shape):
    """Decode one YOLO output feature map into boxes, scores and classes.

    Args:
        outputs: dequantized feature map, shape
            (1, grid_h, grid_w, n_anchors * (5 + n_classes)).
        anchors: (n_anchors, 2) anchor sizes in input-image pixels.
        n_classes: number of object classes.
        net_input_shape: interpreter input shape, e.g. (1, H, W, 3).

    Returns:
        (selected_boxes, selected_scores, selected_classes) for every
        anchor/cell whose score is >= 0.5; boxes are
        ((tl_x, tl_y), (br_x, br_y)) normalized to [0, 1].
    """
    grid_shape = outputs.shape[1:3]
    n_anchors = len(anchors)

    # Build per-cell grid offsets with broadcasting (vectorized for speed).
    # NOTE(review): grid_shape[0] is used for both axes, so this assumes a
    # square grid -- true for YOLOv3-tiny's 13x13 / 26x26 maps; confirm for
    # other models.
    grid_y = np.tile(np.arange(grid_shape[0]).reshape(-1, 1), grid_shape[0]).reshape(1, grid_shape[0], grid_shape[0], 1).astype(np.float32)
    grid_x = grid_y.copy().T.reshape(1, grid_shape[0], grid_shape[1], 1).astype(np.float32)
    # Split the flat channel dimension into one slot per anchor.
    outputs = outputs.reshape(1, grid_shape[0], grid_shape[1], n_anchors, -1)
    # NOTE(review): reshape hard-codes 3 anchors even though n_anchors is
    # computed above -- fine for YOLOv3-tiny (3 anchors per scale).
    _anchors = anchors.reshape(1, 1, 3, 2).astype(np.float32)

    # Standard YOLO decode: sigmoid cell offset + grid index, normalized by
    # grid size; anchor sizes scaled by exp() of the raw w/h outputs.
    bx = (self.sigmoid(outputs[..., 0]) + grid_x) / grid_shape[0]
    by = (self.sigmoid(outputs[..., 1]) + grid_y) / grid_shape[1]
    # NOTE(review): upstream author was unsure whether net_input_shape's
    # w/h axes are swapped here -- harmless when the input is square.
    bw = np.multiply(_anchors[..., 0] / net_input_shape[1], np.exp(outputs[..., 2]))
    bh = np.multiply(_anchors[..., 1] / net_input_shape[2], np.exp(outputs[..., 3]))

    # Per-class score = objectness * class probability.
    scores = self.sigmoid(np.expand_dims(outputs[..., 4], -1)) * \
             self.sigmoid(outputs[..., 5:])
    scores = scores.reshape(-1, n_classes)

    # NOTE(review): the `* 1` / multiply-by-1 operations below are no-ops
    # kept from the original implementation; only the flattens matter.
    bx = bx.flatten()
    by = (by.flatten()) * 1
    bw = bw.flatten()
    bh = bh.flatten() * 1
    half_bw = bw / 2.
    half_bh = bh / 2.

    # Convert center/size to top-left and bottom-right corners.
    tl_x = np.multiply(bx - half_bw, 1)
    tl_y = np.multiply(by - half_bh, 1)
    br_x = np.multiply(bx + half_bw, 1)
    br_y = np.multiply(by + half_bh, 1)

    # Keep only (cell, class) pairs with score above the hard-coded 0.5
    # threshold; indices[:, 0] is the flat box index, indices[:, 1] the class.
    indices = np.argwhere(scores >= 0.5)
    selected_boxes = []
    selected_scores = []
    for i in indices:
        i = tuple(i)
        selected_boxes.append( ((tl_x[i[0]], tl_y[i[0]]), (br_x[i[0]], br_y[i[0]])) )
        selected_scores.append(scores[i])

    selected_boxes = np.array(selected_boxes)
    selected_scores = np.array(selected_scores)
    selected_classes = indices[:, 1]

    return selected_boxes, selected_scores, selected_classes
def detect_ssd(self, tensor_input):
    """Run an SSD model on tensor_input and return detections.

    Returns a fixed-size (20, 6) float32 array where each row is
    [label_code, score, box[0], box[1], box[2], box[3]] -- box coordinate
    order is whatever the model emits (typically ymin, xmin, ymax, xmax for
    TFLite SSD post-processing; confirm against the model). Unused rows
    stay zero.
    """
    self.interpreter.set_tensor(self.tensor_input_details[0]['index'], tensor_input)
    self.interpreter.invoke()

    # reshape/atleast_1d guard against np.squeeze collapsing the outputs
    # to 0-d / 1-d arrays when the model returns exactly one detection.
    boxes = np.squeeze(self.interpreter.get_tensor(self.tensor_output_details[0]['index'])).reshape(-1, 4)
    label_codes = np.atleast_1d(np.squeeze(self.interpreter.get_tensor(self.tensor_output_details[1]['index'])))
    scores = np.atleast_1d(np.squeeze(self.interpreter.get_tensor(self.tensor_output_details[2]['index'])))

    # Fixed-size buffer expected downstream. Cap at 20 rows: the previous
    # version raised IndexError when the model produced more results.
    detections = np.zeros((20, 6), np.float32)
    for i, score in enumerate(scores):
        if i >= 20:
            break
        detections[i] = [label_codes[i], score, boxes[i][0], boxes[i][1], boxes[i][2], boxes[i][3]]
    return detections
def run_detector(name: str, detection_queue: mp.Queue, out_events: Dict[str, mp.Event], avg_speed, start, model_shape, tf_device, num_threads):
def detect_yolo(self, tensor_input):
    """Run a YOLOv3-tiny model on tensor_input and return up to 20 detections.

    Returns a (20, 6) float32 array where each row is
    [label_code, score, tl_y, tl_x, br_y, br_x] with coordinates
    normalized to [0, 1]; unused rows stay zero.
    """
    input_details, output_details, net_input_shape = \
        self.get_interpreter_details()
    self.interpreter.set_tensor(self.tensor_input_details[0]['index'], tensor_input)
    self.interpreter.invoke()

    # Unlike SSD, YOLOv3-tiny exposes two raw feature maps (one per
    # detection scale) that must be decoded manually.
    out1 = self.interpreter.get_tensor(self.tensor_output_details[0]['index'])
    out2 = self.interpreter.get_tensor(self.tensor_output_details[1]['index'])

    # Dequantize the integer outputs back to floats using the scale/zero
    # point stored in the model (applies to quantized TPU models).
    o1_scale, o1_zero = self.tensor_output_details[0]['quantization']
    out1 = (out1.astype(np.float32) - o1_zero) * o1_scale
    o2_scale, o2_zero = self.tensor_output_details[1]['quantization']
    out2 = (out2.astype(np.float32) - o2_zero) * o2_scale

    num_classes = len(self.labels)  # NOTE(review): computed but unused below

    # Decode each scale with its anchor subset. NOTE(review): masks
    # [3, 4, 5] / [1, 2, 3] follow the upstream edge-tpu-tiny-yolo decoder;
    # confirm they match the anchors the model was trained with.
    _boxes1, _scores1, _classes1 = self.featuresToBoxes(out1, self.anchors[[3, 4, 5]], len(self.labels), net_input_shape)
    _boxes2, _scores2, _classes2 = self.featuresToBoxes(out2, self.anchors[[1, 2, 3]], len(self.labels), net_input_shape)

    # Replace empty results with correctly shaped empties so the
    # np.append calls below can concatenate along axis 0.
    if _boxes1.shape[0] == 0:
        _boxes1 = np.empty([0, 2, 2])
        _scores1 = np.empty([0,])
        _classes1 = np.empty([0,])
    if _boxes2.shape[0] == 0:
        _boxes2 = np.empty([0, 2, 2])
        _scores2 = np.empty([0,])
        _classes2 = np.empty([0,])

    boxes = np.append(_boxes1, _boxes2, axis=0)
    scores = np.append(_scores1, _scores2, axis=0)
    label_codes = np.append(_classes1, _classes2, axis=0)

    # Fixed-size output buffer; detections beyond the first 20 are
    # silently dropped (the `i < 20` guard prevents the IndexError that
    # the equivalent SSD code could hit).
    detections = np.zeros((20,6), np.float32)
    for i, score in enumerate(scores):
        if i < 20:
            # boxes are ((tl_x, tl_y), (br_x, br_y)); reorder into the
            # y-first layout the rest of the pipeline expects.
            detections[i] = [label_codes[i], score, boxes[i][0][1], boxes[i][0][0], boxes[i][1][1], boxes[i][1][0]]
    return detections
def run_detector(name: str, detection_queue: mp.Queue, out_events: Dict[str, mp.Event], avg_speed, start, model_config, tf_device, num_threads):
threading.current_thread().name = f"detector:{name}"
logger = logging.getLogger(f"detector.{name}")
logger.info(f"Starting detection process: {os.getpid()}")
@ -121,7 +241,7 @@ def run_detector(name: str, detection_queue: mp.Queue, out_events: Dict[str, mp.
signal.signal(signal.SIGINT, receiveSignal)
frame_manager = SharedMemoryFrameManager()
object_detector = LocalObjectDetector(tf_device=tf_device, num_threads=num_threads)
object_detector = LocalObjectDetector(model_config, tf_device=tf_device, num_threads=num_threads)
outputs = {}
for name in out_events.keys():
@ -140,7 +260,7 @@ def run_detector(name: str, detection_queue: mp.Queue, out_events: Dict[str, mp.
connection_id = detection_queue.get(timeout=5)
except queue.Empty:
continue
input_frame = frame_manager.get(connection_id, (1,model_shape[0],model_shape[1],3))
input_frame = frame_manager.get(connection_id, (1, model_config.height, model_config.width,3))
if input_frame is None:
continue
@ -156,16 +276,16 @@ def run_detector(name: str, detection_queue: mp.Queue, out_events: Dict[str, mp.
avg_speed.value = (avg_speed.value*9 + duration)/10
class EdgeTPUProcess():
def __init__(self, name, detection_queue, out_events, model_shape, tf_device=None, num_threads=3):
def __init__(self, name, detection_queue, out_events, model_config, tf_device=None, num_threads=3):
self.name = name
self.out_events = out_events
self.detection_queue = detection_queue
self.avg_inference_speed = mp.Value('d', 0.01)
self.detection_start = mp.Value('d', 0.0)
self.detect_process = None
self.model_shape = model_shape
self.tf_device = tf_device
self.num_threads = num_threads
self.model_config = model_config
self.start_or_restart()
def stop(self):
@ -181,7 +301,7 @@ class EdgeTPUProcess():
self.detection_start.value = 0.0
if (not self.detect_process is None) and self.detect_process.is_alive():
self.stop()
self.detect_process = mp.Process(target=run_detector, name=f"detector:{self.name}", args=(self.name, self.detection_queue, self.out_events, self.avg_inference_speed, self.detection_start, self.model_shape, self.tf_device, self.num_threads))
self.detect_process = mp.Process(target=run_detector, name=f"detector:{self.name}", args=(self.name, self.detection_queue, self.out_events, self.avg_inference_speed, self.detection_start, self.model_config, self.tf_device, self.num_threads))
self.detect_process.daemon = True
self.detect_process.start()