diff --git a/docs/docs/configuration/advanced.md b/docs/docs/configuration/advanced.md
index b03b438a4..7889b7933 100644
--- a/docs/docs/configuration/advanced.md
+++ b/docs/docs/configuration/advanced.md
@@ -112,7 +112,13 @@ detectors:
 ```yaml
 model:
   # Required: height of the trained model
-  height: 320
+  height: 416
   # Required: width of the trained model
-  width: 320
+  width: 416
+  # Required: type of model (ssd or yolo)
+  model_type: 'yolo'
+  # Required: path of label map
+  label_path: '/labelmap.txt'
+  # Optional (required when model_type is 'yolo'): anchor box sizes as comma-separated width,height pairs
+  anchors:  '10,14,  23,27,  37,58,  81,82,  135,169,  344,319'
 ```
diff --git a/docs/docs/configuration/objects.mdx b/docs/docs/configuration/objects.mdx
index 3e95f9e83..b385f3460 100644
--- a/docs/docs/configuration/objects.mdx
+++ b/docs/docs/configuration/objects.mdx
@@ -24,6 +24,32 @@ Models for both CPU and EdgeTPU (Coral) are bundled in the image. You can use yo
 
 You also need to update the model width/height in the config if they differ from the defaults.
 
+You can also try improving the speed by using a YOLOv3-tiny model quantized to run on the Edge TPU.
+
+A precompiled model is available [here](https://github.com/guichristmann/edge-tpu-tiny-yolo/tree/master/models).
+
+Add it as a volume mount in your docker-compose file:
+```yaml
+    volumes:
+      - /path/to/quant_coco-tiny-v3-relu_edgetpu.tflite:/edgetpu_model.tflite
+```
+
+And then set the configuration for the model in config.yml:
+
+```yaml
+model:
+  # Required: height of the trained model
+  height: 416
+  # Required: width of the trained model
+  width: 416
+  # Required: type of model (ssd or yolo)
+  model_type: 'yolo'
+  # Required: path of label map
+  label_path: '/labelmap.txt'
+  # Optional (required when model_type is 'yolo'): anchor box sizes as comma-separated width,height pairs
+  anchors:  '10,14,  23,27,  37,58,  81,82,  135,169,  344,319'
+```
+
 ### Customizing the Labelmap
 
 The labelmap can be customized to your needs. A common reason to do this is to combine multiple object types that are easily confused when you don't need to be as granular such as car/truck. You must retain the same number of labels, but you can change the names. To change:
diff --git a/frigate/app.py b/frigate/app.py
index 8e5178c79..4e24a81f3 100644
--- a/frigate/app.py
+++ b/frigate/app.py
@@ -31,6 +31,7 @@ from frigate.zeroconf import broadcast_zeroconf
 
 logger = logging.getLogger(__name__)
 
+
 class FrigateApp():
     def __init__(self):
         self.stop_event = mp.Event()
@@ -153,9 +154,9 @@ class FrigateApp():
 
         for name, detector in self.config.detectors.items():
             if detector.type == 'cpu':
-                self.detectors[name] = EdgeTPUProcess(name, self.detection_queue, self.detection_out_events, model_shape, 'cpu', detector.num_threads)
+                self.detectors[name] = EdgeTPUProcess(name, self.detection_queue, self.detection_out_events, self.config.model, 'cpu', detector.num_threads)
             if detector.type == 'edgetpu':
-                self.detectors[name] = EdgeTPUProcess(name, self.detection_queue, self.detection_out_events, model_shape, detector.device, detector.num_threads)
+                self.detectors[name] = EdgeTPUProcess(name, self.detection_queue, self.detection_out_events, self.config.model, detector.device, detector.num_threads)
 
     def start_detected_frames_processor(self):
         self.detected_frames_processor = TrackedObjectProcessor(self.config, self.mqtt_client, self.config.mqtt.topic_prefix, 
diff --git a/frigate/config.py b/frigate/config.py
index 0878f3c1f..87aa7d5d4 100644
--- a/frigate/config.py
+++ b/frigate/config.py
@@ -244,9 +244,12 @@ FRIGATE_CONFIG_SCHEMA = vol.Schema(
         vol.Optional('database', default={}): {
             vol.Optional('path', default=os.path.join(CLIPS_DIR, 'frigate.db')): str
         },
-        vol.Optional('model', default={'width': 320, 'height': 320}): {
+        vol.Optional('model', default={'width': 320, 'height': 320, 'model_type': 'ssd', 'label_path': '/labelmap.txt'}): {
             vol.Required('width'): int,
-            vol.Required('height'): int
+            vol.Required('height'): int,
+            vol.Required('model_type') : vol.In(['ssd', 'yolo']),
+            vol.Required('label_path') : str,
+            vol.Optional('anchors', default="") : str
         },
         vol.Optional('detectors', default=DEFAULT_DETECTORS): DETECTORS_SCHEMA,
         'mqtt': MQTT_SCHEMA,
@@ -288,6 +291,9 @@ class ModelConfig():
     def __init__(self, config):
         self._width = config['width']
         self._height = config['height']
+        self._label_path = config['label_path']
+        self._model_type = config['model_type']
+        self._anchors = config['anchors']
 
     @property
     def width(self):
@@ -297,10 +303,25 @@ class ModelConfig():
     def height(self):
         return self._height
 
+    @property
+    def label_path(self):
+        return self._label_path
+
+    @property
+    def model_type(self):
+        return self._model_type
+
+    @property
+    def anchors(self):
+        return self._anchors
+
     def to_dict(self):
         return {
             'width': self.width,
-            'height': self.height
+            'height': self.height,
+            'label_path': self.label_path,
+            'model_type': self.model_type,
+            'anchors': self.anchors
         }
 
 class DetectorConfig():
diff --git a/frigate/edgetpu.py b/frigate/edgetpu.py
index d65ce523b..85d0b6a45 100644
--- a/frigate/edgetpu.py
+++ b/frigate/edgetpu.py
@@ -27,8 +27,10 @@ def load_labels(path, encoding='utf-8'):
   Returns:
     Dictionary mapping indices to labels.
   """
+  logger.warn(f"Loaded labels from {path}")
   with open(path, 'r', encoding=encoding) as f:
     lines = f.readlines()
+
     if not lines:
         return {}
 
@@ -44,13 +46,11 @@ class ObjectDetector(ABC):
         pass
 
 class LocalObjectDetector(ObjectDetector):
-    def __init__(self, tf_device=None, num_threads=3, labels=None):
+    def __init__(self, model_config, tf_device=None, num_threads=3):
         self.fps = EventsPerSecond()
-        if labels is None:
-            self.labels = {}
-        else:
-            self.labels = load_labels(labels)
-
+        self.labels = load_labels(model_config.label_path)
+        self.model_config = model_config
+            
         device_config = {"device": "usb"}
         if not tf_device is None:
             device_config = {"device": tf_device}
@@ -76,7 +76,11 @@ class LocalObjectDetector(ObjectDetector):
 
         self.tensor_input_details = self.interpreter.get_input_details()
         self.tensor_output_details = self.interpreter.get_output_details()
-    
+
+        if model_config.anchors != "":
+            anchors = [float(x) for x in model_config.anchors.split(',')]
+            self.anchors = np.array(anchors).reshape(-1, 2)
+
     def detect(self, tensor_input, threshold=.4):
         detections = []
 
@@ -93,20 +97,136 @@ class LocalObjectDetector(ObjectDetector):
         self.fps.update()
         return detections
 
+    def sigmoid(self, x):
+        return 1. / (1 + np.exp(-x))
+
     def detect_raw(self, tensor_input):
+        if self.model_config.model_type == "ssd":
+            raw_detections = self.detect_ssd(tensor_input)
+        elif self.model_config.model_type == "yolo":
+            raw_detections = self.detect_yolo(tensor_input)
+        else:
+            logger.error(f"Unsupported model type {self.model_config.model_type}")
+            raw_detections = []
+        return raw_detections
+
+
+    def get_interpreter_details(self):
+        # Get input and output tensor details
+        input_details = self.interpreter.get_input_details()
+        output_details = self.interpreter.get_output_details()
+        input_shape = input_details[0]["shape"]
+        return input_details, output_details, input_shape
+
+    # from util.py in https://github.com/guichristmann/edge-tpu-tiny-yolo
+    def featuresToBoxes(self, outputs, anchors, n_classes, net_input_shape):
+        grid_shape = outputs.shape[1:3]
+        n_anchors = len(anchors)
+
+        # NumPy tiling/reshaping tricks to build the grid offsets so the boxes are computed in a reasonable amount of time
+        grid_y = np.tile(np.arange(grid_shape[0]).reshape(-1, 1), grid_shape[0]).reshape(1, grid_shape[0], grid_shape[0], 1).astype(np.float32)
+        grid_x = grid_y.copy().T.reshape(1, grid_shape[0], grid_shape[1], 1).astype(np.float32)
+        outputs = outputs.reshape(1, grid_shape[0], grid_shape[1], n_anchors, -1)
+        _anchors = anchors.reshape(1, 1, 3, 2).astype(np.float32)
+
+        # Get box parameters from network output and apply transformations
+        bx = (self.sigmoid(outputs[..., 0]) + grid_x) / grid_shape[0] 
+        by = (self.sigmoid(outputs[..., 1]) + grid_y) / grid_shape[1]
+        # Should these be inverted?
+        bw = np.multiply(_anchors[..., 0] / net_input_shape[1], np.exp(outputs[..., 2]))
+        bh = np.multiply(_anchors[..., 1] / net_input_shape[2], np.exp(outputs[..., 3]))
+        
+        # Get the scores 
+        scores = self.sigmoid(np.expand_dims(outputs[..., 4], -1)) * \
+                self.sigmoid(outputs[..., 5:])
+        scores = scores.reshape(-1, n_classes)
+
+        # TODO: some of these are probably not needed but I don't understand numpy magic well enough
+        bx = bx.flatten()
+        by = (by.flatten()) * 1
+        bw = bw.flatten()
+        bh = bh.flatten() * 1
+        half_bw = bw / 2.
+        half_bh = bh / 2.
+
+        tl_x = np.multiply(bx - half_bw, 1)
+        tl_y = np.multiply(by - half_bh, 1) 
+        br_x = np.multiply(bx + half_bw, 1)
+        br_y = np.multiply(by + half_bh, 1)
+
+        # Get indices of boxes with score higher than threshold
+        indices = np.argwhere(scores >= 0.5)
+        selected_boxes = []
+        selected_scores = []
+        for i in indices:
+            i = tuple(i)
+            selected_boxes.append( ((tl_x[i[0]], tl_y[i[0]]), (br_x[i[0]], br_y[i[0]])) )
+            selected_scores.append(scores[i])
+
+        selected_boxes = np.array(selected_boxes)
+        selected_scores = np.array(selected_scores)
+        selected_classes = indices[:, 1]
+
+        return selected_boxes, selected_scores, selected_classes
+        
+    def detect_ssd(self, tensor_input):
         self.interpreter.set_tensor(self.tensor_input_details[0]['index'], tensor_input)
         self.interpreter.invoke()
         boxes = np.squeeze(self.interpreter.get_tensor(self.tensor_output_details[0]['index']))
         label_codes = np.squeeze(self.interpreter.get_tensor(self.tensor_output_details[1]['index']))
         scores = np.squeeze(self.interpreter.get_tensor(self.tensor_output_details[2]['index']))
 
+        # TODO: there's definitely a bug here. 20 seems to be artificially chosen and you'll get an indexing error if there are
+        # more than 20 results.
         detections = np.zeros((20,6), np.float32)
         for i, score in enumerate(scores):
             detections[i] = [label_codes[i], score, boxes[i][0], boxes[i][1], boxes[i][2], boxes[i][3]]
         
         return detections
 
-def run_detector(name: str, detection_queue: mp.Queue, out_events: Dict[str, mp.Event], avg_speed, start, model_shape, tf_device, num_threads):
+    def detect_yolo(self, tensor_input):
+        input_details, output_details, net_input_shape = \
+            self.get_interpreter_details()
+
+        self.interpreter.set_tensor(self.tensor_input_details[0]['index'], tensor_input)
+        self.interpreter.invoke()
+
+        # for yolo, it's a little different: two raw output tensors are read and decoded separately
+        out1 = self.interpreter.get_tensor(self.tensor_output_details[0]['index'])
+        out2 = self.interpreter.get_tensor(self.tensor_output_details[1]['index'])
+
+        # Dequantize output (tpu only)
+        o1_scale, o1_zero = self.tensor_output_details[0]['quantization']
+        out1 = (out1.astype(np.float32) - o1_zero) * o1_scale
+        o2_scale, o2_zero = self.tensor_output_details[1]['quantization']
+        out2 = (out2.astype(np.float32) - o2_zero) * o2_scale
+
+        num_classes = len(self.labels)
+        _boxes1, _scores1, _classes1 = self.featuresToBoxes(out1, self.anchors[[3, 4, 5]], len(self.labels), net_input_shape)
+        _boxes2, _scores2, _classes2 = self.featuresToBoxes(out2, self.anchors[[1, 2, 3]],  len(self.labels), net_input_shape)
+
+        if _boxes1.shape[0] == 0:
+            _boxes1 = np.empty([0, 2, 2])
+            _scores1 = np.empty([0,])
+            _classes1 = np.empty([0,])
+        if _boxes2.shape[0] == 0:
+            _boxes2 = np.empty([0, 2, 2])
+            _scores2 = np.empty([0,])
+            _classes2 = np.empty([0,])
+        boxes = np.append(_boxes1, _boxes2, axis=0)
+        scores = np.append(_scores1, _scores2, axis=0)
+        label_codes = np.append(_classes1, _classes2, axis=0)
+
+        # TODO: 20 seems to be artificially chosen. The `i < 20` guard below avoids an indexing error, but any
+        # results beyond the first 20 are silently dropped.
+        detections = np.zeros((20,6), np.float32)
+        for i, score in enumerate(scores):
+            if i < 20:
+                detections[i] = [label_codes[i], score, boxes[i][0][1], boxes[i][0][0], boxes[i][1][1], boxes[i][1][0]]
+        
+        return detections
+
+def run_detector(name: str, detection_queue: mp.Queue, out_events: Dict[str, mp.Event], avg_speed, start, model_config, tf_device, num_threads):
     threading.current_thread().name = f"detector:{name}"
     logger = logging.getLogger(f"detector.{name}")
     logger.info(f"Starting detection process: {os.getpid()}")
@@ -121,7 +241,7 @@ def run_detector(name: str, detection_queue: mp.Queue, out_events: Dict[str, mp.
     signal.signal(signal.SIGINT, receiveSignal)
 
     frame_manager = SharedMemoryFrameManager()
-    object_detector = LocalObjectDetector(tf_device=tf_device, num_threads=num_threads)
+    object_detector = LocalObjectDetector(model_config, tf_device=tf_device, num_threads=num_threads)
 
     outputs = {}
     for name in out_events.keys():
@@ -140,7 +260,7 @@ def run_detector(name: str, detection_queue: mp.Queue, out_events: Dict[str, mp.
             connection_id = detection_queue.get(timeout=5)
         except queue.Empty:
             continue
-        input_frame = frame_manager.get(connection_id, (1,model_shape[0],model_shape[1],3))
+        input_frame = frame_manager.get(connection_id, (1, model_config.height, model_config.width,3))
 
         if input_frame is None:
             continue
@@ -156,16 +276,16 @@ def run_detector(name: str, detection_queue: mp.Queue, out_events: Dict[str, mp.
         avg_speed.value = (avg_speed.value*9 + duration)/10
         
 class EdgeTPUProcess():
-    def __init__(self, name, detection_queue, out_events, model_shape, tf_device=None, num_threads=3):
+    def __init__(self, name, detection_queue, out_events, model_config, tf_device=None, num_threads=3):
         self.name = name
         self.out_events = out_events
         self.detection_queue = detection_queue
         self.avg_inference_speed = mp.Value('d', 0.01)
         self.detection_start = mp.Value('d', 0.0)
         self.detect_process = None
-        self.model_shape = model_shape
         self.tf_device = tf_device
         self.num_threads = num_threads
+        self.model_config = model_config
         self.start_or_restart()
     
     def stop(self):
@@ -181,7 +301,7 @@ class EdgeTPUProcess():
         self.detection_start.value = 0.0
         if (not self.detect_process is None) and self.detect_process.is_alive():
             self.stop()
-        self.detect_process = mp.Process(target=run_detector, name=f"detector:{self.name}", args=(self.name, self.detection_queue, self.out_events, self.avg_inference_speed, self.detection_start, self.model_shape, self.tf_device, self.num_threads))
+        self.detect_process = mp.Process(target=run_detector, name=f"detector:{self.name}", args=(self.name, self.detection_queue, self.out_events, self.avg_inference_speed, self.detection_start, self.model_config, self.tf_device, self.num_threads))
         self.detect_process.daemon = True
         self.detect_process.start()