Initial support for YOLOv3-tiny

This commit is contained in:
Michael Wei 2021-04-24 23:48:42 +00:00
parent 42410a260c
commit 012e2ca00d
5 changed files with 194 additions and 20 deletions

View File

@ -112,7 +112,13 @@ detectors:
```yaml
model:
# Required: height of the trained model
height: 320
height: 416
# Required: width of the trained model
width: 320
width: 416
# Required: type of model (ssd or yolo)
model_type: 'yolo'
# Required: path of label map
label_path: '/labelmap.txt'
# Optional: (but required for yolo) - anchors, comma separated
anchors: '10,14, 23,27, 37,58, 81,82, 135,169, 344,319'
```

View File

@ -24,6 +24,32 @@ Models for both CPU and EdgeTPU (Coral) are bundled in the image. You can use yo
You also need to update the model width/height in the config if they differ from the defaults.
You can also try improving speed by using a YOLOv3-tiny model quantized to work on the Edge TPU.
A pre-compiled model is available [here](https://github.com/guichristmann/edge-tpu-tiny-yolo/tree/master/models).
Add it as a volume mount in your docker-compose file:
```yaml
volumes:
- /path/to/quant_coco-tiny-v3-relu_edgetpu.tflite:/edgetpu_model.tflite
```
And then set the configuration for the model in config.yml:
```yaml
model:
# Required: height of the trained model
height: 416
# Required: width of the trained model
width: 416
# Required: type of model (ssd or yolo)
model_type: 'yolo'
# Required: path of label map
label_path: '/labelmap.txt'
# Optional: (but required for yolo) - anchors, comma-separated
anchors: '10,14, 23,27, 37,58, 81,82, 135,169, 344,319'
```
### Customizing the Labelmap
The labelmap can be customized to your needs. A common reason to do this is to combine multiple object types that are easily confused, such as car/truck, when you don't need to be as granular. You must retain the same number of labels, but you can change the names. To change:

View File

@ -31,6 +31,7 @@ from frigate.zeroconf import broadcast_zeroconf
logger = logging.getLogger(__name__)
class FrigateApp():
def __init__(self):
self.stop_event = mp.Event()
@ -153,9 +154,9 @@ class FrigateApp():
for name, detector in self.config.detectors.items():
if detector.type == 'cpu':
self.detectors[name] = EdgeTPUProcess(name, self.detection_queue, self.detection_out_events, model_shape, 'cpu', detector.num_threads)
self.detectors[name] = EdgeTPUProcess(name, self.detection_queue, self.detection_out_events, self.config.model, 'cpu', detector.num_threads)
if detector.type == 'edgetpu':
self.detectors[name] = EdgeTPUProcess(name, self.detection_queue, self.detection_out_events, model_shape, detector.device, detector.num_threads)
self.detectors[name] = EdgeTPUProcess(name, self.detection_queue, self.detection_out_events, self.config.model, detector.device, detector.num_threads)
def start_detected_frames_processor(self):
self.detected_frames_processor = TrackedObjectProcessor(self.config, self.mqtt_client, self.config.mqtt.topic_prefix,

View File

@ -244,9 +244,12 @@ FRIGATE_CONFIG_SCHEMA = vol.Schema(
vol.Optional('database', default={}): {
vol.Optional('path', default=os.path.join(CLIPS_DIR, 'frigate.db')): str
},
vol.Optional('model', default={'width': 320, 'height': 320}): {
vol.Optional('model', default={'width': 320, 'height': 320, 'model_type': 'ssd', 'label_path': '/labelmap.txt'}): {
vol.Required('width'): int,
vol.Required('height'): int
vol.Required('height'): int,
vol.Required('model_type') : vol.In(['ssd', 'yolo']),
vol.Required('label_path') : str,
vol.Optional('anchors', default="") : str
},
vol.Optional('detectors', default=DEFAULT_DETECTORS): DETECTORS_SCHEMA,
'mqtt': MQTT_SCHEMA,
@ -288,6 +291,9 @@ class ModelConfig():
def __init__(self, config):
self._width = config['width']
self._height = config['height']
self._label_path = config['label_path']
self._model_type = config['model_type']
self._anchors = config['anchors']
@property
def width(self):
@ -297,10 +303,25 @@ class ModelConfig():
def height(self):
return self._height
@property
def label_path(self):
return self._label_path
@property
def model_type(self):
return self._model_type
@property
def anchors(self):
return self._anchors
def to_dict(self):
return {
'width': self.width,
'height': self.height
'height': self.height,
'label_path': self.label_path,
'model_type': self.model_type,
'anchors': self.anchors
}
class DetectorConfig():

View File

@ -27,8 +27,10 @@ def load_labels(path, encoding='utf-8'):
Returns:
Dictionary mapping indices to labels.
"""
logger.warn(f"Loaded labels from {path}")
with open(path, 'r', encoding=encoding) as f:
lines = f.readlines()
if not lines:
return {}
@ -44,13 +46,11 @@ class ObjectDetector(ABC):
pass
class LocalObjectDetector(ObjectDetector):
def __init__(self, tf_device=None, num_threads=3, labels=None):
def __init__(self, model_config, tf_device=None, num_threads=3):
self.fps = EventsPerSecond()
if labels is None:
self.labels = {}
else:
self.labels = load_labels(labels)
self.labels = load_labels(model_config.label_path)
self.model_config = model_config
device_config = {"device": "usb"}
if not tf_device is None:
device_config = {"device": tf_device}
@ -76,7 +76,11 @@ class LocalObjectDetector(ObjectDetector):
self.tensor_input_details = self.interpreter.get_input_details()
self.tensor_output_details = self.interpreter.get_output_details()
if model_config.anchors != "":
anchors = [float(x) for x in model_config.anchors.split(',')]
self.anchors = np.array(anchors).reshape(-1, 2)
def detect(self, tensor_input, threshold=.4):
detections = []
@ -93,20 +97,136 @@ class LocalObjectDetector(ObjectDetector):
self.fps.update()
return detections
def sigmoid(self, x):
    # Logistic function: squashes any real-valued input into (0, 1).
    return np.reciprocal(1.0 + np.exp(-x))
def detect_raw(self, tensor_input):
    """Run raw inference using the backend that matches the configured model type.

    Returns the backend's detection array, or an empty list when the
    configured model_type is neither 'ssd' nor 'yolo'.
    """
    model_type = self.model_config.model_type
    if model_type == "ssd":
        return self.detect_ssd(tensor_input)
    if model_type == "yolo":
        return self.detect_yolo(tensor_input)
    logger.error(f"Unsupported model type {model_type}")
    return []
def get_interpreter_details(self):
    """Return (input_details, output_details, input_shape) for the loaded interpreter.

    input_shape is the shape of the first input tensor.
    """
    in_details = self.interpreter.get_input_details()
    out_details = self.interpreter.get_output_details()
    return in_details, out_details, in_details[0]["shape"]
# Decoding adapted from util.py in https://github.com/guichristmann/edge-tpu-tiny-yolo
def featuresToBoxes(self, outputs, anchors, n_classes, net_input_shape):
    """Decode one YOLO output feature map into boxes, scores and classes.

    Args:
        outputs: dequantized feature map, shape
            (1, grid_h, grid_w, n_anchors * (5 + n_classes)).
        anchors: (n_anchors, 2) anchor sizes in input-image pixels.
        n_classes: number of object classes.
        net_input_shape: interpreter input shape, e.g. (1, H, W, 3).

    Returns:
        (selected_boxes, selected_scores, selected_classes) for every
        anchor/cell whose score is >= 0.5; boxes are
        ((tl_x, tl_y), (br_x, br_y)) normalized to [0, 1].
    """
    grid_shape = outputs.shape[1:3]
    n_anchors = len(anchors)

    # Build per-cell grid offsets with broadcasting (vectorized for speed).
    # NOTE(review): grid_shape[0] is used for both axes, so this assumes a
    # square grid -- true for YOLOv3-tiny's 13x13 / 26x26 maps; confirm for
    # other models.
    grid_y = np.tile(np.arange(grid_shape[0]).reshape(-1, 1), grid_shape[0]).reshape(1, grid_shape[0], grid_shape[0], 1).astype(np.float32)
    grid_x = grid_y.copy().T.reshape(1, grid_shape[0], grid_shape[1], 1).astype(np.float32)
    # Split the flat channel dimension into one slot per anchor.
    outputs = outputs.reshape(1, grid_shape[0], grid_shape[1], n_anchors, -1)
    # NOTE(review): reshape hard-codes 3 anchors even though n_anchors is
    # computed above -- fine for YOLOv3-tiny (3 anchors per scale).
    _anchors = anchors.reshape(1, 1, 3, 2).astype(np.float32)

    # Standard YOLO decode: sigmoid cell offset + grid index, normalized by
    # grid size; anchor sizes scaled by exp() of the raw w/h outputs.
    bx = (self.sigmoid(outputs[..., 0]) + grid_x) / grid_shape[0]
    by = (self.sigmoid(outputs[..., 1]) + grid_y) / grid_shape[1]
    # NOTE(review): upstream author was unsure whether net_input_shape's
    # w/h axes are swapped here -- harmless when the input is square.
    bw = np.multiply(_anchors[..., 0] / net_input_shape[1], np.exp(outputs[..., 2]))
    bh = np.multiply(_anchors[..., 1] / net_input_shape[2], np.exp(outputs[..., 3]))

    # Per-class score = objectness * class probability.
    scores = self.sigmoid(np.expand_dims(outputs[..., 4], -1)) * \
             self.sigmoid(outputs[..., 5:])
    scores = scores.reshape(-1, n_classes)

    # NOTE(review): the `* 1` / multiply-by-1 operations below are no-ops
    # kept from the original implementation; only the flattens matter.
    bx = bx.flatten()
    by = (by.flatten()) * 1
    bw = bw.flatten()
    bh = bh.flatten() * 1
    half_bw = bw / 2.
    half_bh = bh / 2.

    # Convert center/size to top-left and bottom-right corners.
    tl_x = np.multiply(bx - half_bw, 1)
    tl_y = np.multiply(by - half_bh, 1)
    br_x = np.multiply(bx + half_bw, 1)
    br_y = np.multiply(by + half_bh, 1)

    # Keep only (cell, class) pairs with score above the hard-coded 0.5
    # threshold; indices[:, 0] is the flat box index, indices[:, 1] the class.
    indices = np.argwhere(scores >= 0.5)
    selected_boxes = []
    selected_scores = []
    for i in indices:
        i = tuple(i)
        selected_boxes.append( ((tl_x[i[0]], tl_y[i[0]]), (br_x[i[0]], br_y[i[0]])) )
        selected_scores.append(scores[i])

    selected_boxes = np.array(selected_boxes)
    selected_scores = np.array(selected_scores)
    selected_classes = indices[:, 1]

    return selected_boxes, selected_scores, selected_classes
def detect_ssd(self, tensor_input):
    """Run an SSD model on tensor_input and return detections.

    Returns a fixed-size (20, 6) float32 array where each row is
    [label_code, score, box[0], box[1], box[2], box[3]] -- box coordinate
    order is whatever the model emits (typically ymin, xmin, ymax, xmax for
    TFLite SSD post-processing; confirm against the model). Unused rows
    stay zero.
    """
    self.interpreter.set_tensor(self.tensor_input_details[0]['index'], tensor_input)
    self.interpreter.invoke()

    # reshape/atleast_1d guard against np.squeeze collapsing the outputs
    # to 0-d / 1-d arrays when the model returns exactly one detection.
    boxes = np.squeeze(self.interpreter.get_tensor(self.tensor_output_details[0]['index'])).reshape(-1, 4)
    label_codes = np.atleast_1d(np.squeeze(self.interpreter.get_tensor(self.tensor_output_details[1]['index'])))
    scores = np.atleast_1d(np.squeeze(self.interpreter.get_tensor(self.tensor_output_details[2]['index'])))

    # Fixed-size buffer expected downstream. Cap at 20 rows: the previous
    # version raised IndexError when the model produced more results.
    detections = np.zeros((20, 6), np.float32)
    for i, score in enumerate(scores):
        if i >= 20:
            break
        detections[i] = [label_codes[i], score, boxes[i][0], boxes[i][1], boxes[i][2], boxes[i][3]]
    return detections
def run_detector(name: str, detection_queue: mp.Queue, out_events: Dict[str, mp.Event], avg_speed, start, model_shape, tf_device, num_threads):
def detect_yolo(self, tensor_input):
    """Run a YOLOv3-tiny model on tensor_input and return up to 20 detections.

    Returns a (20, 6) float32 array where each row is
    [label_code, score, tl_y, tl_x, br_y, br_x] with coordinates
    normalized to [0, 1]; unused rows stay zero.
    """
    input_details, output_details, net_input_shape = \
        self.get_interpreter_details()
    self.interpreter.set_tensor(self.tensor_input_details[0]['index'], tensor_input)
    self.interpreter.invoke()

    # Unlike SSD, YOLOv3-tiny exposes two raw feature maps (one per
    # detection scale) that must be decoded manually.
    out1 = self.interpreter.get_tensor(self.tensor_output_details[0]['index'])
    out2 = self.interpreter.get_tensor(self.tensor_output_details[1]['index'])

    # Dequantize the integer outputs back to floats using the scale/zero
    # point stored in the model (applies to quantized TPU models).
    o1_scale, o1_zero = self.tensor_output_details[0]['quantization']
    out1 = (out1.astype(np.float32) - o1_zero) * o1_scale
    o2_scale, o2_zero = self.tensor_output_details[1]['quantization']
    out2 = (out2.astype(np.float32) - o2_zero) * o2_scale

    num_classes = len(self.labels)  # NOTE(review): computed but unused below

    # Decode each scale with its anchor subset. NOTE(review): masks
    # [3, 4, 5] / [1, 2, 3] follow the upstream edge-tpu-tiny-yolo decoder;
    # confirm they match the anchors the model was trained with.
    _boxes1, _scores1, _classes1 = self.featuresToBoxes(out1, self.anchors[[3, 4, 5]], len(self.labels), net_input_shape)
    _boxes2, _scores2, _classes2 = self.featuresToBoxes(out2, self.anchors[[1, 2, 3]], len(self.labels), net_input_shape)

    # Replace empty results with correctly shaped empties so the
    # np.append calls below can concatenate along axis 0.
    if _boxes1.shape[0] == 0:
        _boxes1 = np.empty([0, 2, 2])
        _scores1 = np.empty([0,])
        _classes1 = np.empty([0,])
    if _boxes2.shape[0] == 0:
        _boxes2 = np.empty([0, 2, 2])
        _scores2 = np.empty([0,])
        _classes2 = np.empty([0,])

    boxes = np.append(_boxes1, _boxes2, axis=0)
    scores = np.append(_scores1, _scores2, axis=0)
    label_codes = np.append(_classes1, _classes2, axis=0)

    # Fixed-size output buffer; detections beyond the first 20 are
    # silently dropped (the `i < 20` guard prevents the IndexError that
    # the equivalent SSD code could hit).
    detections = np.zeros((20,6), np.float32)
    for i, score in enumerate(scores):
        if i < 20:
            # boxes are ((tl_x, tl_y), (br_x, br_y)); reorder into the
            # y-first layout the rest of the pipeline expects.
            detections[i] = [label_codes[i], score, boxes[i][0][1], boxes[i][0][0], boxes[i][1][1], boxes[i][1][0]]
    return detections
def run_detector(name: str, detection_queue: mp.Queue, out_events: Dict[str, mp.Event], avg_speed, start, model_config, tf_device, num_threads):
threading.current_thread().name = f"detector:{name}"
logger = logging.getLogger(f"detector.{name}")
logger.info(f"Starting detection process: {os.getpid()}")
@ -121,7 +241,7 @@ def run_detector(name: str, detection_queue: mp.Queue, out_events: Dict[str, mp.
signal.signal(signal.SIGINT, receiveSignal)
frame_manager = SharedMemoryFrameManager()
object_detector = LocalObjectDetector(tf_device=tf_device, num_threads=num_threads)
object_detector = LocalObjectDetector(model_config, tf_device=tf_device, num_threads=num_threads)
outputs = {}
for name in out_events.keys():
@ -140,7 +260,7 @@ def run_detector(name: str, detection_queue: mp.Queue, out_events: Dict[str, mp.
connection_id = detection_queue.get(timeout=5)
except queue.Empty:
continue
input_frame = frame_manager.get(connection_id, (1,model_shape[0],model_shape[1],3))
input_frame = frame_manager.get(connection_id, (1, model_config.height, model_config.width,3))
if input_frame is None:
continue
@ -156,16 +276,16 @@ def run_detector(name: str, detection_queue: mp.Queue, out_events: Dict[str, mp.
avg_speed.value = (avg_speed.value*9 + duration)/10
class EdgeTPUProcess():
def __init__(self, name, detection_queue, out_events, model_shape, tf_device=None, num_threads=3):
def __init__(self, name, detection_queue, out_events, model_config, tf_device=None, num_threads=3):
self.name = name
self.out_events = out_events
self.detection_queue = detection_queue
self.avg_inference_speed = mp.Value('d', 0.01)
self.detection_start = mp.Value('d', 0.0)
self.detect_process = None
self.model_shape = model_shape
self.tf_device = tf_device
self.num_threads = num_threads
self.model_config = model_config
self.start_or_restart()
def stop(self):
@ -181,7 +301,7 @@ class EdgeTPUProcess():
self.detection_start.value = 0.0
if (not self.detect_process is None) and self.detect_process.is_alive():
self.stop()
self.detect_process = mp.Process(target=run_detector, name=f"detector:{self.name}", args=(self.name, self.detection_queue, self.out_events, self.avg_inference_speed, self.detection_start, self.model_shape, self.tf_device, self.num_threads))
self.detect_process = mp.Process(target=run_detector, name=f"detector:{self.name}", args=(self.name, self.detection_queue, self.out_events, self.avg_inference_speed, self.detection_start, self.model_config, self.tf_device, self.num_threads))
self.detect_process.daemon = True
self.detect_process.start()