diff --git a/config/coco_labels.txt b/config/coco_labels.txt
new file mode 100644
index 000000000..1ee473cfa
--- /dev/null
+++ b/config/coco_labels.txt
@@ -0,0 +1,80 @@
+0 person
+1 bicycle
+2 car
+3 motorcycle
+4 airplane
+5 bus
+6 train
+7 truck
+8 boat
+9 traffic light
+10 fire hydrant
+12 stop sign
+13 parking meter
+14 bench
+15 bird
+16 cat
+17 dog
+18 horse
+19 sheep
+20 cow
+21 elephant
+22 bear
+23 zebra
+24 giraffe
+26 backpack
+27 umbrella
+30 handbag
+31 tie
+32 suitcase
+33 frisbee
+34 skis
+35 snowboard
+36 sports ball
+37 kite
+38 baseball bat
+39 baseball glove
+40 skateboard
+41 surfboard
+42 tennis racket
+43 bottle
+45 wine glass
+46 cup
+47 fork
+48 knife
+49 spoon
+50 bowl
+51 banana
+52 apple
+53 sandwich
+54 orange
+55 broccoli
+56 carrot
+57 hot dog
+58 pizza
+59 donut
+60 cake
+61 chair
+62 couch
+63 potted plant
+64 bed
+66 dining table
+69 toilet
+71 tv
+72 laptop
+73 mouse
+74 remote
+75 keyboard
+76 cell phone
+77 microwave
+78 oven
+79 toaster
+80 sink
+81 refrigerator
+83 book
+84 clock
+85 vase
+86 scissors
+87 teddy bear
+88 hair drier
+89 toothbrush
\ No newline at end of file
diff --git a/config/mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite b/config/mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite
new file mode 100644
index 000000000..ee79e20b2
Binary files /dev/null and b/config/mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite differ
diff --git a/config/mobilenet_ssd_v2_face_quant_postprocess_edgetpu.tflite b/config/mobilenet_ssd_v2_face_quant_postprocess_edgetpu.tflite
new file mode 100644
index 000000000..5032e476f
Binary files /dev/null and b/config/mobilenet_ssd_v2_face_quant_postprocess_edgetpu.tflite differ
diff --git a/detect_objects.py b/detect_objects.py
old mode 100644
new mode 100755
index a3dfb2215..a680a9bef
--- a/detect_objects.py
+++ b/detect_objects.py
@@ -1,4 +1,5 @@
 import cv2
+from PIL import Image
 import time
 import queue
 import yaml
@@ -52,9 +53,11 @@ def main():
     for name, config in CONFIG['cameras'].items():
         cameras[name] = Camera(name, config, prepped_frame_queue, client, MQTT_TOPIC_PREFIX)
 
+    tf_args = CONFIG.get('tensorflow', None)
     prepped_queue_processor = PreppedQueueProcessor(
         cameras,
-        prepped_frame_queue
+        prepped_frame_queue,
+        tf_args
     )
 
     prepped_queue_processor.start()
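
Note: the `tensorflow` section read via `CONFIG.get('tensorflow', None)` above lives in the same config.yml that defines the cameras. A minimal stanza exercising the new code path might look like the following; the `model` and `labels` key names match the `tf_args.get(...)` lookups in object_detection.py below, but the exact paths are illustrative and depend on where the new config/ files are mounted:

    # hypothetical config.yml excerpt
    tensorflow:
      model: /config/mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite
      labels: /config/coco_labels.txt

Leaving out `labels` (e.g. for the face model, which ships without a label file) makes the processor fall back to label-less detections.
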
diff --git a/frigate/object_detection.py b/frigate/object_detection.py
old mode 100644
new mode 100755
index 9006f4179..6c598a321
--- a/frigate/object_detection.py
+++ b/frigate/object_detection.py
@@ -1,6 +1,7 @@
 import datetime
 import time
 import cv2
+from PIL import Image
 import threading
 import numpy as np
 from edgetpu.detection.engine import DetectionEngine
@@ -13,6 +14,7 @@ PATH_TO_LABELS = '/label_map.pbtext'
 
 # Function to read labels from text files.
 def ReadLabelFile(file_path):
+    print("Loading Tensorflow model labels: " + str(file_path))
     with open(file_path, 'r') as f:
         lines = f.readlines()
     ret = {}
@@ -22,15 +24,26 @@ def ReadLabelFile(file_path):
     return ret
 
 class PreppedQueueProcessor(threading.Thread):
-    def __init__(self, cameras, prepped_frame_queue):
+    def __init__(self, cameras, prepped_frame_queue, tf_args):
 
         threading.Thread.__init__(self)
         self.cameras = cameras
         self.prepped_frame_queue = prepped_frame_queue
 
+        # print("tf_args: " + str(tf_args))
         # Load the edgetpu engine and labels
-        self.engine = DetectionEngine(PATH_TO_CKPT)
-        self.labels = ReadLabelFile(PATH_TO_LABELS)
+        if tf_args is not None:
+            tf_model = tf_args.get('model', None)
+            tf_labels = tf_args.get('labels', None)
+        else:
+            tf_model = PATH_TO_CKPT
+            tf_labels = PATH_TO_LABELS
+        print("Loading Tensorflow model: " + str(tf_model))
+        self.engine = DetectionEngine(tf_model)
+        if tf_labels:
+            self.labels = ReadLabelFile(tf_labels)
+        else:
+            self.labels = None
 
     def run(self):
         # process queue...
@@ -38,22 +51,37 @@ class PreppedQueueProcessor(threading.Thread):
             frame = self.prepped_frame_queue.get()
 
             # Actual detection.
-            objects = self.engine.DetectWithInputTensor(frame['frame'], threshold=frame['region_threshold'], top_k=3)
+#            objects = self.engine.DetectWithInputTensor(frame['frame'], threshold=frame['region_threshold'], top_k=3)
             # print(self.engine.get_inference_time())
+            objects = self.engine.DetectWithImage(
+                frame['img'],
+                threshold=frame['region_threshold'],
+                keep_aspect_ratio=True,
+                relative_coord=True,
+                top_k=3)
+
             # parse and pass detected objects back to the camera
             parsed_objects = []
             for obj in objects:
+                print("Detected an object: \n\n")
+                print("Detected object label index: " + str(obj.label_id) + "\n\n")
                 box = obj.bounding_box.flatten().tolist()
-                parsed_objects.append({
+                if self.labels:
+                    obj_name = str(self.labels[obj.label_id])
+                else:
+                    obj_name = " " # no labels, just a yes/no type of detection
+                detection = {
                     'frame_time': frame['frame_time'],
-                    'name': str(self.labels[obj.label_id]),
+                    'name': obj_name,
                     'score': float(obj.score),
                     'xmin': int((box[0] * frame['region_size']) + frame['region_x_offset']),
                     'ymin': int((box[1] * frame['region_size']) + frame['region_y_offset']),
                     'xmax': int((box[2] * frame['region_size']) + frame['region_x_offset']),
                     'ymax': int((box[3] * frame['region_size']) + frame['region_y_offset'])
-                })
+                }
+                print(str(detection) + "\n")
+                parsed_objects.append(detection)
 
             self.cameras[frame['camera_name']].add_objects(parsed_objects)
@@ -90,19 +118,21 @@ class FramePrepper(threading.Thread):
             with self.frame_lock:
                 cropped_frame = self.shared_frame[self.region_y_offset:self.region_y_offset+self.region_size, self.region_x_offset:self.region_x_offset+self.region_size].copy()
                 frame_time = self.frame_time.value
-
-            # Resize to 300x300 if needed
-            if cropped_frame.shape != (300, 300, 3):
-                cropped_frame = cv2.resize(cropped_frame, dsize=(300, 300), interpolation=cv2.INTER_LINEAR)
-            # Expand dimensions since the model expects images to have shape: [1, 300, 300, 3]
-            frame_expanded = np.expand_dims(cropped_frame, axis=0)
+#            # Resize to 300x300 if needed
+#            if cropped_frame.shape != (300, 300, 3):
+#                cropped_frame = cv2.resize(cropped_frame, dsize=(300, 300), interpolation=cv2.INTER_LINEAR)
+#            # Expand dimensions since the model expects images to have shape: [1, 300, 300, 3]
+#            frame_expanded = np.expand_dims(cropped_frame, axis=0)
+
+            img = Image.fromarray(cropped_frame)
 
             # add the frame to the queue
             if not self.prepped_frame_queue.full():
                 self.prepped_frame_queue.put({
                     'camera_name': self.camera_name,
                     'frame_time': frame_time,
-                    'frame': frame_expanded.flatten().copy(),
+#                    'frame': frame_expanded.flatten().copy(),
+                    'img': img,
                     'region_size': self.region_size,
                     'region_threshold': self.region_threshold,
                     'region_x_offset': self.region_x_offset,
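
Since `DetectWithImage` is called with `relative_coord=True`, each bounding box comes back normalized to [0, 1] relative to the cropped region, and the `region_size`/`region_*_offset` arithmetic above projects it back into full-frame pixel coordinates. A standalone sketch of that mapping (the helper name and sample numbers are illustrative, not part of the patch):

    # Map a normalized [0, 1] box from a square crop back to full-frame pixels.
    def scale_box_to_frame(box, region_size, x_offset, y_offset):
        xmin, ymin, xmax, ymax = box
        return (int(xmin * region_size + x_offset),
                int(ymin * region_size + y_offset),
                int(xmax * region_size + x_offset),
                int(ymax * region_size + y_offset))

    # A 300x300 region cropped at (600, 200):
    # scale_box_to_frame((0.1, 0.2, 0.5, 0.9), 300, 600, 200) -> (630, 260, 750, 470)

Because the regions are square, `keep_aspect_ratio=True` should introduce no letterboxing here, so scaling both axes by `region_size` is valid.
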
diff --git a/frigate/util.py b/frigate/util.py
old mode 100644
new mode 100755
index c73939247..414b7ad92
--- a/frigate/util.py
+++ b/frigate/util.py
@@ -6,7 +6,7 @@ def tonumpyarray(mp_arr):
     return np.frombuffer(mp_arr.get_obj(), dtype=np.uint8)
 
 def draw_box_with_label(frame, x_min, y_min, x_max, y_max, label):
-    color = (255,0,0)
+    color = (255,0,0) # red
     cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), color, 2)
 
diff --git a/frigate/video.py b/frigate/video.py
old mode 100644
new mode 100755
index 2792097e5..8b0f8e444
--- a/frigate/video.py
+++ b/frigate/video.py
@@ -248,7 +248,7 @@ class Camera:
             ['pipe:'])
 
         ffmpeg_cmd = list(map(str, ffmpeg_cmd))
-        print("".join(ffmpeg_cmd))
+        print("[ffmpeg command line and args]: " + " ".join(ffmpeg_cmd))
         self.ffmpeg_process = sp.Popen(ffmpeg_cmd, stdout = sp.PIPE, bufsize=self.frame_size)
 
     def start(self):
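
For context on the util.py tweak: frames are handled as numpy arrays, and the detections parsed in object_detection.py above are what ultimately reach `draw_box_with_label`. A minimal usage sketch (the frame shape and label format are illustrative; only the function signature comes from the diff):

    import numpy as np
    from frigate.util import draw_box_with_label

    frame = np.zeros((1080, 1920, 3), dtype=np.uint8)  # stand-in for a decoded camera frame
    detection = {'name': 'person', 'score': 0.87,
                 'xmin': 630, 'ymin': 260, 'xmax': 750, 'ymax': 470}
    draw_box_with_label(frame, detection['xmin'], detection['ymin'],
                        detection['xmax'], detection['ymax'],
                        detection['name'] + " " + str(round(detection['score'] * 100)) + "%")
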