From 8ffdcc95c613dc9d1544fb7645bc576f8faf975d Mon Sep 17 00:00:00 2001
From: lkorth
Date: Sat, 22 Jun 2019 15:03:11 -0400
Subject: [PATCH] Use cv2 to draw object boxes and labels instead of
 tensorflow object_detection.utils

This facilitates removing tensorflow models and protobuf-python from the
docker image, greatly reducing image build time and cutting the image size
by ~1.3GB.
---
 Dockerfile         | 19 -------------
 frigate/objects.py | 35 +++++++++++-------------
 frigate/util.py    | 22 +++++++++++++++-
 frigate/video.py   | 66 ++++++++++++++++++++--------------
 4 files changed, 65 insertions(+), 77 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 19de5dde8..658909207 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -50,21 +50,6 @@ RUN pip install -U pip \
     paho-mqtt \
     PyYAML
 
-# Install tensorflow models object detection
-RUN GIT_SSL_NO_VERIFY=true git clone -q https://github.com/tensorflow/models /usr/local/lib/python3.5/dist-packages/tensorflow/models
-RUN wget -q -P /usr/local/src/ --no-check-certificate https://github.com/google/protobuf/releases/download/v3.5.1/protobuf-python-3.5.1.tar.gz
-
-# Download & build protobuf-python
-RUN cd /usr/local/src/ \
-    && tar xf protobuf-python-3.5.1.tar.gz \
-    && rm protobuf-python-3.5.1.tar.gz \
-    && cd /usr/local/src/protobuf-3.5.1/ \
-    && ./configure \
-    && make \
-    && make install \
-    && ldconfig \
-    && rm -rf /usr/local/src/protobuf-3.5.1/
-
 # Download & build OpenCV
 RUN wget -q -P /usr/local/src/ --no-check-certificate https://github.com/opencv/opencv/archive/4.0.1.zip
 RUN cd /usr/local/src/ \
@@ -97,10 +82,6 @@ RUN ln -s /python-tflite-source/edgetpu/test_data/coco_labels.txt /coco_labels.t
 RUN (apt-get autoremove -y; \
     apt-get autoclean -y)
 
-# Set TF object detection available
-ENV PYTHONPATH "$PYTHONPATH:/usr/local/lib/python3.5/dist-packages/tensorflow/models/research:/usr/local/lib/python3.5/dist-packages/tensorflow/models/research/slim"
-RUN cd /usr/local/lib/python3.5/dist-packages/tensorflow/models/research && protoc object_detection/protos/*.proto --python_out=.
-
 WORKDIR /opt/frigate/
 ADD frigate frigate/
 COPY detect_objects.py .
diff --git a/frigate/objects.py b/frigate/objects.py
index 5c5a2e8ac..5bec32365 100644
--- a/frigate/objects.py
+++ b/frigate/objects.py
@@ -2,7 +2,7 @@ import time
 import datetime
 import threading
 import cv2
-from object_detection.utils import visualization_utils as vis_util
+from . util import drawobjectbox
 
 class ObjectCleaner(threading.Thread):
     def __init__(self, objects_parsed, detected_objects):
@@ -70,27 +70,24 @@ class BestPersonFrame(threading.Thread):
             # if there is already a best_person
             else:
                 now = datetime.datetime.now().timestamp()
-                # if the new best person is a higher score than the current best person 
+                # if the new best person is a higher score than the current best person
                 # or the current person is more than 1 minute old, use the new best person
                 if new_best_person['score'] > self.best_person['score'] or (now - self.best_person['frame_time']) > 60:
                     self.best_person = new_best_person
-            
+
             # make a copy of the recent frames
             recent_frames = self.recent_frames.copy()
-            
-            if not self.best_person is None and self.best_person['frame_time'] in recent_frames:
-                best_frame = recent_frames[self.best_person['frame_time']]
-                best_frame = cv2.cvtColor(best_frame, cv2.COLOR_BGR2RGB)
-                # draw the bounding box on the frame
-                vis_util.draw_bounding_box_on_image_array(best_frame,
-                    self.best_person['ymin'],
-                    self.best_person['xmin'],
-                    self.best_person['ymax'],
-                    self.best_person['xmax'],
-                    color='red',
-                    thickness=2,
-                    display_str_list=["{}: {}%".format(self.best_person['name'],int(self.best_person['score']*100))],
-                    use_normalized_coordinates=False)
-                # convert back to BGR
-                self.best_frame = cv2.cvtColor(best_frame, cv2.COLOR_RGB2BGR)
+            if not self.best_person is None and self.best_person['frame_time'] in recent_frames:
+                self.best_frame = recent_frames[self.best_person['frame_time']]
+
+                label = "{}: {}%".format(self.best_person['name'], int(self.best_person['score'] * 100))
+                bounding_box = (
+                    self.best_person['xmin'],
+                    self.best_person['ymin'],
+                    self.best_person['xmax'],
+                    self.best_person['ymax']
+                )
+
+                # draw the bounding box on the frame
+                drawobjectbox(self.best_frame, label, bounding_box)
 
diff --git a/frigate/util.py b/frigate/util.py
index fa174f195..7f62afc7a 100644
--- a/frigate/util.py
+++ b/frigate/util.py
@@ -1,5 +1,25 @@
 import numpy as np
+import cv2
+
+LABEL_FONT = cv2.FONT_HERSHEY_PLAIN
+FONT_SCALE = 0.8
 
 # convert shared memory array into numpy array
 def tonumpyarray(mp_arr):
-    return np.frombuffer(mp_arr.get_obj(), dtype=np.uint8)
\ No newline at end of file
+    return np.frombuffer(mp_arr.get_obj(), dtype=np.uint8)
+
+# draw a box with text in the upper left on the image
+def drawobjectbox(image, text, rect):
+    x1, y1, x2, y2 = rect
+
+    # draw the red bounding box
+    cv2.rectangle(image, (x1, y1), (x2, y2), (0, 0, 255), 2)
+
+    # get the size of the text
+    (text_width, text_height) = cv2.getTextSize(text, LABEL_FONT, FONT_SCALE, 1)[0]
+
+    # draw the text background with padding
+    cv2.rectangle(image, (x1, y1), (x1 + text_width + 8, y1 - text_height - 8), (0, 0, 255), cv2.FILLED)
+
+    # draw the text
+    cv2.putText(image, text, (x1 + 4, y1 - 4), LABEL_FONT, FONT_SCALE, (0, 0, 0), lineType=cv2.LINE_AA)
diff --git a/frigate/video.py b/frigate/video.py
index 63e2b09bc..09f8dd07d 100644
--- a/frigate/video.py
+++ b/frigate/video.py
@@ -6,8 +6,8 @@ import threading
 import ctypes
 import multiprocessing as mp
 import numpy as np
-from object_detection.utils import visualization_utils as vis_util
 from . util import tonumpyarray
+from . util import drawobjectbox
 from . object_detection import FramePrepper
 from . objects import ObjectCleaner, BestPersonFrame
 from . mqtt import MqttObjectPublisher
@@ -54,10 +54,10 @@ def fetch_frames(shared_arr, shared_frame_time, frame_lock, frame_ready, frame_s
         else:
             print("Unable to grab a frame")
             bad_frame_counter += 1
-        
+
         if bad_frame_counter > 100:
             video.release()
-    
+
     video.release()
 
 # Stores 2 seconds worth of frames when motion is detected so they can be used for other threads
@@ -79,12 +79,12 @@ class FrameTracker(threading.Thread):
             # if there isnt a frame ready for processing or it is old, wait for a signal
             if self.frame_time.value == frame_time or (now - self.frame_time.value) > 0.5:
                 self.frame_ready.wait()
-            
+
             # lock and make a copy of the frame
-            with self.frame_lock: 
+            with self.frame_lock:
                 frame = self.shared_frame.copy()
                 frame_time = self.frame_time.value
-            
+
             # add the frame to recent frames
             self.recent_frames[frame_time] = frame
 
@@ -106,7 +106,7 @@ def get_frame_shape(rtsp_url):
 def get_rtsp_url(rtsp_config):
     if (rtsp_config['password'].startswith('$')):
         rtsp_config['password'] = os.getenv(rtsp_config['password'][1:])
-    return 'rtsp://{}:{}@{}:{}{}'.format(rtsp_config['user'], 
+    return 'rtsp://{}:{}@{}:{}{}'.format(rtsp_config['user'],
         rtsp_config['password'], rtsp_config['host'], rtsp_config['port'],
         rtsp_config['path'])
 
@@ -174,9 +174,9 @@ class Camera:
             region['size'], region['x_offset'], region['y_offset'],
             region['threshold'], prepped_frame_queue
         ))
-        
+
         # start a thread to store recent motion frames for processing
-        self.frame_tracker = FrameTracker(self.shared_frame_np, self.shared_frame_time, 
+        self.frame_tracker = FrameTracker(self.shared_frame_np, self.shared_frame_time,
             self.frame_ready, self.frame_lock, self.recent_frames)
         self.frame_tracker.start()
 
@@ -209,28 +209,28 @@
         self.capture_process.terminate()
         del self.capture_process
         self.capture_process = None
-            
+
         # create the process to capture frames from the RTSP stream and store in a shared array
         print("Creating a new capture process...")
-        self.capture_process = mp.Process(target=fetch_frames, args=(self.shared_frame_array, 
+        self.capture_process = mp.Process(target=fetch_frames, args=(self.shared_frame_array,
            self.shared_frame_time, self.frame_lock, self.frame_ready, self.frame_shape, self.rtsp_url))
         self.capture_process.daemon = True
         print("Starting a new capture process...")
         self.capture_process.start()
-    
+
     def start(self):
         self.start_or_restart_capture()
         # start the object detection prep threads
         for detection_prep_thread in self.detection_prep_threads:
             detection_prep_thread.start()
         self.watchdog.start()
-    
+
     def join(self):
         self.capture_process.join()
-    
+
     def get_capture_pid(self):
         return self.capture_process.pid
-    
+
     def add_objects(self, objects):
         if len(objects) == 0:
             return
@@ -246,15 +246,15 @@
                     obj['ymin'] >= r['y_offset'] and
                     obj['xmax'] <= r['x_offset']+r['size'] and
                     obj['ymax'] <= r['y_offset']+r['size']
-                ): 
+                ):
                     region = r
                     break
-            
+
             # if the min person area is larger than the
             # detected person, don't add it to detected objects
             if region and region['min_person_area'] > person_area:
                 continue
-            
+
             # compute the coordinates of the person and make sure
             # the location isnt outide the bounds of the image (can happen from rounding)
             y_location = min(int(obj['ymax']), len(self.mask)-1)
@@ -271,7 +271,7 @@
 
     def get_best_person(self):
         return self.best_person_frame.best_frame
-    
+
     def get_current_frame_with_objects(self):
         # make a copy of the current detected objects
         detected_objects = self.detected_objects.copy()
@@ -279,32 +279,22 @@
         with self.frame_lock:
             frame = self.shared_frame_np.copy()
 
-        # convert to RGB for drawing
-        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-
         # draw the bounding boxes on the screen
         for obj in detected_objects:
-            vis_util.draw_bounding_box_on_image_array(frame,
-                obj['ymin'],
+            label = "{}: {}%".format(obj['name'], int(obj['score'] * 100))
+            bounding_box = (
                 obj['xmin'],
-                obj['ymax'],
+                obj['ymin'],
                 obj['xmax'],
-                color='red',
-                thickness=2,
-                display_str_list=["{}: {}%".format(obj['name'],int(obj['score']*100))],
-                use_normalized_coordinates=False)
+                obj['ymax']
+            )
+
+            drawobjectbox(frame, label, bounding_box)
 
         for region in self.regions:
             color = (255,255,255)
-            cv2.rectangle(frame, (region['x_offset'], region['y_offset']), 
-                (region['x_offset']+region['size'], region['y_offset']+region['size']), 
+            cv2.rectangle(frame, (region['x_offset'], region['y_offset']),
+                (region['x_offset']+region['size'], region['y_offset']+region['size']),
                 color, 2)
 
-        # convert back to BGR
-        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
-
         return frame
-
-
-
-
\ No newline at end of file
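
A usage note for reviewers: the sketch below shows the new drawing path end to
end. It assumes only the frigate.util module layout introduced by this patch
and that the frigate package is importable; the frame, score, and box
coordinates are illustrative values, not anything taken from the patch itself.

    import numpy as np
    from frigate.util import drawobjectbox

    # a blank 720p BGR frame standing in for a decoded RTSP frame
    frame = np.zeros((720, 1280, 3), dtype=np.uint8)

    # drawobjectbox takes absolute pixel coordinates as (xmin, ymin, xmax, ymax),
    # matching the use_normalized_coordinates=False calls this patch removes
    bounding_box = (420, 260, 680, 640)

    # the same label format video.py and objects.py build, e.g. "person: 87%"
    label = "{}: {}%".format('person', int(0.87 * 100))

    # mutates the frame in place: red box, red label background, black label text
    drawobjectbox(frame, label, bounding_box)

Design-wise, drawobjectbox passes its colors in OpenCV's native BGR order
((0, 0, 255) is red), which is what allows the patch to drop the BGR<->RGB
cvtColor round trips the vis_util path required.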