From 2a83a3626c7c5f81482405dbe4cef95993037e02 Mon Sep 17 00:00:00 2001
From: Nick Mowen <nickmowen213@gmail.com>
Date: Sat, 17 Jun 2023 16:48:41 -0600
Subject: [PATCH] Add the detector

---
 Dockerfile                             |  1 -
 frigate/app.py                         |  7 +++
 frigate/const.py                       |  4 ++
 frigate/detectors/plugins/audio_tfl.py | 65 ++++++++++++++++++++++++++
 4 files changed, 76 insertions(+), 1 deletion(-)
 create mode 100644 frigate/detectors/plugins/audio_tfl.py

diff --git a/Dockerfile b/Dockerfile
index 6e1f608fa..cd9870ed6 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -94,7 +94,6 @@ COPY --from=ov-converter /models/public/ssdlite_mobilenet_v2/FP16 openvino-model
 RUN wget -q https://github.com/openvinotoolkit/open_model_zoo/raw/master/data/dataset_classes/coco_91cl_bkgr.txt -O openvino-model/coco_91cl_bkgr.txt && \
     sed -i 's/truck/car/g' openvino-model/coco_91cl_bkgr.txt
 # Get Audio Model and labels
-RUN wget -qO edgetpu_audio_model.tflite https://tfhub.dev/google/coral-model/yamnet/classification/coral/1?coral-format=tflite
 RUN wget -qO cpu_audio_model.tflite https://tfhub.dev/google/lite-model/yamnet/classification/tflite/1?lite-format=tflite
 COPY audio-labelmap.txt .
 
diff --git a/frigate/app.py b/frigate/app.py
index 9d85f461e..d37372538 100644
--- a/frigate/app.py
+++ b/frigate/app.py
@@ -21,6 +21,7 @@ from frigate.comms.mqtt import MqttClient
 from frigate.comms.ws import WebSocketClient
 from frigate.config import FrigateConfig
 from frigate.const import (
+    AUDIO_DETECTOR,
     CACHE_DIR,
     CLIPS_DIR,
     CONFIG_DIR,
@@ -322,6 +323,12 @@ class FrigateApp:
                 detector_config,
             )
 
+        # Iterate the camera configs: .items() would yield (key, value) tuples,
+        # which have no .audio attribute.
+        if any(c.audio.enabled for c in self.config.cameras.values()):
+            # NOTE(review): only the name is passed; confirm the remaining arguments.
+            self.detectors[AUDIO_DETECTOR] = ObjectDetectProcess(AUDIO_DETECTOR)
+
     def start_detected_frames_processor(self) -> None:
         self.detected_frames_processor = TrackedObjectProcessor(
             self.config,
diff --git a/frigate/const.py b/frigate/const.py
index 9b7e177f2..987f04b46 100644
--- a/frigate/const.py
+++ b/frigate/const.py
@@ -12,6 +12,10 @@ PLUS_ENV_VAR = "PLUS_API_KEY"
 PLUS_API_HOST = "https://api.frigate.video"
 BTBN_PATH = "/usr/lib/btbn-ffmpeg"
 
+# Audio Consts
+
+AUDIO_DETECTOR = "audio-detector"
+
 # Regex Consts
 
 REGEX_CAMERA_NAME = r"^[a-zA-Z0-9_-]+$"
diff --git a/frigate/detectors/plugins/audio_tfl.py b/frigate/detectors/plugins/audio_tfl.py
new file mode 100644
index 000000000..cc9252417
--- /dev/null
+++ b/frigate/detectors/plugins/audio_tfl.py
@@ -0,0 +1,65 @@
+import logging
+
+import numpy as np
+from pydantic import Field
+from typing_extensions import Literal
+
+from frigate.detectors.detection_api import DetectionApi
+from frigate.detectors.detector_config import BaseDetectorConfig
+
+try:
+    from tflite_runtime.interpreter import Interpreter
+except ModuleNotFoundError:
+    from tensorflow.lite.python.interpreter import Interpreter
+
+
+logger = logging.getLogger(__name__)
+
+DETECTOR_KEY = "audio"
+
+
+class AudioDetectorConfig(BaseDetectorConfig):  # config for detector type "audio"
+    type: Literal[DETECTOR_KEY]
+
+
+class CpuTfl(DetectionApi):
+    """Audio classification detector backed by a CPU TFLite interpreter."""
+
+    type_key = DETECTOR_KEY
+
+    def __init__(self, detector_config: AudioDetectorConfig):
+        """Load /cpu_audio_model.tflite and cache its input/output tensor details.
+
+        NOTE(review): detector_config is currently unused; path and threads are fixed.
+        """
+        self.interpreter = Interpreter(
+            model_path="/cpu_audio_model.tflite",
+            num_threads=3,
+        )
+        self.interpreter.allocate_tensors()
+        self.tensor_input_details = self.interpreter.get_input_details()
+        self.tensor_output_details = self.interpreter.get_output_details()
+
+    def detect_raw(self, tensor_input):
+        """Run the model on tensor_input and return a (20, 6) float32 array.
+
+        Each filled row is [class_id, score, -1, -1, -1, -1] (box fields are -1),
+        ordered by descending score; only scores >= 0.4 are kept, and unused
+        rows stay zero.
+        """
+        self.interpreter.set_tensor(self.tensor_input_details[0]["index"], tensor_input)
+        self.interpreter.invoke()
+        res = self.interpreter.get_tensor(self.tensor_output_details[0]["index"])[0]
+
+        max_rows = 20
+        detections = np.zeros((max_rows, 6), np.float32)
+        # A full stable argsort (rather than argpartition with a fixed kth) works for
+        # any number of classes and makes the ordering of tied scores deterministic.
+        class_ids = np.argsort(-res, kind="stable")[:max_rows]
+        class_ids = class_ids[res[class_ids] >= 0.4]
+        count = len(class_ids)
+
+        detections[:count, 0] = class_ids
+        detections[:count, 1] = res[class_ids]
+        detections[:count, 2:] = -1
+        return detections