diff --git a/frigate/app.py b/frigate/app.py
index 9d85f461e..e61510f7b 100644
--- a/frigate/app.py
+++ b/frigate/app.py
@@ -29,6 +29,7 @@ from frigate.const import (
     MODEL_CACHE_DIR,
     RECORD_DIR,
 )
+from frigate.events.audio import listen_to_audio
 from frigate.events.cleanup import EventCleanup
 from frigate.events.external import ExternalEventProcessor
 from frigate.events.maintainer import EventProcessor
@@ -390,6 +391,18 @@ class FrigateApp:
             capture_process.start()
             logger.info(f"Capture process started for {name}: {capture_process.pid}")
 
+    def start_audio_processors(self) -> None:
+        """Fork the audio detection process and start it."""
+        audio_process = mp.Process(
+            target=listen_to_audio,
+            name="audio_capture",
+            args=(self.config, self.event_queue),
+        )
+        # Daemonize so a stuck audio child cannot block interpreter exit.
+        audio_process.daemon = True
+        # The process must actually be started, otherwise pid is None.
+        audio_process.start()
+        logger.info(f"Audio process started: {audio_process.pid}")
+
     def start_timeline_processor(self) -> None:
         self.timeline_processor = TimelineProcessor(
             self.config, self.timeline_queue, self.stop_event
@@ -486,6 +499,7 @@ class FrigateApp:
         self.start_detected_frames_processor()
         self.start_camera_processors()
         self.start_camera_capture_processes()
+        self.start_audio_processors()
         self.start_storage_maintainer()
         self.init_stats()
         self.init_external_event_processor()
diff --git a/frigate/detectors/plugins/audio_tfl.py b/frigate/detectors/plugins/audio_tfl.py
deleted file mode 100644
index ac5c50381..000000000
--- a/frigate/detectors/plugins/audio_tfl.py
+++ /dev/null
@@ -1,75 +0,0 @@
-import logging
-
-import numpy as np
-from pydantic import Field
-from typing_extensions import Literal
-
-from frigate.detectors.detection_api import DetectionApi
-from frigate.object_detection import load_labels
-
-try:
-    from tflite_runtime.interpreter import Interpreter
-except ModuleNotFoundError:
-    from tensorflow.lite.python.interpreter import Interpreter
-
-
-logger = logging.getLogger(__name__)
-
-DETECTOR_KEY = "audio"
-
-
-class AudioTfl(DetectionApi):
-    type_key = DETECTOR_KEY
-
-    def __init__(self, labels):
-        self.labels = load_labels("/audio-labelmap.txt")
-        self.interpreter = Interpreter(
-            model_path="/cpu_audio_model.tflite",
-            num_threads=2,
-        )
-
-        self.interpreter.allocate_tensors()
-
-        self.tensor_input_details = self.interpreter.get_input_details()
-        self.tensor_output_details = self.interpreter.get_output_details()
-
-    def _detect_raw(self, tensor_input):
-        self.interpreter.set_tensor(self.tensor_input_details[0]["index"], tensor_input)
-        self.interpreter.invoke()
-        detections = np.zeros((20, 6), np.float32)
-
-        res = self.interpreter.get_tensor(self.tensor_output_details[0]["index"])[0]
-        non_zero_indices = res > 0
-        class_ids = np.argpartition(-res, 20)[:20]
-        class_ids = class_ids[np.argsort(-res[class_ids])]
-        class_ids = class_ids[non_zero_indices[class_ids]]
-        scores = res[class_ids]
-        boxes = np.full((scores.shape[0], 4), -1, np.float32)
-        count = len(scores)
-
-        for i in range(count):
-            if scores[i] < 0.4 or i == 20:
-                break
-            detections[i] = [
-                class_ids[i],
-                float(scores[i]),
-                boxes[i][0],
-                boxes[i][1],
-                boxes[i][2],
-                boxes[i][3],
-            ]
-
-        return detections
-
-    def detect(self, tensor_input, threshold=0.8):
-        detections = []
-
-        raw_detections = self._detect_raw(tensor_input)
-
-        for d in raw_detections:
-            if d[1] < threshold:
-                break
-            detections.append(
-                (self.labels[int(d[0])], float(d[1]), (d[2], d[3], d[4], d[5]))
-            )
-        return detections
diff --git a/frigate/events/audio.py b/frigate/events/audio.py
index 7b792bca5..ddaa2c69d 100644
--- a/frigate/events/audio.py
+++ b/frigate/events/audio.py
@@ -19,9 +19,15 @@ from frigate.const import (
     AUDIO_SAMPLE_RATE,
     CACHE_DIR,
 )
-from frigate.detectors.plugins.audio_tfl import AudioTfl
+from frigate.detectors.detection_api import DetectionApi
+from frigate.object_detection import load_labels
 from frigate.util import listen
 
+try:
+    from tflite_runtime.interpreter import Interpreter
+except ModuleNotFoundError:
+    from tensorflow.lite.python.interpreter import Interpreter
+
 logger = logging.getLogger(__name__)
 
 FFMPEG_COMMAND = (
@@ -47,6 +53,61 @@ def listen_to_audio(config: FrigateConfig, event_queue: mp.Queue) -> None:
         AudioEventMaintainer(camera, stop_event)
 
 
+class AudioTfl(DetectionApi):
+    def __init__(self, labels):
+        self.labels = load_labels("/audio-labelmap.txt")
+        self.interpreter = Interpreter(
+            model_path="/cpu_audio_model.tflite",
+            num_threads=2,
+        )
+
+        self.interpreter.allocate_tensors()
+
+        self.tensor_input_details = self.interpreter.get_input_details()
+        self.tensor_output_details = self.interpreter.get_output_details()
+
+    def _detect_raw(self, tensor_input):
+        self.interpreter.set_tensor(self.tensor_input_details[0]["index"], tensor_input)
+        self.interpreter.invoke()
+        detections = np.zeros((20, 6), np.float32)
+
+        res = self.interpreter.get_tensor(self.tensor_output_details[0]["index"])[0]
+        non_zero_indices = res > 0
+        class_ids = np.argpartition(-res, 20)[:20]
+        class_ids = class_ids[np.argsort(-res[class_ids])]
+        class_ids = class_ids[non_zero_indices[class_ids]]
+        scores = res[class_ids]
+        boxes = np.full((scores.shape[0], 4), -1, np.float32)
+        count = len(scores)
+
+        for i in range(count):
+            if scores[i] < 0.4 or i == 20:
+                break
+            detections[i] = [
+                class_ids[i],
+                float(scores[i]),
+                boxes[i][0],
+                boxes[i][1],
+                boxes[i][2],
+                boxes[i][3],
+            ]
+
+        return detections
+
+    def detect(self, tensor_input, threshold=0.8):
+        detections = []
+
+        raw_detections = self._detect_raw(tensor_input)
+
+        for d in raw_detections:
+            if d[1] < threshold:
+                break
+            detections.append(
+                (self.labels[int(d[0])], float(d[1]), (d[2], d[3], d[4], d[5]))
+            )
+        return detections
+
+
 class AudioEventMaintainer(threading.Thread):
     def __init__(self, camera: CameraConfig, stop_event: mp.Event) -> None:
         threading.Thread.__init__(self)