Start audio process

commit 10e194b0d1
parent 386e388f75
@@ -29,6 +29,7 @@ from frigate.const import (
     MODEL_CACHE_DIR,
     RECORD_DIR,
 )
+from frigate.events.audio import listen_to_audio
 from frigate.events.cleanup import EventCleanup
 from frigate.events.external import ExternalEventProcessor
 from frigate.events.maintainer import EventProcessor
@@ -390,6 +391,14 @@ class FrigateApp:
             capture_process.start()
             logger.info(f"Capture process started for {name}: {capture_process.pid}")
 
+    def start_audio_processors(self) -> None:
+        audio_process = mp.Process(
+            target=listen_to_audio,
+            name=f"audio_capture",
+            args=(self.config, self.event_queue)
+        )
+        logger.info(f"Audio process started: {audio_process.pid}")
+
     def start_timeline_processor(self) -> None:
         self.timeline_processor = TimelineProcessor(
             self.config, self.timeline_queue, self.stop_event
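Note on the hunk above: start_audio_processors() builds the mp.Process and immediately logs its pid, but no audio_process.start() call is visible in the diff as extracted, so the logged pid would be None unless the process is started elsewhere. The sketch below is a hypothetical, standalone variant and not the commit's code; the daemon flag and the returned process handle are assumptions added for illustration.

import multiprocessing as mp

def start_audio_process(config, event_queue, listen_to_audio):
    # Hypothetical helper mirroring start_audio_processors(): build, start,
    # and return the audio capture process so the logged pid is valid.
    audio_process = mp.Process(
        target=listen_to_audio,
        name="audio_capture",
        args=(config, event_queue),
    )
    audio_process.daemon = True  # assumption: child exits with the parent
    audio_process.start()        # without start(), audio_process.pid is None
    print(f"Audio process started: {audio_process.pid}")
    return audio_process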
@@ -486,6 +495,7 @@ class FrigateApp:
         self.start_detected_frames_processor()
         self.start_camera_processors()
         self.start_camera_capture_processes()
+        self.start_audio_processors()
         self.start_storage_maintainer()
         self.init_stats()
         self.init_external_event_processor()
@@ -1,75 +0,0 @@
-import logging
-
-import numpy as np
-from pydantic import Field
-from typing_extensions import Literal
-
-from frigate.detectors.detection_api import DetectionApi
-from frigate.object_detection import load_labels
-
-try:
-    from tflite_runtime.interpreter import Interpreter
-except ModuleNotFoundError:
-    from tensorflow.lite.python.interpreter import Interpreter
-
-
-logger = logging.getLogger(__name__)
-
-DETECTOR_KEY = "audio"
-
-
-class AudioTfl(DetectionApi):
-    type_key = DETECTOR_KEY
-
-    def __init__(self, labels):
-        self.labels = load_labels("/audio-labelmap.txt")
-        self.interpreter = Interpreter(
-            model_path="/cpu_audio_model.tflite",
-            num_threads=2,
-        )
-
-        self.interpreter.allocate_tensors()
-
-        self.tensor_input_details = self.interpreter.get_input_details()
-        self.tensor_output_details = self.interpreter.get_output_details()
-
-    def _detect_raw(self, tensor_input):
-        self.interpreter.set_tensor(self.tensor_input_details[0]["index"], tensor_input)
-        self.interpreter.invoke()
-        detections = np.zeros((20, 6), np.float32)
-
-        res = self.interpreter.get_tensor(self.tensor_output_details[0]["index"])[0]
-        non_zero_indices = res > 0
-        class_ids = np.argpartition(-res, 20)[:20]
-        class_ids = class_ids[np.argsort(-res[class_ids])]
-        class_ids = class_ids[non_zero_indices[class_ids]]
-        scores = res[class_ids]
-        boxes = np.full((scores.shape[0], 4), -1, np.float32)
-        count = len(scores)
-
-        for i in range(count):
-            if scores[i] < 0.4 or i == 20:
-                break
-            detections[i] = [
-                class_ids[i],
-                float(scores[i]),
-                boxes[i][0],
-                boxes[i][1],
-                boxes[i][2],
-                boxes[i][3],
-            ]
-
-        return detections
-
-    def detect(self, tensor_input, threshold=0.8):
-        detections = []
-
-        raw_detections = self._detect_raw(tensor_input)
-
-        for d in raw_detections:
-            if d[1] < threshold:
-                break
-            detections.append(
-                (self.labels[int(d[0])], float(d[1]), (d[2], d[3], d[4], d[5]))
-            )
-        return detections
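The _detect_raw() method above (the same logic reappears in the final hunk below) reduces a flat score vector to the 20 highest-scoring classes, sorted descending and filtered to strictly positive scores, then pads them into a fixed (20, 6) array with placeholder boxes. A small standalone sketch of just that selection step, using a made-up score vector rather than a real TFLite output, may make the indexing easier to follow:

import numpy as np

# Fake score vector standing in for the interpreter output (illustrative only).
res = np.zeros(100, np.float32)
res[[3, 17, 42]] = [0.9, 0.7, 0.5]

# Same selection steps as _detect_raw(): top 20 by score, sorted descending,
# then restricted to indices whose score is strictly positive.
class_ids = np.argpartition(-res, 20)[:20]
class_ids = class_ids[np.argsort(-res[class_ids])]
class_ids = class_ids[(res > 0)[class_ids]]
scores = res[class_ids]

print(class_ids, scores)  # -> [ 3 17 42] [0.9 0.7 0.5]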
@@ -19,9 +19,15 @@ from frigate.const import (
     AUDIO_SAMPLE_RATE,
     CACHE_DIR,
 )
-from frigate.detectors.plugins.audio_tfl import AudioTfl
+from frigate.detectors.detection_api import DetectionApi
+from frigate.object_detection import load_labels
 from frigate.util import listen
 
+try:
+    from tflite_runtime.interpreter import Interpreter
+except ModuleNotFoundError:
+    from tensorflow.lite.python.interpreter import Interpreter
+
 logger = logging.getLogger(__name__)
 
 FFMPEG_COMMAND = (
@@ -47,6 +53,61 @@ def listen_to_audio(config: FrigateConfig, event_queue: mp.Queue) -> None:
         AudioEventMaintainer(camera, stop_event)
 
 
+class AudioTfl(DetectionApi):
+    def __init__(self, labels):
+        self.labels = load_labels("/audio-labelmap.txt")
+        self.interpreter = Interpreter(
+            model_path="/cpu_audio_model.tflite",
+            num_threads=2,
+        )
+
+        self.interpreter.allocate_tensors()
+
+        self.tensor_input_details = self.interpreter.get_input_details()
+        self.tensor_output_details = self.interpreter.get_output_details()
+
+    def _detect_raw(self, tensor_input):
+        self.interpreter.set_tensor(self.tensor_input_details[0]["index"], tensor_input)
+        self.interpreter.invoke()
+        detections = np.zeros((20, 6), np.float32)
+
+        res = self.interpreter.get_tensor(self.tensor_output_details[0]["index"])[0]
+        non_zero_indices = res > 0
+        class_ids = np.argpartition(-res, 20)[:20]
+        class_ids = class_ids[np.argsort(-res[class_ids])]
+        class_ids = class_ids[non_zero_indices[class_ids]]
+        scores = res[class_ids]
+        boxes = np.full((scores.shape[0], 4), -1, np.float32)
+        count = len(scores)
+
+        for i in range(count):
+            if scores[i] < 0.4 or i == 20:
+                break
+            detections[i] = [
+                class_ids[i],
+                float(scores[i]),
+                boxes[i][0],
+                boxes[i][1],
+                boxes[i][2],
+                boxes[i][3],
+            ]
+
+        return detections
+
+    def detect(self, tensor_input, threshold=0.8):
+        detections = []
+
+        raw_detections = self._detect_raw(tensor_input)
+
+        for d in raw_detections:
+            if d[1] < threshold:
+                break
+            detections.append(
+                (self.labels[int(d[0])], float(d[1]), (d[2], d[3], d[4], d[5]))
+            )
+        return detections
+
+
 class AudioEventMaintainer(threading.Thread):
     def __init__(self, camera: CameraConfig, stop_event: mp.Event) -> None:
         threading.Thread.__init__(self)
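With AudioTfl now defined alongside the audio event code, a caller hands detect() a tensor shaped like the model's declared input and gets back (label, score, box) tuples, where the box is a (-1, -1, -1, -1) placeholder filled in by _detect_raw(). The usage sketch below is hypothetical and not part of this commit; it assumes the /cpu_audio_model.tflite and /audio-labelmap.txt files referenced above exist in the container, and it derives the input shape and dtype from the interpreter rather than guessing them.

import numpy as np

from frigate.events.audio import AudioTfl  # location of the class after this commit

# Hypothetical smoke test: run one detection pass over silence.
audio_detector = AudioTfl(labels=None)  # the labels arg is unused; the labelmap is read from disk

input_details = audio_detector.tensor_input_details[0]
silence = np.zeros(input_details["shape"], dtype=input_details["dtype"])

for label, score, box in audio_detector.detect(silence, threshold=0.5):
    print(f"heard {label} with confidence {score:.2f}")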