diff --git a/frigate/config.py b/frigate/config.py
index b2b837c5c..c86be26ac 100644
--- a/frigate/config.py
+++ b/frigate/config.py
@@ -672,7 +672,7 @@ class CameraConfig(FrigateBaseModel):
         # add roles to the input if there is only one
         if len(config["ffmpeg"]["inputs"]) == 1:
             has_rtmp = "rtmp" in config["ffmpeg"]["inputs"][0].get("roles", [])
-            has_audio = "audio" in config["audio"]["inputs"][0].get("roles", [])
+            has_audio = "audio" in config["ffmpeg"]["inputs"][0].get("roles", [])
 
             config["ffmpeg"]["inputs"][0]["roles"] = [
                 "record",
@@ -680,7 +680,7 @@ class CameraConfig(FrigateBaseModel):
             ]
 
             if has_audio:
-                config["ffmpeg"]["inputs"][0].append("audio")
+                config["ffmpeg"]["inputs"][0]["roles"].append("audio")
 
             if has_rtmp:
                 config["ffmpeg"]["inputs"][0]["roles"].append("rtmp")
diff --git a/frigate/detectors/plugins/audio_tfl.py b/frigate/detectors/plugins/audio_tfl.py
index a3750ebfa..ac5c50381 100644
--- a/frigate/detectors/plugins/audio_tfl.py
+++ b/frigate/detectors/plugins/audio_tfl.py
@@ -5,7 +5,6 @@ from pydantic import Field
 from typing_extensions import Literal
 
 from frigate.detectors.detection_api import DetectionApi
-from frigate.detectors.detector_config import BaseDetectorConfig
 from frigate.object_detection import load_labels
 
 try:
diff --git a/frigate/events/audio.py b/frigate/events/audio.py
index 96731f0da..7b792bca5 100644
--- a/frigate/events/audio.py
+++ b/frigate/events/audio.py
@@ -2,14 +2,17 @@
 
 import logging
 import multiprocessing as mp
+import numpy as np
+import os
 import signal
+import subprocess as sp
 import threading
 from types import FrameType
 from typing import Optional
 
 from setproctitle import setproctitle
 
-from frigate.config import AudioConfig, CameraConfig, FrigateConfig
+from frigate.config import CameraConfig, FrigateConfig
 from frigate.const import (
     AUDIO_DETECTOR,
     AUDIO_DURATION,
@@ -53,16 +56,84 @@ class AudioEventMaintainer(threading.Thread):
         self.stop_event = stop_event
         self.detector = AudioTfl()
         self.shape = (int(round(AUDIO_DURATION * AUDIO_SAMPLE_RATE)),)
-
-    def detect_audio(self) -> None:
-        pass
-
-    def listen_to_audio(self) -> None:
-        pass
-
-    def run(self) -> None:
+        # Bytes per detection window; the factor of 2 assumes 16-bit samples.
+        self.chunk_size = int(round(AUDIO_DURATION * AUDIO_SAMPLE_RATE * 2))
         self.pipe = f"{CACHE_DIR}/{self.config.name}-audio"
         self.ffmpeg_command = FFMPEG_COMMAND.format(
             [i.path for i in self.config.ffmpeg.inputs if "audio" in i.roles][0],
             self.pipe,
         )
+        self.pipe_file = None
+        self.audio_listener = None
+
+    def detect_audio(self, audio) -> None:
+        """Run the detector on one window of audio and log matching labels.
+
+        Args:
+            audio: 1-D array of signed 16-bit PCM samples.
+        """
+        # Scale int16 PCM into [-1.0, 1.0) floats before running the model.
+        waveform = (audio / 32768.0).astype(np.float32)
+        model_detections = self.detector.detect(waveform)
+
+        for label, score, _ in model_detections:
+            if label not in self.config.audio.listen:
+                continue
+
+            logger.info("Detected audio: %s with score %s", label, score)
+            # TODO handle valid detect
+
+    def init_ffmpeg(self) -> None:
+        """Create the FIFO, start ffmpeg, and open the FIFO for reading."""
+        try:
+            os.mkfifo(self.pipe)
+        except FileExistsError:
+            # Already present (e.g. left over from an earlier run); reuse it.
+            pass
+
+        logger.info("Starting audio ffmpeg")
+        # Popen rather than run(): run() blocks until ffmpeg exits, so the
+        # read loop would never start. ffmpeg_command is a single formatted
+        # string, so it is split into an argv list (this assumes the input
+        # path contains no whitespace).
+        self.audio_listener = sp.Popen(self.ffmpeg_command.split(), stdin=sp.DEVNULL)
+        # Open only after the writer is running: opening a FIFO for reading
+        # blocks until some process opens it for writing.
+        self.pipe_file = open(self.pipe, "rb")
+
+    def read_audio(self) -> None:
+        """Read one window of PCM audio from the pipe and run detection."""
+        try:
+            chunk = self.pipe_file.read(self.chunk_size)
+        except BrokenPipeError as e:
+            logger.error(f"There was a broken pipe :: {e}")
+            # TODO fix broken pipe
+            return
+
+        if len(chunk) < self.chunk_size:
+            # Short read: the writer closed its end. Drop the partial window
+            # and back off so run() does not spin on EOF.
+            self.stop_event.wait(timeout=1)
+            return
+
+        # read() yields raw bytes; view them as 16-bit samples (2 bytes each,
+        # matching chunk_size) so detect_audio can do arithmetic on them.
+        self.detect_audio(np.frombuffer(chunk, dtype=np.int16))
+
+    def run(self) -> None:
+        """Start ffmpeg and process audio until the stop event is set."""
+        try:
+            self.init_ffmpeg()
+
+            while not self.stop_event.is_set():
+                self.read_audio()
+        finally:
+            # Release the pipe and stop ffmpeg so neither outlives the thread.
+            if self.pipe_file is not None:
+                self.pipe_file.close()
+            if self.audio_listener is not None:
+                self.audio_listener.terminate()
+                try:
+                    self.audio_listener.wait(timeout=5)
+                except sp.TimeoutExpired:
+                    self.audio_listener.kill()