Start adding config for audio

2026-02-05 10:45:21 +03:00 · 2023-06-17 16:20:42 -06:00 · 2023-06-17 16:20:42 -06:00 · e43a7e65f9
commit e43a7e65f9
parent 8055fbc6e8
3 changed files with 176 additions and 97 deletions
--- a/audio-labelmap.txt
+++ b/audio-labelmap.txt
@ -1,101 +1,101 @@
-Speech
-Child speech, kid speaking
-Conversation
-Narration, monologue
-Babbling
-Speech synthesizer
-Shout
-Bellow
-Whoop
-Yell
-Children shouting
-Screaming
+speech
+speech
+speech
+speech
+babbling
+speech
+yell
+bellow
+whoop
+yell
+yell
+yell
 Whispering
-Laughter
-Baby laughter
-Giggle
-Snicker
-Belly laugh
-Chuckle, chortle
-Crying, sobbing
-Baby cry, infant cry
-Whimper
-Wail, moan
-Sigh
-Singing
-Choir
-Yodeling
-Chant
-Mantra
-Child singing
-Synthetic singing
-Rapping
-Humming
-Groan
-Grunt
-Whistling
-Breathing
-Wheeze
-Snoring
-Gasp
-Pant
-Snort
-Cough
-Throat clearing
-Sneeze
-Sniff
-Run
-Shuffle
-Walk, footsteps
-Chewing, mastication
-Biting
-Gargling
-Stomach rumble
-Burping, eructation
-Hiccup
-Fart
-Hands
-Finger snapping
-Clapping
-Heart sounds, heartbeat
-Heart murmur
-Cheering
-Applause
-Chatter
-Crowd
-Hubbub, speech noise, speech babble
-Children playing
-Animal
-Domestic animals, pets
-Dog
-Bark
-Yip
-Howl
-Bow-wow
-Growling
-Whimper (dog)
-Cat
-Purr
-Meow
-Hiss
-Caterwaul
-Livestock, farm animals, working animals
-Horse
-Clip-clop
-Neigh, whinny
-Cattle, bovinae
-Moo
-Cowbell
-Pig
-Oink
-Goat
-Bleat
-Sheep
-Fowl
-Chicken, rooster
-Cluck
-Crowing, cock-a-doodle-doo
-Turkey
+laughter
+laughter
+laughter
+snicker
+laughter
+laughter
+crying
+crying
+crying
+yell
+sigh
+singing
+choir
+yodeling
+chant
+mantra
+child_singing
+synthetic_singing
+rapping
+humming
+groan
+grunt
+whistling
+breathing
+wheeze
+snoring
+gasp
+pant
+snort
+cough
+throat_clearing
+sneeze
+sniff
+run
+shuffle
+footsteps
+chewing
+biting
+gargling
+stomach_rumble
+burping
+hiccup
+fart
+hands
+finger_snapping
+clapping
+heartbeat
+heart_murmur
+cheering
+applause
+chatter
+crowd
+speech
+children_playing
+animal
+pets
+dog
+bark
+yip
+howl
+bow-wow
+growling
+whimper_dog
+cat
+purr
+meow
+hiss
+caterwaul
+livestock
+horse
+clip-clop
+neigh
+cattle
+moo
+cowbell
+pig
+oink
+goat
+bleat
+sheep
+fowl
+chicken
+cluck
+cock-a-doodle-doo
+turkey
 Gobble
 Duck
 Quack
--- a/frigate/config.py
+++ b/frigate/config.py
@ -40,6 +40,7 @@ DEFAULT_TIME_FORMAT = "%m/%d/%Y %H:%M:%S"
 FRIGATE_ENV_VARS = {k: v for k, v in os.environ.items() if k.startswith("FRIGATE_")}

 DEFAULT_TRACKED_OBJECTS = ["person"]
+DEFAULT_LISTEN_AUDIO = ["bark", "speech", "yell", "scream"]
 DEFAULT_DETECTORS = {"cpu": {"type": "cpu"}}


@ -387,6 +388,16 @@ class ObjectConfig(FrigateBaseModel):
    mask: Union[str, List[str]] = Field(default="", title="Object mask.")


+class AudioConfig(FrigateBaseModel):
+    # Settings for audio events; declared on both the global and camera config.
+    enabled: bool = Field(title="Enable audio events.", default=False)
+    max_not_heard: int = Field(
+        title="Seconds of not hearing the type of audio to end the event.",
+        default=30,
+    )
+    listen: List[str] = Field(
+        title="Audio to listen for.", default=DEFAULT_LISTEN_AUDIO
+    )
+
+
 class BirdseyeModeEnum(str, Enum):
    objects = "objects"
    motion = "motion"
@ -466,6 +477,7 @@ class FfmpegConfig(FrigateBaseModel):


 class CameraRoleEnum(str, Enum):
+    audio = "audio"
    record = "record"
    rtmp = "rtmp"
    detect = "detect"
@ -627,6 +639,9 @@ class CameraConfig(FrigateBaseModel):
    objects: ObjectConfig = Field(
        default_factory=ObjectConfig, title="Object configuration."
    )
+    audio: AudioConfig = Field(
+        default_factory=AudioConfig, title="Audio events configuration."
+    )
    motion: Optional[MotionConfig] = Field(title="Motion detection configuration.")
    detect: DetectConfig = Field(
        default_factory=DetectConfig, title="Object detection configuration."
@ -657,12 +672,16 @@ class CameraConfig(FrigateBaseModel):
        # add roles to the input if there is only one
        if len(config["ffmpeg"]["inputs"]) == 1:
            has_rtmp = "rtmp" in config["ffmpeg"]["inputs"][0].get("roles", [])
+            has_audio = "audio" in config["ffmpeg"]["inputs"][0].get("roles", [])

            config["ffmpeg"]["inputs"][0]["roles"] = [
                "record",
                "detect",
            ]

+            if has_audio:
+                config["ffmpeg"]["inputs"][0]["roles"].append("audio")
+
            if has_rtmp:
                config["ffmpeg"]["inputs"][0]["roles"].append("rtmp")

@ -795,6 +814,11 @@ def verify_config_roles(camera_config: CameraConfig) -> None:
            f"Camera {camera_config.name} has rtmp enabled, but rtmp is not assigned to an input."
        )

+    if camera_config.audio.enabled and "audio" not in assigned_roles:
+        raise ValueError(
+            f"Camera {camera_config.name} has audio events enabled, but audio is not assigned to an input."
+        )
+

 def verify_valid_live_stream_name(
    frigate_config: FrigateConfig, camera_config: CameraConfig
@ -907,6 +931,9 @@ class FrigateConfig(FrigateBaseModel):
    objects: ObjectConfig = Field(
        default_factory=ObjectConfig, title="Global object configuration."
    )
+    audio: AudioConfig = Field(
+        default_factory=AudioConfig, title="Global Audio events configuration."
+    )
    motion: Optional[MotionConfig] = Field(
        title="Global motion detection configuration."
    )
@ -931,6 +958,7 @@ class FrigateConfig(FrigateBaseModel):
        # Global config to propagate down to camera level
        global_config = config.dict(
            include={
+                "audio": ...,
                "birdseye": ...,
                "record": ...,
                "snapshots": ...,
--- a/frigate/events/audio.py
+++ b/frigate/events/audio.py
@ -0,0 +1,51 @@
+"""Handle creating audio events."""
+
+import logging
+import multiprocessing as mp
+import signal
+import threading
+from types import FrameType
+from typing import Optional
+
+from setproctitle import setproctitle
+
+from frigate.config import AudioConfig, CameraConfig, FrigateConfig
+from frigate.const import CACHE_DIR
+from frigate.util import listen
+
+logger = logging.getLogger(__name__)
+
+FFMPEG_COMMAND = "ffmpeg -vn -i {} -f s16le -ar 16000 -ac 1 -y {}"
+
+
+def listen_to_audio(config: FrigateConfig, event_queue: mp.Queue) -> None:
+    """Install shutdown signal handlers and start an audio maintainer per camera.
+
+    Args:
+        config: Frigate configuration whose cameras are scanned; a maintainer
+            is started only for cameras with both ``enabled`` and
+            ``audio.enabled`` set.
+        event_queue: Currently unused by this function.
+    """
+    stop_event = mp.Event()
+
+    def receiveSignal(signalNumber: int, frame: Optional[FrameType]) -> None:
+        # The same event object is handed to every maintainer thread below.
+        stop_event.set()
+
+    signal.signal(signal.SIGTERM, receiveSignal)
+    signal.signal(signal.SIGINT, receiveSignal)
+
+    # Label the current thread and the process title as the audio manager.
+    threading.current_thread().name = "process:audio_manager"
+    setproctitle("frigate.audio_manager")
+    listen()
+
+    for camera in config.cameras.values():
+        if camera.enabled and camera.audio.enabled:
+            # A Thread object does no work until start() is called on it.
+            AudioEventMaintainer(camera, stop_event).start()
+
+
+class AudioEventMaintainer(threading.Thread):
+    """Per-camera thread that prepares the ffmpeg audio command for a camera."""
+
+    def __init__(self, camera: CameraConfig, stop_event: mp.Event) -> None:
+        super().__init__()
+        self.name = f"{camera.name}_audio_event_processor"
+        self.config = camera
+        self.stop_event = stop_event
+
+    def run(self) -> None:
+        """Store the audio pipe path and the formatted ffmpeg command on self."""
+        # Output path for this camera, placed under CACHE_DIR.
+        self.pipe = f"{CACHE_DIR}/{self.config.name}-audio"
+        # Only inputs carrying the "audio" role qualify; the first one is used.
+        audio_paths = [
+            ffmpeg_input.path
+            for ffmpeg_input in self.config.ffmpeg.inputs
+            if "audio" in ffmpeg_input.roles
+        ]
+        self.ffmpeg_command = FFMPEG_COMMAND.format(audio_paths[0], self.pipe)