Start adding config for audio

2026-02-05 18:55:23 +03:00 · 2023-06-17 16:20:42 -06:00 · 2023-06-17 16:20:42 -06:00 · e43a7e65f9
commit e43a7e65f9
parent 8055fbc6e8
3 changed files with 176 additions and 97 deletions
--- a/audio-labelmap.txt
+++ b/audio-labelmap.txt
@ -1,101 +1,101 @@
-Speech
+speech
-Child speech, kid speaking
+speech
-Conversation
+speech
-Narration, monologue
+speech
-Babbling
+babbling
-Speech synthesizer
+speech
-Shout
+yell
-Bellow
+bellow
-Whoop
+whoop
-Yell
+yell
-Children shouting
+yell
-Screaming
+yell
 Whispering
-Laughter
+laughter
-Baby laughter
+laughter
-Giggle
+laughter
-Snicker
+snicker
-Belly laugh
+laughter
-Chuckle, chortle
+laughter
-Crying, sobbing
+crying
-Baby cry, infant cry
+crying
-Whimper
+crying
-Wail, moan
+yell
-Sigh
+sigh
-Singing
+singing
-Choir
+choir
-Yodeling
+yodeling
-Chant
+chant
-Mantra
+mantra
-Child singing
+child_singing
-Synthetic singing
+synthetic_singing
-Rapping
+rapping
-Humming
+humming
-Groan
+groan
-Grunt
+grunt
-Whistling
+whistling
-Breathing
+breathing
-Wheeze
+wheeze
-Snoring
+snoring
-Gasp
+gasp
-Pant
+pant
-Snort
+snort
-Cough
+cough
-Throat clearing
+throat_clearing
-Sneeze
+sneeze
-Sniff
+sniff
-Run
+run
-Shuffle
+shuffle
-Walk, footsteps
+footsteps
-Chewing, mastication
+chewing
-Biting
+biting
-Gargling
+gargling
-Stomach rumble
+stomach_rumble
-Burping, eructation
+burping
-Hiccup
+hiccup
-Fart
+fart
-Hands
+hands
-Finger snapping
+finger_snapping
-Clapping
+clapping
-Heart sounds, heartbeat
+heartbeat
-Heart murmur
+heart_murmur
-Cheering
+cheering
-Applause
+applause
-Chatter
+chatter
-Crowd
+crowd
-Hubbub, speech noise, speech babble
+speech
-Children playing
+children_playing
-Animal
+animal
-Domestic animals, pets
+pets
-Dog
+dog
-Bark
+bark
-Yip
+yip
-Howl
+howl
-Bow-wow
+bow-wow
-Growling
+growling
-Whimper (dog)
+whimper_dog
-Cat
+cat
-Purr
+purr
-Meow
+meow
-Hiss
+hiss
-Caterwaul
+caterwaul
-Livestock, farm animals, working animals
+livestock
-Horse
+horse
-Clip-clop
+clip-clop
-Neigh, whinny
+neigh
-Cattle, bovinae
+cattle
-Moo
+moo
-Cowbell
+cowbell
-Pig
+pig
-Oink
+oink
-Goat
+goat
-Bleat
+bleat
-Sheep
+sheep
-Fowl
+fowl
-Chicken, rooster
+chicken
-Cluck
+cluck
-Crowing, cock-a-doodle-doo
+cock-a-doodle-doo
-Turkey
+turkey
 Gobble
 Duck
 Quack
--- a/frigate/config.py
+++ b/frigate/config.py
@ -40,6 +40,7 @@ DEFAULT_TIME_FORMAT = "%m/%d/%Y %H:%M:%S"
 FRIGATE_ENV_VARS = {k: v for k, v in os.environ.items() if k.startswith("FRIGATE_")}
 DEFAULT_TRACKED_OBJECTS = ["person"]
 # Audio labels listened for when `AudioConfig.listen` is not set explicitly.
 # NOTE(review): "scream" is not among the relabeled entries visible in
 # audio-labelmap.txt ("Screaming" is mapped to "yell") - confirm this label exists.
 DEFAULT_LISTEN_AUDIO = ["bark", "speech", "yell", "scream"]
 DEFAULT_DETECTORS = {"cpu": {"type": "cpu"}}
@ -387,6 +388,16 @@ class ObjectConfig(FrigateBaseModel):
    mask: Union[str, List[str]] = Field(default="", title="Object mask.")
 # Audio event settings. The same model backs the `audio` field of both
 # FrigateConfig (global) and CameraConfig (per camera).
 class AudioConfig(FrigateBaseModel):
    # Audio events are opt-in; disabled unless set to True.
    enabled: bool = Field(default=False, title="Enable audio events.")
    # Seconds without hearing a given audio type before its event is ended.
    max_not_heard: int = Field(
        default=30, title="Seconds of not hearing the type of audio to end the event."
    )
    # Audio labels to listen for; defaults to DEFAULT_LISTEN_AUDIO.
    listen: List[str] = Field(
        default=DEFAULT_LISTEN_AUDIO, title="Audio to listen for."
    )
 class BirdseyeModeEnum(str, Enum):
    objects = "objects"
    motion = "motion"
@ -466,6 +477,7 @@ class FfmpegConfig(FrigateBaseModel):
 class CameraRoleEnum(str, Enum):
    audio = "audio"
    record = "record"
    rtmp = "rtmp"
    detect = "detect"
@ -627,6 +639,9 @@ class CameraConfig(FrigateBaseModel):
    objects: ObjectConfig = Field(
        default_factory=ObjectConfig, title="Object configuration."
    )
    audio: AudioConfig = Field(
        default_factory=AudioConfig, title="Audio events configuration."
    )
    motion: Optional[MotionConfig] = Field(title="Motion detection configuration.")
    detect: DetectConfig = Field(
        default_factory=DetectConfig, title="Object detection configuration."
@ -657,12 +672,16 @@ class CameraConfig(FrigateBaseModel):
        # add roles to the input if there is only one
        if len(config["ffmpeg"]["inputs"]) == 1:
            single_input = config["ffmpeg"]["inputs"][0]
            # Capture the optional roles before the list is overwritten below.
            # Both are read from the ffmpeg input: inputs live under
            # config["ffmpeg"], not config["audio"].
            existing_roles = single_input.get("roles", [])
            has_rtmp = "rtmp" in existing_roles
            has_audio = "audio" in existing_roles
            # A lone input always serves both recording and detection.
            single_input["roles"] = [
                "record",
                "detect",
            ]
            # Re-add the optional roles to the roles list (the input itself is
            # a dict and has no append()).
            if has_audio:
                single_input["roles"].append("audio")
            if has_rtmp:
                single_input["roles"].append("rtmp")
@ -795,6 +814,11 @@ def verify_config_roles(camera_config: CameraConfig) -> None:
            f"Camera {camera_config.name} has rtmp enabled, but rtmp is not assigned to an input."
        )
    if camera_config.audio.enabled and "audio" not in assigned_roles:
        raise ValueError(
            f"Camera {camera_config.name} has audio events enabled, but audio is not assigned to an input."
        )
 def verify_valid_live_stream_name(
    frigate_config: FrigateConfig, camera_config: CameraConfig
@ -907,6 +931,9 @@ class FrigateConfig(FrigateBaseModel):
    objects: ObjectConfig = Field(
        default_factory=ObjectConfig, title="Global object configuration."
    )
    audio: AudioConfig = Field(
        default_factory=AudioConfig, title="Global Audio events configuration."
    )
    motion: Optional[MotionConfig] = Field(
        title="Global motion detection configuration."
    )
@ -931,6 +958,7 @@ class FrigateConfig(FrigateBaseModel):
        # Global config to propagate down to camera level
        global_config = config.dict(
            include={
                "audio": ...,
                "birdseye": ...,
                "record": ...,
                "snapshots": ...,
--- a/frigate/events/audio.py
+++ b/frigate/events/audio.py
@ -0,0 +1,51 @@
 """Handle creating audio events."""
 import logging
 import multiprocessing as mp
 import signal
 import threading
 from types import FrameType
 from typing import Optional
 from setproctitle import setproctitle
 from frigate.config import AudioConfig, CameraConfig, FrigateConfig
 from frigate.const import CACHE_DIR
 from frigate.util import listen
 logger = logging.getLogger(__name__)
 # ffmpeg command template; the two placeholders are the input path and the
 # output path, in that order.
 #   -vn         drop the video stream
 #   -f s16le    raw signed 16-bit little-endian PCM output
 #   -ar 16000   16 kHz sample rate
 #   -ac 1       single (mono) channel
 #   -y          overwrite the output without prompting
 FFMPEG_COMMAND = "ffmpeg -vn -i {} -f s16le -ar 16000 -ac 1 -y {}"
 def listen_to_audio(config: FrigateConfig, event_queue: mp.Queue) -> None:
    """Start an audio event thread for every camera that has audio enabled.
    Installs SIGTERM/SIGINT handlers that set a shared stop event, labels the
    current thread and process as the audio manager, then starts one
    AudioEventMaintainer per camera where both the camera and its audio
    config are enabled.
    Args:
        config: Full Frigate configuration; only ``config.cameras`` is read.
        event_queue: Currently unused by this function.
    """
    stop_event = mp.Event()
    def receiveSignal(signalNumber: int, frame: Optional[FrameType]) -> None:
        # Ask the worker threads to stop; the event is shared with each of them.
        stop_event.set()
    signal.signal(signal.SIGTERM, receiveSignal)
    signal.signal(signal.SIGINT, receiveSignal)
    # Name this process after what it does; the previous "recording_manager"
    # labels were copied from the recording process and misidentified it.
    threading.current_thread().name = "process:audio_manager"
    setproctitle("frigate.audio_manager")
    # NOTE(review): listen() comes from frigate.util and is not visible here -
    # presumably it wires up logging/debug hooks for the process; confirm.
    listen()
    for camera in config.cameras.values():
        if camera.enabled and camera.audio.enabled:
            # A Thread does nothing until started; constructing it alone
            # discarded the object without ever invoking run().
            AudioEventMaintainer(camera, stop_event).start()
 class AudioEventMaintainer(threading.Thread):
    """Per-camera thread that prepares the ffmpeg command for audio capture."""
    def __init__(self, camera: CameraConfig, stop_event: mp.Event) -> None:
        super().__init__()
        # The thread name can only be assigned once Thread.__init__ has run.
        self.name = f"{camera.name}_audio_event_processor"
        self.stop_event = stop_event
        self.config = camera
    def run(self) -> None:
        """Build the output path and the ffmpeg command line for this camera."""
        camera_name = self.config.name
        self.pipe = f"{CACHE_DIR}/{camera_name}-audio"
        # Every input tagged with the "audio" role; the first one is used.
        audio_sources = [
            stream.path
            for stream in self.config.ffmpeg.inputs
            if "audio" in stream.roles
        ]
        self.ffmpeg_command = FFMPEG_COMMAND.format(audio_sources[0], self.pipe)