From e43a7e65f9ce3ffc6683639a4bdb8953025f353b Mon Sep 17 00:00:00 2001 From: Nick Mowen Date: Sat, 17 Jun 2023 16:20:42 -0600 Subject: [PATCH] Start adding config for audio --- audio-labelmap.txt | 194 ++++++++++++++++++++-------------------- frigate/config.py | 28 ++++++ frigate/events/audio.py | 51 +++++++++++ 3 files changed, 176 insertions(+), 97 deletions(-) create mode 100644 frigate/events/audio.py diff --git a/audio-labelmap.txt b/audio-labelmap.txt index c3e864e7d..7a5e132c5 100644 --- a/audio-labelmap.txt +++ b/audio-labelmap.txt @@ -1,101 +1,101 @@ -Speech -Child speech, kid speaking -Conversation -Narration, monologue -Babbling -Speech synthesizer -Shout -Bellow -Whoop -Yell -Children shouting -Screaming +speech +speech +speech +speech +babbling +speech +yell +bellow +whoop +yell +yell +yell Whispering -Laughter -Baby laughter -Giggle -Snicker -Belly laugh -Chuckle, chortle -Crying, sobbing -Baby cry, infant cry -Whimper -Wail, moan -Sigh -Singing -Choir -Yodeling -Chant -Mantra -Child singing -Synthetic singing -Rapping -Humming -Groan -Grunt -Whistling -Breathing -Wheeze -Snoring -Gasp -Pant -Snort -Cough -Throat clearing -Sneeze -Sniff -Run -Shuffle -Walk, footsteps -Chewing, mastication -Biting -Gargling -Stomach rumble -Burping, eructation -Hiccup -Fart -Hands -Finger snapping -Clapping -Heart sounds, heartbeat -Heart murmur -Cheering -Applause -Chatter -Crowd -Hubbub, speech noise, speech babble -Children playing -Animal -Domestic animals, pets -Dog -Bark -Yip -Howl -Bow-wow -Growling -Whimper (dog) -Cat -Purr -Meow -Hiss -Caterwaul -Livestock, farm animals, working animals -Horse -Clip-clop -Neigh, whinny -Cattle, bovinae -Moo -Cowbell -Pig -Oink -Goat -Bleat -Sheep -Fowl -Chicken, rooster -Cluck -Crowing, cock-a-doodle-doo -Turkey +laughter +laughter +laughter +snicker +laughter +laughter +crying +crying +crying +yell +sigh +singing +choir +yodeling +chant +mantra +child_singing +synthetic_singing +rapping +humming +groan +grunt +whistling 
+breathing +wheeze +snoring +gasp +pant +snort +cough +throat_clearing +sneeze +sniff +run +shuffle +footsteps +chewing +biting +gargling +stomach_rumble +burping +hiccup +fart +hands +finger_snapping +clapping +heartbeat +heart_murmur +cheering +applause +chatter +crowd +speech +children_playing +animal +pets +dog +bark +yip +howl +bow-wow +growling +whimper_dog +cat +purr +meow +hiss +caterwaul +livestock +horse +clip-clop +neigh +cattle +moo +cowbell +pig +oink +goat +bleat +sheep +fowl +chicken +cluck +cock-a-doodle-doo +turkey Gobble Duck Quack diff --git a/frigate/config.py b/frigate/config.py index 9b434ca1e..b2b837c5c 100644 --- a/frigate/config.py +++ b/frigate/config.py @@ -40,6 +40,7 @@ DEFAULT_TIME_FORMAT = "%m/%d/%Y %H:%M:%S" FRIGATE_ENV_VARS = {k: v for k, v in os.environ.items() if k.startswith("FRIGATE_")} DEFAULT_TRACKED_OBJECTS = ["person"] +DEFAULT_LISTEN_AUDIO = ["bark", "speech", "yell", "scream"] DEFAULT_DETECTORS = {"cpu": {"type": "cpu"}} @@ -387,6 +388,16 @@ class ObjectConfig(FrigateBaseModel): mask: Union[str, List[str]] = Field(default="", title="Object mask.") +class AudioConfig(FrigateBaseModel): + enabled: bool = Field(default=False, title="Enable audio events.") + max_not_heard: int = Field( + default=30, title="Seconds of not hearing the type of audio to end the event." + ) + listen: List[str] = Field( + default=DEFAULT_LISTEN_AUDIO, title="Audio to listen for." + ) + + class BirdseyeModeEnum(str, Enum): objects = "objects" motion = "motion" @@ -466,6 +477,7 @@ class FfmpegConfig(FrigateBaseModel): class CameraRoleEnum(str, Enum): + audio = "audio" record = "record" rtmp = "rtmp" detect = "detect" @@ -627,6 +639,9 @@ class CameraConfig(FrigateBaseModel): objects: ObjectConfig = Field( default_factory=ObjectConfig, title="Object configuration." ) + audio: AudioConfig = Field( + default_factory=AudioConfig, title="Audio events configuration." 
+ ) motion: Optional[MotionConfig] = Field(title="Motion detection configuration.") detect: DetectConfig = Field( default_factory=DetectConfig, title="Object detection configuration." @@ -657,12 +672,16 @@ class CameraConfig(FrigateBaseModel): # add roles to the input if there is only one if len(config["ffmpeg"]["inputs"]) == 1: has_rtmp = "rtmp" in config["ffmpeg"]["inputs"][0].get("roles", []) + has_audio = "audio" in config["ffmpeg"]["inputs"][0].get("roles", []) config["ffmpeg"]["inputs"][0]["roles"] = [ "record", "detect", ] + if has_audio: + config["ffmpeg"]["inputs"][0]["roles"].append("audio") + if has_rtmp: config["ffmpeg"]["inputs"][0]["roles"].append("rtmp") @@ -795,6 +814,11 @@ def verify_config_roles(camera_config: CameraConfig) -> None: f"Camera {camera_config.name} has rtmp enabled, but rtmp is not assigned to an input." ) + if camera_config.audio.enabled and "audio" not in assigned_roles: + raise ValueError( + f"Camera {camera_config.name} has audio events enabled, but audio is not assigned to an input." + ) + def verify_valid_live_stream_name( frigate_config: FrigateConfig, camera_config: CameraConfig @@ -907,6 +931,9 @@ class FrigateConfig(FrigateBaseModel): objects: ObjectConfig = Field( default_factory=ObjectConfig, title="Global object configuration." ) + audio: AudioConfig = Field( + default_factory=AudioConfig, title="Global Audio events configuration." + ) motion: Optional[MotionConfig] = Field( title="Global motion detection configuration." 
) @@ -931,6 +958,7 @@ class FrigateConfig(FrigateBaseModel): # Global config to propagate down to camera level global_config = config.dict( include={ + "audio": ..., "birdseye": ..., "record": ..., "snapshots": ..., diff --git a/frigate/events/audio.py b/frigate/events/audio.py new file mode 100644 index 000000000..13975edac --- /dev/null +++ b/frigate/events/audio.py @@ -0,0 +1,51 @@ +"""Handle creating audio events.""" + +import logging +import multiprocessing as mp +import signal +import threading +from types import FrameType +from typing import Optional + +from setproctitle import setproctitle + +from frigate.config import AudioConfig, CameraConfig, FrigateConfig +from frigate.const import CACHE_DIR +from frigate.util import listen + +logger = logging.getLogger(__name__) + +FFMPEG_COMMAND = "ffmpeg -vn -i {} -f s16le -ar 16000 -ac 1 -y {}" + + +def listen_to_audio(config: FrigateConfig, event_queue: mp.Queue) -> None: + stop_event = mp.Event() + + def receiveSignal(signalNumber: int, frame: Optional[FrameType]) -> None: + stop_event.set() + + signal.signal(signal.SIGTERM, receiveSignal) + signal.signal(signal.SIGINT, receiveSignal) + + threading.current_thread().name = "process:audio_manager" + setproctitle("frigate.audio_manager") + listen() + + for camera in config.cameras.values(): + if camera.enabled and camera.audio.enabled: + AudioEventMaintainer(camera, stop_event).start() + + +class AudioEventMaintainer(threading.Thread): + def __init__(self, camera: CameraConfig, stop_event: mp.Event) -> None: + threading.Thread.__init__(self) + self.name = f"{camera.name}_audio_event_processor" + self.config = camera + self.stop_event = stop_event + + def run(self) -> None: + self.pipe = f"{CACHE_DIR}/{self.config.name}-audio" + self.ffmpeg_command = FFMPEG_COMMAND.format( + [i.path for i in self.config.ffmpeg.inputs if "audio" in i.roles][0], + self.pipe, + )