"""Handle creating audio events."""
import logging
import multiprocessing as mp
import numpy as np
import os
import signal
import subprocess as sp
import threading
from types import FrameType
from typing import Optional

from setproctitle import setproctitle

from frigate.config import CameraConfig, FrigateConfig
from frigate.const import (
    AUDIO_DETECTOR,
    AUDIO_DURATION,
    AUDIO_FORMAT,
    AUDIO_SAMPLE_RATE,
    CACHE_DIR,
)
from frigate.ffmpeg_presets import parse_preset_input
from frigate.object_detection import load_labels
from frigate.util import get_ffmpeg_arg_list, listen

try:
    from tflite_runtime.interpreter import Interpreter
except ModuleNotFoundError:
    from tensorflow.lite.python.interpreter import Interpreter

logger = logging.getLogger(__name__)
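
# ffmpeg command template; the placeholders are filled per camera with the
# global and input args, the audio input path, and the output pipe.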
FFMPEG_COMMAND = (
    f"ffmpeg {{}} -i {{}} -f {AUDIO_FORMAT} -ar {AUDIO_SAMPLE_RATE} -ac 1 -y {{}}"
)


def listen_to_audio(config: FrigateConfig, event_queue: mp.Queue) -> None:
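    """Start an audio event maintainer for each enabled camera with audio enabled."""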
    stop_event = mp.Event()

    def receiveSignal(signalNumber: int, frame: Optional[FrameType]) -> None:
        stop_event.set()

    signal.signal(signal.SIGTERM, receiveSignal)
    signal.signal(signal.SIGINT, receiveSignal)

    threading.current_thread().name = "process:audio_manager"
    setproctitle("frigate.audio_manager")
    listen()

    for camera in config.cameras.values():
        if camera.enabled and camera.audio.enabled:
            AudioEventMaintainer(camera, stop_event).start()


class AudioTfl:
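    """Thin wrapper around the TFLite audio classification model."""
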
    def __init__(self):
        self.labels = load_labels("/audio-labelmap.txt")
        self.interpreter = Interpreter(
            model_path="/cpu_audio_model.tflite",
            num_threads=2,
        )

        self.interpreter.allocate_tensors()

        self.tensor_input_details = self.interpreter.get_input_details()
        self.tensor_output_details = self.interpreter.get_output_details()

    def _detect_raw(self, tensor_input):
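        """Run inference and return raw detections.

        Each filled row is [class_id, score, -1, -1, -1, -1]; audio has no boxes.
        """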
        self.interpreter.set_tensor(
            self.tensor_input_details[0]["index"], tensor_input
        )
        self.interpreter.invoke()
        detections = np.zeros((20, 6), np.float32)

        res = self.interpreter.get_tensor(self.tensor_output_details[0]["index"])[0]
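        # Keep the 20 highest-scoring classes, ordered by descending score, and
        # drop any classes whose score is zero.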
        non_zero_indices = res > 0
        class_ids = np.argpartition(-res, 20)[:20]
        class_ids = class_ids[np.argsort(-res[class_ids])]
        class_ids = class_ids[non_zero_indices[class_ids]]
        scores = res[class_ids]
        boxes = np.full((scores.shape[0], 4), -1, np.float32)
        count = len(scores)
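
        # Copy detections that meet the minimum score into the fixed-size output;
        # audio detections have no bounding boxes, so the box values stay at -1.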
        for i in range(count):
            if scores[i] < 0.4 or i == 20:
                break
            detections[i] = [
                class_ids[i],
                float(scores[i]),
                boxes[i][0],
                boxes[i][1],
                boxes[i][2],
                boxes[i][3],
            ]

        return detections

    def detect(self, tensor_input, threshold=0.8):
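        """Return (label, score, box) tuples for detections that meet the threshold."""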
        detections = []

        raw_detections = self._detect_raw(tensor_input)

        for d in raw_detections:
            if d[1] < threshold:
                break
            detections.append(
                (self.labels[int(d[0])], float(d[1]), (d[2], d[3], d[4], d[5]))
            )
        return detections


class AudioEventMaintainer(threading.Thread):
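    """Read a camera's audio from ffmpeg via a named pipe and run detection on it."""
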
    def __init__(self, camera: CameraConfig, stop_event: mp.Event) -> None:
        threading.Thread.__init__(self)
        self.name = f"{camera.name}_audio_event_processor"
        self.config = camera
        self.stop_event = stop_event
        self.detector = AudioTfl()
        self.shape = (int(round(AUDIO_DURATION * AUDIO_SAMPLE_RATE)),)
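        # the pipe carries 16-bit samples, so one chunk is two bytes per sample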
        self.chunk_size = int(round(AUDIO_DURATION * AUDIO_SAMPLE_RATE * 2))
        self.pipe = f"{CACHE_DIR}/{self.config.name}-audio"
        self.ffmpeg_cmd = get_ffmpeg_arg_list(
            FFMPEG_COMMAND.format(
                " ".join(
                    self.config.ffmpeg.global_args
                    + parse_preset_input("preset-rtsp-audio-only", 1)
                ),
                [i.path for i in self.config.ffmpeg.inputs if "audio" in i.roles][0],
                self.pipe,
            )
        )
        self.pipe_file = None
        self.audio_listener = None

    def detect_audio(self, audio) -> None:
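        """Normalize int16 samples and check detections against configured labels."""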
        waveform = (audio / 32768.0).astype(np.float32)
        model_detections = self.detector.detect(waveform)

        for label, score, _ in model_detections:
            if label not in self.config.audio.listen:
                continue

            logger.debug(f"Detected audio: {label} with score {score}")
            # TODO handle valid detect

    def init_ffmpeg(self) -> None:
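        """Create the audio pipe if needed and start ffmpeg writing into it."""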
        try:
            os.mkfifo(self.pipe)
        except FileExistsError:
            pass

        logger.debug("Made the pipe")

        self.audio_listener = sp.Popen(
            self.ffmpeg_cmd,
            stdout=sp.DEVNULL,
            stdin=sp.DEVNULL,
            start_new_session=True,
        )
        logger.debug("Started ffmpeg")

    def read_audio(self) -> None:
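        """Read one chunk of samples from the pipe and run detection on it."""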
        if self.pipe_file is None:
            self.pipe_file = open(self.pipe, "rb")

        try:
            audio = np.frombuffer(self.pipe_file.read(self.chunk_size), dtype=np.int16)
            self.detect_audio(audio)
        except BrokenPipeError as e:
            logger.error(f"There was a broken pipe :: {e}")
            # TODO fix broken pipe

    def run(self) -> None:
        self.init_ffmpeg()

        while not self.stop_event.is_set():
            self.read_audio()