Subclass Process for audio_process

George Tsiamasiotis 2024-09-24 10:29:05 +03:00
parent a7f1f8d327
commit 5c421375c0
2 changed files with 117 additions and 112 deletions

frigate/app.py

@@ -37,7 +37,7 @@ from frigate.const import (
     RECORD_DIR,
 )
 from frigate.embeddings import EmbeddingsContext, manage_embeddings
-from frigate.events.audio import listen_to_audio
+from frigate.events.audio import AudioProcessor
 from frigate.events.cleanup import EventCleanup
 from frigate.events.external import ExternalEventProcessor
 from frigate.events.maintainer import EventProcessor
@@ -489,20 +489,9 @@ class FrigateApp:
         logger.info(f"Capture process started for {name}: {capture_process.pid}")
 
     def start_audio_processors(self) -> None:
-        self.audio_process = None
-        if len([c for c in self.config.cameras.values() if c.audio.enabled]) > 0:
-            self.audio_process = mp.Process(
-                target=listen_to_audio,
-                name="audio_capture",
-                args=(
-                    self.config,
-                    self.camera_metrics,
-                ),
-            )
-            self.audio_process.daemon = True
-            self.audio_process.start()
-            self.processes["audio_detector"] = self.audio_process.pid or 0
-            logger.info(f"Audio process started: {self.audio_process.pid}")
+        self.audio_process = AudioProcessor(self.config, self.camera_metrics)
+        self.audio_process.start()
+        self.processes["audio_detector"] = self.audio_process.pid or 0
 
     def start_timeline_processor(self) -> None:
         self.timeline_processor = TimelineProcessor(
@@ -686,7 +675,6 @@ class FrigateApp:
         ).execute()
 
         # stop the audio process
-        if self.audio_process is not None:
-            self.audio_process.terminate()
-            self.audio_process.join()
+        self.audio_process.terminate()
+        self.audio_process.join()
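Note: the old code wired the audio process up by hand with mp.Process(target=listen_to_audio, ...); the commit gets the same behavior from a Process subclass. A minimal sketch of that pattern, with illustrative names that are not part of the Frigate codebase:

import multiprocessing as mp


class WorkerProcess(mp.Process):
    def __init__(self, config: dict) -> None:
        # name= and daemon= replace the old manual `.daemon = True` assignment
        super().__init__(name="example.worker", daemon=True)
        self.config = config  # stored on the instance, available in the child

    def run(self) -> None:
        # run() executes in the child process, taking over the role of the
        # old module-level target function
        print(f"worker running with {len(self.config)} camera(s)")


if __name__ == "__main__":
    proc = WorkerProcess({"front_door": {}})
    proc.start()  # spawns the child, which invokes run()
    print(proc.pid)  # the parent still gets a pid to record, as app.py does
    proc.join()

Because the subclass carries its arguments as attributes, the caller no longer needs the args=(...) tuple, and the enabled-camera check can move out of FrigateApp into the process itself (it now lives in AudioProcessor.run()).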

frigate/events/audio.py

@@ -4,14 +4,13 @@ import datetime
 import logging
 import multiprocessing as mp
 import signal
+import sys
 import threading
 import time
-from types import FrameType
-from typing import Optional, Tuple
+from typing import Tuple
 
 import numpy as np
 import requests
-from setproctitle import setproctitle
 
 from frigate.comms.config_updater import ConfigSubscriber
 from frigate.comms.detections_updater import DetectionPublisher, DetectionTypeEnum
@@ -30,7 +29,6 @@ from frigate.log import LogPipe
 from frigate.object_detection import load_labels
 from frigate.types import CameraMetricsTypes
 from frigate.util.builtin import get_ffmpeg_arg_list
-from frigate.util.services import listen
 from frigate.video import start_or_restart_ffmpeg, stop_ffmpeg
@@ -38,8 +36,6 @@ try:
 except ModuleNotFoundError:
     from tensorflow.lite.python.interpreter import Interpreter
 
-logger = logging.getLogger(__name__)
-
 
 def get_ffmpeg_command(ffmpeg: FfmpegConfig) -> list[str]:
     ffmpeg_input: CameraInput = [i for i in ffmpeg.inputs if "audio" in i.roles][0]
@@ -69,97 +65,58 @@ def get_ffmpeg_command(ffmpeg: FfmpegConfig) -> list[str]:
     )
 
 
-def listen_to_audio(
-    config: FrigateConfig,
-    camera_metrics: dict[str, CameraMetricsTypes],
-) -> None:
-    stop_event = mp.Event()
-    audio_threads: list[threading.Thread] = []
-
-    def receiveSignal(signalNumber: int, frame: Optional[FrameType]) -> None:
-        stop_event.set()
-
-    signal.signal(signal.SIGTERM, receiveSignal)
-    signal.signal(signal.SIGINT, receiveSignal)
-
-    threading.current_thread().name = "process:audio_manager"
-    setproctitle("frigate.audio_manager")
-    listen()
-
-    for camera in config.cameras.values():
-        if camera.enabled and camera.audio.enabled_in_config:
-            audio = AudioEventMaintainer(
-                camera,
-                camera_metrics,
-                stop_event,
-            )
-            audio_threads.append(audio)
-            audio.start()
-
-    for thread in audio_threads:
-        thread.join()
-
-    logger.info("Exiting audio detector...")
-
-
-class AudioTfl:
-    def __init__(self, stop_event: mp.Event, num_threads=2):
-        self.stop_event = stop_event
-        self.num_threads = num_threads
-        self.labels = load_labels("/audio-labelmap.txt", prefill=521)
-        self.interpreter = Interpreter(
-            model_path="/cpu_audio_model.tflite",
-            num_threads=self.num_threads,
-        )
-
-        self.interpreter.allocate_tensors()
-
-        self.tensor_input_details = self.interpreter.get_input_details()
-        self.tensor_output_details = self.interpreter.get_output_details()
-
-    def _detect_raw(self, tensor_input):
-        self.interpreter.set_tensor(self.tensor_input_details[0]["index"], tensor_input)
-        self.interpreter.invoke()
-        detections = np.zeros((20, 6), np.float32)
-
-        res = self.interpreter.get_tensor(self.tensor_output_details[0]["index"])[0]
-        non_zero_indices = res > 0
-        class_ids = np.argpartition(-res, 20)[:20]
-        class_ids = class_ids[np.argsort(-res[class_ids])]
-        class_ids = class_ids[non_zero_indices[class_ids]]
-        scores = res[class_ids]
-        boxes = np.full((scores.shape[0], 4), -1, np.float32)
-        count = len(scores)
-
-        for i in range(count):
-            if scores[i] < AUDIO_MIN_CONFIDENCE or i == 20:
-                break
-
-            detections[i] = [
-                class_ids[i],
-                float(scores[i]),
-                boxes[i][0],
-                boxes[i][1],
-                boxes[i][2],
-                boxes[i][3],
-            ]
-
-        return detections
-
-    def detect(self, tensor_input, threshold=AUDIO_MIN_CONFIDENCE):
-        detections = []
-
-        if self.stop_event.is_set():
-            return detections
-
-        raw_detections = self._detect_raw(tensor_input)
-
-        for d in raw_detections:
-            if d[1] < threshold:
-                break
-
-            detections.append(
-                (self.labels[int(d[0])], float(d[1]), (d[2], d[3], d[4], d[5]))
-            )
-        return detections
+class AudioProcessor(mp.Process):
+    def __init__(
+        self,
+        config: FrigateConfig,
+        camera_metrics: dict[str, CameraMetricsTypes],
+    ):
+        super().__init__(name="frigate.audio_manager", daemon=True)
+
+        self.logger = logging.getLogger(self.name)
+        self.camera_metrics = camera_metrics
+        self.audio_cameras = [
+            c
+            for c in config.cameras.values()
+            if c.enabled and c.audio.enabled_in_config
+        ]
+
+    def run(self) -> None:
+        stop_event = threading.Event()
+        audio_threads: list[AudioEventMaintainer] = []
+
+        threading.current_thread().name = "process:audio_manager"
+        signal.signal(signal.SIGTERM, lambda sig, frame: sys.exit())
+
+        if len(self.audio_cameras) == 0:
+            return
+
+        try:
+            for camera in self.audio_cameras:
+                audio_thread = AudioEventMaintainer(
+                    camera,
+                    self.camera_metrics,
+                    stop_event,
+                )
+                audio_threads.append(audio_thread)
+                audio_thread.start()
+
+            self.logger.info(f"Audio process started (pid: {self.pid})")
+
+            while True:
+                signal.pause()
+        finally:
+            stop_event.set()
+
+            for thread in audio_threads:
+                thread.join(1)
+                if thread.is_alive():
+                    self.logger.info(f"Waiting for thread {thread.name:s} to exit")
+                    thread.join(10)
+
+            for thread in audio_threads:
+                if thread.is_alive():
+                    self.logger.warning(f"Thread {thread.name} is still alive")
+
+            self.logger.info("Exiting audio process")
@@ -167,10 +124,10 @@ class AudioEventMaintainer(threading.Thread):
         self,
         camera: CameraConfig,
         camera_metrics: dict[str, CameraMetricsTypes],
-        stop_event: mp.Event,
+        stop_event: threading.Event,
     ) -> None:
-        threading.Thread.__init__(self)
-        self.name = f"{camera.name}_audio_event_processor"
+        super().__init__(name=f"{camera.name}_audio_event_processor")
         self.config = camera
         self.camera_metrics = camera_metrics
         self.detections: dict[dict[str, any]] = {}
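Note: stop_event also changes from mp.Event to threading.Event. The maintainer threads are now created inside the child's run(), so the flag only has to cross threads within one process and the lighter primitive suffices; super().__init__(name=...) likewise collapses the old two-step Thread.__init__(self) / self.name = ... idiom. A minimal sketch, with illustrative names:

import threading


class Maintainer(threading.Thread):
    def __init__(self, stop_event: threading.Event) -> None:
        # one call replaces Thread.__init__(self) plus the self.name assignment
        super().__init__(name="example_audio_event_processor")
        self.stop_event = stop_event

    def run(self) -> None:
        # wait() doubles as both the polling sleep and the shutdown check
        while not self.stop_event.wait(timeout=0.1):
            pass  # ... poll audio and emit detections ...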
@@ -206,7 +163,7 @@ class AudioEventMaintainer(threading.Thread):
         audio_detections = []
 
         for label, score, _ in model_detections:
-            logger.debug(
+            self.logger.debug(
                 f"{self.config.name} heard {label} with a score of {score}"
             )
@@ -291,7 +248,7 @@ class AudioEventMaintainer(threading.Thread):
         if resp.status_code == 200:
             self.detections[detection["label"]] = None
         else:
-            self.logger.warn(
+            self.logger.warning(
                 f"Failed to end audio event {detection['id']} with status code {resp.status_code}"
             )
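(logging.Logger.warn is a deprecated alias for warning(), so this rename also silences a DeprecationWarning on newer Python versions.)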
@@ -350,3 +307,63 @@ class AudioEventMaintainer(threading.Thread):
         self.requestor.stop()
         self.config_subscriber.stop()
         self.detection_publisher.stop()
+
+
+class AudioTfl:
+    def __init__(self, stop_event: threading.Event, num_threads=2):
+        self.stop_event = stop_event
+        self.num_threads = num_threads
+        self.labels = load_labels("/audio-labelmap.txt", prefill=521)
+        self.interpreter = Interpreter(
+            model_path="/cpu_audio_model.tflite",
+            num_threads=self.num_threads,
+        )
+
+        self.interpreter.allocate_tensors()
+
+        self.tensor_input_details = self.interpreter.get_input_details()
+        self.tensor_output_details = self.interpreter.get_output_details()
+
+    def _detect_raw(self, tensor_input):
+        self.interpreter.set_tensor(self.tensor_input_details[0]["index"], tensor_input)
+        self.interpreter.invoke()
+        detections = np.zeros((20, 6), np.float32)
+
+        res = self.interpreter.get_tensor(self.tensor_output_details[0]["index"])[0]
+        non_zero_indices = res > 0
+        class_ids = np.argpartition(-res, 20)[:20]
+        class_ids = class_ids[np.argsort(-res[class_ids])]
+        class_ids = class_ids[non_zero_indices[class_ids]]
+        scores = res[class_ids]
+        boxes = np.full((scores.shape[0], 4), -1, np.float32)
+        count = len(scores)
+
+        for i in range(count):
+            if scores[i] < AUDIO_MIN_CONFIDENCE or i == 20:
+                break
+
+            detections[i] = [
+                class_ids[i],
+                float(scores[i]),
+                boxes[i][0],
+                boxes[i][1],
+                boxes[i][2],
+                boxes[i][3],
+            ]
+
+        return detections
+
+    def detect(self, tensor_input, threshold=AUDIO_MIN_CONFIDENCE):
+        detections = []
+
+        if self.stop_event.is_set():
+            return detections
+
+        raw_detections = self._detect_raw(tensor_input)
+
+        for d in raw_detections:
+            if d[1] < threshold:
+                break
+
+            detections.append(
+                (self.labels[int(d[0])], float(d[1]), (d[2], d[3], d[4], d[5]))
+            )
+        return detections
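Note on the relocated (otherwise unchanged) AudioTfl class: _detect_raw keeps the 20 best-scoring classes via np.argpartition, which selects the top k in O(n) without fully sorting all 521 scores, then orders just those 20 and masks out zero scores. A standalone sketch of that selection step, using a fabricated score vector:

import numpy as np

res = np.random.rand(521).astype(np.float32)  # stand-in for the model output

class_ids = np.argpartition(-res, 20)[:20]          # top 20 ids, in no order
class_ids = class_ids[np.argsort(-res[class_ids])]  # sort those 20 by score
class_ids = class_ids[(res > 0)[class_ids]]         # drop non-positive scores
scores = res[class_ids]

print(list(zip(class_ids.tolist(), scores.tolist()))[:5])

Incidentally, the i == 20 guard in the loop is unreachable: count = len(scores) is at most 20, so i never exceeds 19; the AUDIO_MIN_CONFIDENCE comparison is what actually stops the loop early.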