diff --git a/frigate/app.py b/frigate/app.py
index 9d85f461e..e61510f7b 100644
--- a/frigate/app.py
+++ b/frigate/app.py
@@ -29,6 +29,7 @@ from frigate.const import (
     MODEL_CACHE_DIR,
     RECORD_DIR,
 )
+from frigate.events.audio import listen_to_audio
 from frigate.events.cleanup import EventCleanup
 from frigate.events.external import ExternalEventProcessor
 from frigate.events.maintainer import EventProcessor
@@ -390,6 +391,18 @@ class FrigateApp:
             capture_process.start()
             logger.info(f"Capture process started for {name}: {capture_process.pid}")
 
+    def start_audio_processors(self) -> None:
+        """Fork the audio detection process and start it."""
+        audio_process = mp.Process(
+            target=listen_to_audio,
+            name="audio_capture",
+            args=(self.config, self.event_queue),
+        )
+        # Daemonize so a stuck audio child cannot block interpreter exit.
+        audio_process.daemon = True
+        # The process must actually be started, otherwise pid is None.
+        audio_process.start()
+        logger.info(f"Audio process started: {audio_process.pid}")
+
     def start_timeline_processor(self) -> None:
         self.timeline_processor = TimelineProcessor(
             self.config, self.timeline_queue, self.stop_event
@@ -486,6 +499,7 @@ class FrigateApp:
         self.start_detected_frames_processor()
         self.start_camera_processors()
         self.start_camera_capture_processes()
+        self.start_audio_processors()
         self.start_storage_maintainer()
         self.init_stats()
         self.init_external_event_processor()
diff --git a/frigate/detectors/plugins/audio_tfl.py b/frigate/detectors/plugins/audio_tfl.py
deleted file mode 100644
index ac5c50381..000000000
--- a/frigate/detectors/plugins/audio_tfl.py
+++ /dev/null
@@ -1,75 +0,0 @@
-import logging
-
-import numpy as np
-from pydantic import Field
-from typing_extensions import Literal
-
-from frigate.detectors.detection_api import DetectionApi
-from frigate.object_detection import load_labels
-
-try:
-    from tflite_runtime.interpreter import Interpreter
-except ModuleNotFoundError:
-    from tensorflow.lite.python.interpreter import Interpreter
-
-
-logger = logging.getLogger(__name__)
-
-DETECTOR_KEY = "audio"
-
-
-class AudioTfl(DetectionApi):
-    type_key = DETECTOR_KEY
-
-    def __init__(self, labels):
-        self.labels = load_labels("/audio-labelmap.txt")
-        self.interpreter = Interpreter(
-            model_path="/cpu_audio_model.tflite",
-            num_threads=2,
-        )
-
-        self.interpreter.allocate_tensors()
-
-        self.tensor_input_details = self.interpreter.get_input_details()
-        self.tensor_output_details = self.interpreter.get_output_details()
-
-    def _detect_raw(self, tensor_input):
-        self.interpreter.set_tensor(self.tensor_input_details[0]["index"], tensor_input)
-        self.interpreter.invoke()
-        detections = np.zeros((20, 6), np.float32)
-
-        res = self.interpreter.get_tensor(self.tensor_output_details[0]["index"])[0]
-        non_zero_indices = res > 0
-        class_ids = np.argpartition(-res, 20)[:20]
-        class_ids = class_ids[np.argsort(-res[class_ids])]
-        class_ids = class_ids[non_zero_indices[class_ids]]
-        scores = res[class_ids]
-        boxes = np.full((scores.shape[0], 4), -1, np.float32)
-        count = len(scores)
-
-        for i in range(count):
-            if scores[i] < 0.4 or i == 20:
-                break
-            detections[i] = [
-                class_ids[i],
-                float(scores[i]),
-                boxes[i][0],
-                boxes[i][1],
-                boxes[i][2],
-                boxes[i][3],
-            ]
-
-        return detections
-
-    def detect(self, tensor_input, threshold=0.8):
-        detections = []
-
-        raw_detections = self._detect_raw(tensor_input)
-
-        for d in raw_detections:
-            if d[1] < threshold:
-                break
-            detections.append(
-                (self.labels[int(d[0])], float(d[1]), (d[2], d[3], d[4], d[5]))
-            )
-        return detections
diff --git a/frigate/events/audio.py b/frigate/events/audio.py
index 7b792bca5..ddaa2c69d 100644
--- a/frigate/events/audio.py
+++ b/frigate/events/audio.py
@@ -19,9 +19,15 @@ from frigate.const import (
     AUDIO_SAMPLE_RATE,
     CACHE_DIR,
 )
-from frigate.detectors.plugins.audio_tfl import AudioTfl
+from frigate.detectors.detection_api import DetectionApi
+from frigate.object_detection import load_labels
 from frigate.util import listen
 
+try:
+    from tflite_runtime.interpreter import Interpreter
+except ModuleNotFoundError:
+    from tensorflow.lite.python.interpreter import Interpreter
+
 logger = logging.getLogger(__name__)
 
 FFMPEG_COMMAND = (
@@ -47,6 +53,61 @@ def listen_to_audio(config: FrigateConfig, event_queue: mp.Queue) -> None:
         AudioEventMaintainer(camera, stop_event)
 
 
+class AudioTfl(DetectionApi):
+    def __init__(self, labels):
+        self.labels = load_labels("/audio-labelmap.txt")
+        self.interpreter = Interpreter(
+            model_path="/cpu_audio_model.tflite",
+            num_threads=2,
+        )
+
+        self.interpreter.allocate_tensors()
+
+        self.tensor_input_details = self.interpreter.get_input_details()
+        self.tensor_output_details = self.interpreter.get_output_details()
+
+    def _detect_raw(self, tensor_input):
+        self.interpreter.set_tensor(self.tensor_input_details[0]["index"], tensor_input)
+        self.interpreter.invoke()
+        detections = np.zeros((20, 6), np.float32)
+
+        res = self.interpreter.get_tensor(self.tensor_output_details[0]["index"])[0]
+        non_zero_indices = res > 0
+        class_ids = np.argpartition(-res, 20)[:20]
+        class_ids = class_ids[np.argsort(-res[class_ids])]
+        class_ids = class_ids[non_zero_indices[class_ids]]
+        scores = res[class_ids]
+        boxes = np.full((scores.shape[0], 4), -1, np.float32)
+        count = len(scores)
+
+        for i in range(count):
+            if scores[i] < 0.4 or i == 20:
+                break
+            detections[i] = [
+                class_ids[i],
+                float(scores[i]),
+                boxes[i][0],
+                boxes[i][1],
+                boxes[i][2],
+                boxes[i][3],
+            ]
+
+        return detections
+
+    def detect(self, tensor_input, threshold=0.8):
+        detections = []
+
+        raw_detections = self._detect_raw(tensor_input)
+
+        for d in raw_detections:
+            if d[1] < threshold:
+                break
+            detections.append(
+                (self.labels[int(d[0])], float(d[1]), (d[2], d[3], d[4], d[5]))
+            )
+        return detections
+
+
 class AudioEventMaintainer(threading.Thread):
     def __init__(self, camera: CameraConfig, stop_event: mp.Event) -> None:
         threading.Thread.__init__(self)