Initial audio classification model implementation
commit 68ade5063d (parent ec7aaa18ab)
Dockerfile

@@ -12,7 +12,7 @@ FROM debian:11-slim AS slim-base
 FROM slim-base AS wget
 ARG DEBIAN_FRONTEND
 RUN apt-get update \
-    && apt-get install -y wget xz-utils \
+    && apt-get install -y wget xz-utils unzip \
    && rm -rf /var/lib/apt/lists/*
 WORKDIR /rootfs
 
@@ -93,7 +93,10 @@ COPY labelmap.txt .
 COPY --from=ov-converter /models/public/ssdlite_mobilenet_v2/FP16 openvino-model
 RUN wget -q https://github.com/openvinotoolkit/open_model_zoo/raw/master/data/dataset_classes/coco_91cl_bkgr.txt -O openvino-model/coco_91cl_bkgr.txt && \
     sed -i 's/truck/car/g' openvino-model/coco_91cl_bkgr.txt
+
+# Get Audio Model and labels
+RUN wget -qO edgetpu_audio_model.tflite https://tfhub.dev/google/coral-model/yamnet/classification/coral/1?coral-format=tflite
+RUN wget -qO cpu_audio_model.tflite https://tfhub.dev/google/lite-model/yamnet/classification/tflite/1?lite-format=tflite
+RUN unzip -q edgetpu_audio_model.tflite yamnet_label_list.txt && chmod +r yamnet_label_list.txt
 
 FROM wget AS s6-overlay
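The two models fetched above are Google's YAMNet audio classifier, in an EdgeTPU-compiled build and a plain TFLite build; the label list ships inside the model archive. YAMNet classifies 0.975 s of 16 kHz mono audio (15600 samples) into 521 AudioSet classes, which is where the duration and sample-rate defaults added later in this commit come from. A minimal sketch of invoking the CPU build directly, assuming tflite_runtime is installed and that this model build uses the flat input/output shapes the commit relies on (not part of the diff):

import numpy as np
import tflite_runtime.interpreter as tflite

interpreter = tflite.Interpreter(model_path="cpu_audio_model.tflite")
interpreter.allocate_tensors()

# 0.975 s at 16 kHz mono = 15600 float32 samples in [-1.0, 1.0]
waveform = np.zeros(15600, dtype=np.float32)

input_index = interpreter.get_input_details()[0]["index"]
output_index = interpreter.get_output_details()[0]["index"]
interpreter.set_tensor(input_index, waveform)
interpreter.invoke()

# One score per class, aligned with yamnet_label_list.txt
scores = np.squeeze(interpreter.get_tensor(output_index))
print(scores.shape)  # expected: (521,)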
frigate/app.py

@@ -13,6 +13,7 @@ from peewee_migrate import Router
 from playhouse.sqlite_ext import SqliteExtDatabase
 from playhouse.sqliteq import SqliteQueueDatabase
 
+from frigate.audio import capture_audio, process_audio
 from frigate.comms.dispatcher import Communicator, Dispatcher
 from frigate.comms.mqtt import MqttClient
 from frigate.comms.ws import WebSocketClient
@@ -42,6 +43,7 @@ class FrigateApp:
     def __init__(self) -> None:
         self.stop_event: MpEvent = mp.Event()
         self.detection_queue: Queue = mp.Queue()
+        self.audio_detection_queue: Queue = mp.Queue()
         self.detectors: dict[str, ObjectDetectProcess] = {}
         self.detection_out_events: dict[str, MpEvent] = {}
         self.detection_shms: list[mp.shared_memory.SharedMemory] = []
@@ -104,6 +106,7 @@ class FrigateApp:
             "read_start": mp.Value("d", 0.0),
             "ffmpeg_pid": mp.Value("i", 0),
             "frame_queue": mp.Queue(maxsize=2),
+            "audio_queue": mp.Queue(maxsize=2),
             "capture_process": None,
             "process": None,
         }
@@ -182,7 +185,7 @@ class FrigateApp:
         self.dispatcher = Dispatcher(self.config, self.camera_metrics, comms)
 
     def start_detectors(self) -> None:
-        for name in self.config.cameras.keys():
+        for name, camera_config in self.config.cameras.items():
             self.detection_out_events[name] = mp.Event()
 
             try:
@@ -190,6 +193,7 @@ class FrigateApp:
                 [
                     det.model.height * det.model.width * 3
                     for (name, det) in self.config.detectors.items()
+                    if det.model.type == "object"
                 ]
             )
             shm_in = mp.shared_memory.SharedMemory(
@@ -210,10 +214,43 @@ class FrigateApp:
             self.detection_shms.append(shm_in)
             self.detection_shms.append(shm_out)
 
+            if any(
+                ["detect_audio" in input.roles for input in camera_config.ffmpeg.inputs]
+            ):
+                self.detection_out_events[f"{name}-audio"] = mp.Event()
+                try:
+                    shm_in_audio = mp.shared_memory.SharedMemory(
+                        name=f"{name}-audio",
+                        create=True,
+                        size=int(
+                            round(
+                                self.config.audio_model.duration
+                                * self.config.audio_model.sample_rate
+                            )
+                        )
+                        * 4,  # stored as float32, so 4 bytes per sample
+                    )
+                except FileExistsError:
+                    shm_in_audio = mp.shared_memory.SharedMemory(name=f"{name}-audio")
+
+                try:
+                    shm_out_audio = mp.shared_memory.SharedMemory(
+                        name=f"out-{name}-audio", create=True, size=20 * 6 * 4
+                    )
+                except FileExistsError:
+                    shm_out_audio = mp.shared_memory.SharedMemory(
+                        name=f"out-{name}-audio"
+                    )
+
+                self.detection_shms.append(shm_in_audio)
+                self.detection_shms.append(shm_out_audio)
+
         for name, detector_config in self.config.detectors.items():
             self.detectors[name] = ObjectDetectProcess(
                 name,
-                self.detection_queue,
+                self.audio_detection_queue
+                if detector_config.model.type == "audio"
+                else self.detection_queue,
                 self.detection_out_events,
                 detector_config,
             )
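The audio input buffer created above is sized for one model-length chunk of float32 samples, and the result buffer reuses the existing 20 detections x 6 floats layout from the object path. With the defaults this commit introduces (0.975 s at 16000 Hz), the arithmetic works out as follows (illustrative sketch, not part of the diff):

duration = 0.975      # AudioModelConfig.duration default
sample_rate = 16000   # AudioModelConfig.sample_rate default

samples = int(round(duration * sample_rate))  # 15600 samples per chunk
in_bytes = samples * 4                        # float32 -> 62400 bytes

out_bytes = 20 * 6 * 4                        # 20 rows x 6 floats -> 480 bytes
print(in_bytes, out_bytes)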
@@ -245,6 +282,54 @@ class FrigateApp:
         output_processor.start()
         logger.info(f"Output process started: {output_processor.pid}")
 
+    def start_audio_processors(self) -> None:
+        # Make sure we have audio detectors
+        if not any(
+            [det.model.type == "audio" for det in self.config.detectors.values()]
+        ):
+            return
+
+        for name, config in self.config.cameras.items():
+            if not any(
+                ["detect_audio" in inputs.roles for inputs in config.ffmpeg.inputs]
+            ):
+                continue
+            if not config.enabled:
+                logger.info(f"Audio processor not started for disabled camera {name}")
+                continue
+
+            audio_capture = mp.Process(
+                target=capture_audio,
+                name=f"audio_capture:{name}",
+                args=(
+                    name,
+                    self.config.audio_model,
+                    self.camera_metrics[name],
+                ),
+            )
+            audio_capture.daemon = True
+            self.camera_metrics[name]["audio_capture"] = audio_capture
+            audio_capture.start()
+            logger.info(f"Audio capture started for {name}: {audio_capture.pid}")
+
+            audio_process = mp.Process(
+                target=process_audio,
+                name=f"audio_process:{name}",
+                args=(
+                    name,
+                    config,
+                    self.config.audio_model,
+                    self.config.audio_model.merged_labelmap,
+                    self.audio_detection_queue,
+                    self.detection_out_events[f"{name}-audio"],
+                    self.camera_metrics[name],
+                ),
+            )
+            audio_process.daemon = True
+            self.camera_metrics[name]["audio_process"] = audio_process
+            audio_process.start()
+            logger.info(f"Audio processor started for {name}: {audio_process.pid}")
+
     def start_camera_processors(self) -> None:
         for name, config in self.config.cameras.items():
             if not self.config.cameras[name].enabled:
@@ -364,6 +449,7 @@ class FrigateApp:
         self.start_detectors()
         self.start_video_output_processor()
         self.start_detected_frames_processor()
+        self.start_audio_processors()
         self.start_camera_processors()
         self.start_camera_capture_processes()
         self.start_storage_maintainer()
frigate/audio.py (new file, 126 lines)

@@ -0,0 +1,126 @@
+import datetime
+import logging
+import multiprocessing as mp
+import queue
+import random
+import signal
+import string
+import threading
+
+import numpy as np
+from setproctitle import setproctitle
+
+from frigate.config import CameraConfig, AudioModelConfig
+from frigate.object_detection import RemoteObjectDetector
+from frigate.util import listen, SharedMemoryFrameManager
+
+
+logger = logging.getLogger(__name__)
+
+
+def capture_audio(
+    name: str,
+    model_config: AudioModelConfig,
+    process_info,
+):
+    stop_event = mp.Event()
+
+    def receiveSignal(signalNumber, frame):
+        stop_event.set()
+
+    signal.signal(signal.SIGTERM, receiveSignal)
+    signal.signal(signal.SIGINT, receiveSignal)
+
+    threading.current_thread().name = f"capture:{name}"
+    setproctitle(f"frigate.capture:{name}")
+    listen()
+
+    chunk_size = int(round(model_config.duration * model_config.sample_rate * 2))
+
+    key = f"{name}-audio"
+
+    audio_queue = process_info["audio_queue"]
+    frame_manager = SharedMemoryFrameManager()
+    current_frame = mp.Value("d", 0.0)
+
+    pipe = open(f"/tmp/{key}", "rb")
+
+    while not stop_event.is_set():
+        current_frame.value = datetime.datetime.now().timestamp()
+        frame_name = f"{key}{current_frame.value}"
+        frame_buffer = frame_manager.create(frame_name, chunk_size)
+
+        try:
+            frame_buffer[:] = pipe.read(chunk_size)
+        except Exception as e:
+            continue
+
+        # if the queue is full, skip this frame
+        if audio_queue.full():
+            frame_manager.delete(frame_name)
+            continue
+
+        # close the frame
+        frame_manager.close(frame_name)
+
+        # add to the queue
+        audio_queue.put(current_frame.value)
+
+
+def process_audio(
+    name: str,
+    camera_config: CameraConfig,
+    model_config: AudioModelConfig,
+    labelmap,
+    detection_queue: mp.Queue,
+    result_connection,
+    process_info,
+):
+    stop_event = mp.Event()
+
+    def receiveSignal(signalNumber, frame):
+        stop_event.set()
+
+    signal.signal(signal.SIGTERM, receiveSignal)
+    signal.signal(signal.SIGINT, receiveSignal)
+
+    threading.current_thread().name = f"process:{name}"
+    setproctitle(f"frigate.process:{name}")
+    listen()
+
+    shape = (int(round(model_config.duration * model_config.sample_rate)),)
+
+    key = f"{name}-audio"
+
+    audio_queue = process_info["audio_queue"]
+    frame_manager = SharedMemoryFrameManager()
+
+    detector = RemoteObjectDetector(
+        key, labelmap, detection_queue, result_connection, model_config
+    )
+
+    while not stop_event.is_set():
+        try:
+            frame_time = audio_queue.get(True, 10)
+        except queue.Empty:
+            continue
+
+        audio = frame_manager.get(f"{key}{frame_time}", shape, dtype=np.int16)
+
+        if audio is None:
+            logger.info(f"{key}: audio {frame_time} is not in memory store.")
+            continue
+
+        waveform = (audio / 32768.0).astype(np.float32)
+        model_detections = detector.detect(waveform)
+
+        for label, score, _ in model_detections:
+            if label not in camera_config.objects.track:
+                continue
+            filters = camera_config.objects.filters.get(label)
+            if filters:
+                if score < filters.min_score:
+                    continue
+            logger.info(f"{label}: {score}")
+
+        frame_manager.close(f"{key}{frame_time}")
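capture_audio reads raw s16le PCM from the FIFO, which is why chunk_size multiplies by 2 (two bytes per 16-bit sample), while process_audio reinterprets the same buffer as int16 and rescales by 1/32768 into the [-1.0, 1.0) float32 range the classifier expects. The conversion in isolation (standalone sketch, not part of the diff):

import numpy as np

# Raw bytes as ffmpeg writes them to the FIFO: signed 16-bit little-endian PCM
samples = 15600
raw = (np.sin(np.linspace(0, 440, samples)) * 20000).astype(np.int16).tobytes()
assert len(raw) == samples * 2  # matches capture_audio's chunk_size math

audio = np.frombuffer(raw, dtype=np.int16)
waveform = (audio / 32768.0).astype(np.float32)  # scale into [-1.0, 1.0)
print(waveform.dtype, waveform.min(), waveform.max())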
frigate/config.py

@@ -36,8 +36,10 @@ from frigate.ffmpeg_presets import (
 from frigate.detectors import (
     PixelFormatEnum,
     InputTensorEnum,
-    ModelConfig,
     DetectorConfig,
+    ModelConfig,
+    AudioModelConfig,
+    ObjectModelConfig,
 )
 from frigate.version import VERSION
 
@@ -51,7 +53,7 @@ DEFAULT_TIME_FORMAT = "%m/%d/%Y %H:%M:%S"
 
 FRIGATE_ENV_VARS = {k: v for k, v in os.environ.items() if k.startswith("FRIGATE_")}
 
-DEFAULT_TRACKED_OBJECTS = ["person"]
+DEFAULT_TRACKED_OBJECTS = ["person", "Speech"]
 DEFAULT_DETECTORS = {"cpu": {"type": "cpu"}}
 
 
@@ -358,6 +360,7 @@ class BirdseyeCameraConfig(BaseModel):
 FFMPEG_GLOBAL_ARGS_DEFAULT = ["-hide_banner", "-loglevel", "warning"]
 FFMPEG_INPUT_ARGS_DEFAULT = "preset-rtsp-generic"
 DETECT_FFMPEG_OUTPUT_ARGS_DEFAULT = ["-f", "rawvideo", "-pix_fmt", "yuv420p"]
+DETECT_AUDIO_FFMPEG_OUTPUT_ARGS_DEFAULT = ["-f", "s16le", "-ar", "16000", "-ac", "1"]
 RTMP_FFMPEG_OUTPUT_ARGS_DEFAULT = "preset-rtmp-generic"
 RECORD_FFMPEG_OUTPUT_ARGS_DEFAULT = "preset-record-generic"
 
@@ -367,6 +370,10 @@ class FfmpegOutputArgsConfig(FrigateBaseModel):
         default=DETECT_FFMPEG_OUTPUT_ARGS_DEFAULT,
         title="Detect role FFmpeg output arguments.",
     )
+    detect_audio: Union[str, List[str]] = Field(
+        default=DETECT_AUDIO_FFMPEG_OUTPUT_ARGS_DEFAULT,
+        title="Detect role FFmpeg output arguments.",
+    )
     record: Union[str, List[str]] = Field(
         default=RECORD_FFMPEG_OUTPUT_ARGS_DEFAULT,
         title="Record role FFmpeg output arguments.",
@@ -398,6 +405,7 @@ class CameraRoleEnum(str, Enum):
     restream = "restream"
     rtmp = "rtmp"
     detect = "detect"
+    detect_audio = "detect_audio"
 
 
 class CameraInput(FrigateBaseModel):
@@ -597,6 +605,7 @@ class CameraConfig(FrigateBaseModel):
         # add roles to the input if there is only one
        if len(config["ffmpeg"]["inputs"]) == 1:
             has_rtmp = "rtmp" in config["ffmpeg"]["inputs"][0].get("roles", [])
+            has_audio = "detect_audio" in config["ffmpeg"]["inputs"][0].get("roles", [])
 
             config["ffmpeg"]["inputs"][0]["roles"] = [
                 "record",
@@ -606,6 +615,8 @@ class CameraConfig(FrigateBaseModel):
 
             if has_rtmp:
                 config["ffmpeg"]["inputs"][0]["roles"].append("rtmp")
+            if has_audio:
+                config["ffmpeg"]["inputs"][0]["roles"].append("detect_audio")
 
         super().__init__(**config)
 
@@ -646,6 +657,15 @@ class CameraConfig(FrigateBaseModel):
             )
 
         ffmpeg_output_args = scale_detect_args + ffmpeg_output_args + ["pipe:"]
+        if "detect_audio" in ffmpeg_input.roles:
+            detect_args = get_ffmpeg_arg_list(self.ffmpeg.output_args.detect_audio)
+
+            pipe = f"/tmp/{self.name}-audio"
+            try:
+                os.mkfifo(pipe)
+            except FileExistsError:
+                pass
+            ffmpeg_output_args = detect_args + ["-y", pipe] + ffmpeg_output_args
         if "rtmp" in ffmpeg_input.roles and self.rtmp.enabled:
             rtmp_args = get_ffmpeg_arg_list(
                 parse_preset_output_rtmp(self.ffmpeg.output_args.rtmp)
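With the detect_audio role in play, the camera's single ffmpeg invocation gains one more output that writes raw PCM into the named pipe ahead of the existing rawvideo output. A sketch of how the output list is spliced together, mirroring the logic above ("front_door" is a placeholder camera name; scale args omitted):

detect_audio_args = ["-f", "s16le", "-ar", "16000", "-ac", "1"]
video_output_args = ["-f", "rawvideo", "-pix_fmt", "yuv420p", "pipe:"]

pipe = "/tmp/front_door-audio"
ffmpeg_output_args = detect_audio_args + ["-y", pipe] + video_output_args
print(" ".join(ffmpeg_output_args))
# -f s16le -ar 16000 -ac 1 -y /tmp/front_door-audio -f rawvideo -pix_fmt yuv420p pipe: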
@@ -815,8 +835,11 @@ class FrigateConfig(FrigateBaseModel):
         default_factory=dict, title="Frigate environment variables."
     )
     ui: UIConfig = Field(default_factory=UIConfig, title="UI configuration.")
-    model: ModelConfig = Field(
-        default_factory=ModelConfig, title="Detection model configuration."
+    audio_model: AudioModelConfig = Field(
+        default_factory=AudioModelConfig, title="Audio model configuration."
+    )
+    model: ObjectModelConfig = Field(
+        default_factory=ObjectModelConfig, title="Detection model configuration."
     )
     detectors: Dict[str, DetectorConfig] = Field(
         default=DEFAULT_DETECTORS,
@@ -975,25 +998,21 @@ class FrigateConfig(FrigateBaseModel):
             if detector_config.model is None:
                 detector_config.model = config.model
             else:
-                model = detector_config.model
-                schema = ModelConfig.schema()["properties"]
-                if (
-                    model.width != schema["width"]["default"]
-                    or model.height != schema["height"]["default"]
-                    or model.labelmap_path is not None
-                    or model.labelmap is not {}
-                    or model.input_tensor != schema["input_tensor"]["default"]
-                    or model.input_pixel_format
-                    != schema["input_pixel_format"]["default"]
-                ):
+                detector_model = detector_config.model.dict(exclude_unset=True)
+                # If any keys are set in the detector_model other than type or path, warn
+                if any(key not in ["type", "path"] for key in detector_model.keys()):
                     logger.warning(
-                        "Customizing more than a detector model path is unsupported."
+                        "Customizing more than a detector model type or path is unsupported."
                     )
                 merged_model = deep_merge(
-                    detector_config.model.dict(exclude_unset=True),
-                    config.model.dict(exclude_unset=True),
+                    detector_model,
+                    config.model.dict(exclude_unset=True)
+                    if detector_config.model.type == "object"
+                    else config.audio_model.dict(exclude_unset=True),
+                )
+                detector_config.model = parse_obj_as(
+                    ModelConfig, {"type": detector_config.model.type, **merged_model}
                 )
-                detector_config.model = ModelConfig.parse_obj(merged_model)
             config.detectors[key] = detector_config
 
         return config
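Taken together, these config changes would let a setup opt in to audio classification by giving an input the detect_audio role and declaring a detector whose model type is "audio"; the merge logic above then pulls defaults from audio_model instead of model. A hypothetical minimal config under this commit's schema (values are illustrative; FrigateConfig is the pydantic model defined above, and mqtt settings are assumed required as in the rest of the codebase):

config = FrigateConfig.parse_obj(
    {
        "mqtt": {"host": "mqtt"},
        "detectors": {
            "cpu": {"type": "cpu"},
            "audio": {"type": "cpu", "model": {"type": "audio"}},
        },
        "cameras": {
            "front_door": {
                "ffmpeg": {
                    "inputs": [
                        {
                            "path": "rtsp://camera/stream",
                            "roles": ["detect", "detect_audio"],
                        }
                    ]
                },
                "objects": {"track": ["person", "Speech"]},
            }
        },
    }
)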
frigate/detectors/__init__.py

@@ -2,17 +2,23 @@ import logging
 
 from .detection_api import DetectionApi
 from .detector_config import (
+    AudioModelConfig,
     PixelFormatEnum,
     InputTensorEnum,
     ModelConfig,
+    ObjectModelConfig,
+)
+from .detector_types import (
+    DetectorTypeEnum,
+    api_types,
+    DetectorConfig,
 )
-from .detector_types import DetectorTypeEnum, api_types, DetectorConfig
 
 
 logger = logging.getLogger(__name__)
 
 
-def create_detector(detector_config):
+def create_detector(detector_config: DetectorConfig):
     if detector_config.type == DetectorTypeEnum.cpu:
         logger.warning(
             "CPU detectors are not recommended and should only be used for testing or for trial purposes."
frigate/detectors/detector_config.py

@@ -1,6 +1,7 @@
 import logging
 from enum import Enum
 from typing import Dict, List, Optional, Tuple, Union, Literal
+from typing_extensions import Annotated
 
 import matplotlib.pyplot as plt
 from pydantic import BaseModel, Extra, Field, validator
@@ -12,6 +13,11 @@ from frigate.util import load_labels
 logger = logging.getLogger(__name__)
 
 
+class ModelTypeEnum(str, Enum):
+    object = "object"
+    audio = "audio"
+
+
 class PixelFormatEnum(str, Enum):
     rgb = "rgb"
     bgr = "bgr"
@@ -23,20 +29,13 @@ class InputTensorEnum(str, Enum):
     nhwc = "nhwc"
 
 
-class ModelConfig(BaseModel):
-    path: Optional[str] = Field(title="Custom Object detection model path.")
-    labelmap_path: Optional[str] = Field(title="Label map for custom object detector.")
-    width: int = Field(default=320, title="Object detection model input width.")
-    height: int = Field(default=320, title="Object detection model input height.")
+class BaseModelConfig(BaseModel):
+    type: str = Field(default="object", title="Model Type")
+    path: Optional[str] = Field(title="Custom model path.")
+    labelmap_path: Optional[str] = Field(title="Label map for custom model.")
     labelmap: Dict[int, str] = Field(
         default_factory=dict, title="Labelmap customization."
     )
-    input_tensor: InputTensorEnum = Field(
-        default=InputTensorEnum.nhwc, title="Model Input Tensor Shape"
-    )
-    input_pixel_format: PixelFormatEnum = Field(
-        default=PixelFormatEnum.rgb, title="Model Input Pixel Color Format"
-    )
     _merged_labelmap: Optional[Dict[int, str]] = PrivateAttr()
     _colormap: Dict[int, Tuple[int, int, int]] = PrivateAttr()
 
@@ -65,15 +64,48 @@ class ModelConfig(BaseModel):
         self._colormap[val] = tuple(int(round(255 * c)) for c in cmap(key)[:3])
 
     class Config:
-        extra = Extra.forbid
+        extra = Extra.allow
+        arbitrary_types_allowed = True
+
+
+class ObjectModelConfig(BaseModelConfig):
+    type: Literal["object"] = "object"
+    width: int = Field(default=320, title="Object detection model input width.")
+    height: int = Field(default=320, title="Object detection model input height.")
+    input_tensor: InputTensorEnum = Field(
+        default=InputTensorEnum.nhwc, title="Model Input Tensor Shape"
+    )
+    input_pixel_format: PixelFormatEnum = Field(
+        default=PixelFormatEnum.rgb, title="Model Input Pixel Color Format"
+    )
+
+
+class AudioModelConfig(BaseModelConfig):
+    type: Literal["audio"] = "audio"
+    duration: float = Field(default=0.975, title="Model Input Audio Duration")
+    format: str = Field(default="s16le", title="Model Input Audio Format")
+    sample_rate: int = Field(default=16000, title="Model Input Sample Rate")
+    channels: int = Field(default=1, title="Model Input Number of Channels")
+
+    def __init__(self, **config):
+        super().__init__(**config)
+
+        self._merged_labelmap = {
+            **load_labels(config.get("labelmap_path", "/yamnet_label_list.txt")),
+            **config.get("labelmap", {}),
+        }
+
+
+ModelConfig = Annotated[
+    Union[tuple(BaseModelConfig.__subclasses__())],
+    Field(discriminator="type"),
+]
 
 
 class BaseDetectorConfig(BaseModel):
     # the type field must be defined in all subclasses
     type: str = Field(default="cpu", title="Detector Type")
-    model: ModelConfig = Field(
-        default=None, title="Detector specific model configuration."
-    )
+    model: Optional[ModelConfig]
 
     class Config:
         extra = Extra.allow
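ModelConfig is now a tagged union over the BaseModelConfig subclasses, discriminated on the type field, so pydantic resolves a plain dict to ObjectModelConfig or AudioModelConfig automatically. A small sketch of that resolution, assuming the definitions above are in scope (pydantic v1 API, as used by this codebase):

from pydantic import parse_obj_as

# Hypothetical inputs; ModelConfig is the Annotated union defined above.
obj = parse_obj_as(ModelConfig, {"type": "object", "width": 416, "height": 416})
aud = parse_obj_as(ModelConfig, {"type": "audio", "duration": 0.975})

print(type(obj).__name__)  # ObjectModelConfig
print(type(aud).__name__)  # AudioModelConfig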
frigate/detectors/plugins/cpu_tfl.py

@@ -22,8 +22,12 @@ class CpuTfl(DetectionApi):
     type_key = DETECTOR_KEY
 
     def __init__(self, detector_config: CpuDetectorConfig):
+        self.is_audio = detector_config.model.type == "audio"
+        default_model = (
+            "/cpu_model.tflite" if not self.is_audio else "/cpu_audio_model.tflite"
+        )
         self.interpreter = tflite.Interpreter(
-            model_path=detector_config.model.path or "/cpu_model.tflite",
+            model_path=detector_config.model.path or default_model,
             num_threads=detector_config.num_threads or 3,
         )
 
@@ -36,15 +40,29 @@ class CpuTfl(DetectionApi):
         self.interpreter.set_tensor(self.tensor_input_details[0]["index"], tensor_input)
         self.interpreter.invoke()
 
-        boxes = self.interpreter.tensor(self.tensor_output_details[0]["index"])()[0]
-        class_ids = self.interpreter.tensor(self.tensor_output_details[1]["index"])()[0]
-        scores = self.interpreter.tensor(self.tensor_output_details[2]["index"])()[0]
-        count = int(
-            self.interpreter.tensor(self.tensor_output_details[3]["index"])()[0]
-        )
-
         detections = np.zeros((20, 6), np.float32)
 
+        if self.is_audio:
+            res = self.interpreter.get_tensor(self.tensor_output_details[0]["index"])[0]
+            non_zero_indices = res > 0
+            class_ids = np.argpartition(-res, 20)[:20]
+            class_ids = class_ids[np.argsort(-res[class_ids])]
+            class_ids = class_ids[non_zero_indices[class_ids]]
+            scores = res[class_ids]
+            boxes = np.full((scores.shape[0], 4), -1, np.float32)
+            count = len(scores)
+        else:
+            boxes = self.interpreter.tensor(self.tensor_output_details[0]["index"])()[0]
+            class_ids = self.interpreter.tensor(
+                self.tensor_output_details[1]["index"]
+            )()[0]
+            scores = self.interpreter.tensor(self.tensor_output_details[2]["index"])()[
+                0
+            ]
+            count = int(
+                self.interpreter.tensor(self.tensor_output_details[3]["index"])()[0]
+            )
+
         for i in range(count):
             if scores[i] < 0.4 or i == 20:
                 break
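The audio branch converts YAMNet's flat score vector into the detector's usual (class_id, score) rows: argpartition selects the 20 best scores without a full sort, argsort orders just those 20, the non-zero mask drops silent classes, and boxes are filled with -1 because audio detections have no spatial extent. The same selection in isolation (sketch with a stand-in score vector):

import numpy as np

res = np.random.rand(521).astype(np.float32)  # stand-in for YAMNet scores
res[res < 0.5] = 0.0                          # most classes score zero

non_zero_indices = res > 0
class_ids = np.argpartition(-res, 20)[:20]          # top 20, unordered
class_ids = class_ids[np.argsort(-res[class_ids])]  # order by descending score
class_ids = class_ids[non_zero_indices[class_ids]]  # drop zero-score classes
scores = res[class_ids]
boxes = np.full((scores.shape[0], 4), -1, np.float32)  # no spatial boxes

print(list(zip(class_ids.tolist(), scores.tolist()))[:5])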
frigate/detectors/plugins/edgetpu_tfl.py

@@ -23,6 +23,7 @@ class EdgeTpuTfl(DetectionApi):
     type_key = DETECTOR_KEY
 
     def __init__(self, detector_config: EdgeTpuDetectorConfig):
+        self.is_audio = detector_config.model.type == "audio"
         device_config = {"device": "usb"}
         if detector_config.device is not None:
             device_config = {"device": detector_config.device}
@@ -33,8 +34,13 @@ class EdgeTpuTfl(DetectionApi):
             logger.info(f"Attempting to load TPU as {device_config['device']}")
             edge_tpu_delegate = load_delegate("libedgetpu.so.1.0", device_config)
             logger.info("TPU found")
+            default_model = (
+                "/edgetpu_model.tflite"
+                if not self.is_audio
+                else "/edgetpu_audio_model.tflite"
+            )
             self.interpreter = tflite.Interpreter(
-                model_path=detector_config.model.path or "/edgetpu_model.tflite",
+                model_path=detector_config.model.path or default_model,
                 experimental_delegates=[edge_tpu_delegate],
             )
         except ValueError:
@@ -52,15 +58,29 @@ class EdgeTpuTfl(DetectionApi):
         self.interpreter.set_tensor(self.tensor_input_details[0]["index"], tensor_input)
         self.interpreter.invoke()
 
-        boxes = self.interpreter.tensor(self.tensor_output_details[0]["index"])()[0]
-        class_ids = self.interpreter.tensor(self.tensor_output_details[1]["index"])()[0]
-        scores = self.interpreter.tensor(self.tensor_output_details[2]["index"])()[0]
-        count = int(
-            self.interpreter.tensor(self.tensor_output_details[3]["index"])()[0]
-        )
-
         detections = np.zeros((20, 6), np.float32)
 
+        if self.is_audio:
+            res = self.interpreter.get_tensor(self.tensor_output_details[0]["index"])[0]
+            non_zero_indices = res > 0
+            class_ids = np.argpartition(-res, 20)[:20]
+            class_ids = class_ids[np.argsort(-res[class_ids])]
+            class_ids = class_ids[non_zero_indices[class_ids]]
+            scores = res[class_ids]
+            boxes = np.full((scores.shape[0], 4), -1, np.float32)
+            count = len(scores)
+        else:
+            boxes = self.interpreter.tensor(self.tensor_output_details[0]["index"])()[0]
+            class_ids = self.interpreter.tensor(
+                self.tensor_output_details[1]["index"]
+            )()[0]
+            scores = self.interpreter.tensor(self.tensor_output_details[2]["index"])()[
+                0
+            ]
+            count = int(
+                self.interpreter.tensor(self.tensor_output_details[3]["index"])()[0]
+            )
+
         for i in range(count):
             if scores[i] < 0.4 or i == 20:
                 break
frigate/object_detection.py

@@ -44,7 +44,7 @@ class LocalObjectDetector(ObjectDetector):
         else:
             self.labels = load_labels(labels)
 
-        if detector_config:
+        if detector_config.model.type == "object":
             self.input_transform = tensor_transform(detector_config.model.input_tensor)
         else:
             self.input_transform = None
@@ -107,6 +107,20 @@ def run_detector(
             connection_id = detection_queue.get(timeout=5)
         except queue.Empty:
             continue
+        if detector_config.model.type == "audio":
+            input_frame = frame_manager.get(
+                connection_id,
+                (
+                    int(
+                        round(
+                            detector_config.model.duration
+                            * detector_config.model.sample_rate
+                        )
+                    ),
+                ),
+                dtype=np.float32,
+            )
+        else:
             input_frame = frame_manager.get(
                 connection_id,
                 (1, detector_config.model.height, detector_config.model.width, 3),
@@ -180,6 +194,13 @@ class RemoteObjectDetector:
         self.detection_queue = detection_queue
         self.event = event
         self.shm = mp.shared_memory.SharedMemory(name=self.name, create=False)
+        if model_config.type == "audio":
+            self.np_shm = np.ndarray(
+                (int(round(model_config.duration * model_config.sample_rate)),),
+                dtype=np.float32,
+                buffer=self.shm.buf,
+            )
+        else:
             self.np_shm = np.ndarray(
                 (1, model_config.height, model_config.width, 3),
                 dtype=np.uint8,
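Each result row keeps the existing [class_id, score, y_min, x_min, y_max, x_max] layout in a (20, 6) float32 block, which is what lets the audio path reuse the out-SHM plumbing unchanged; audio rows simply carry -1 box coordinates. Reading one result block back out of shared memory (sketch; "front_door" is a placeholder camera name and the segment is assumed to already exist):

import numpy as np
from multiprocessing import shared_memory

# Attach to a result buffer created in FrigateApp.start_detectors().
shm = shared_memory.SharedMemory(name="out-front_door-audio")
detections = np.ndarray((20, 6), dtype=np.float32, buffer=shm.buf)

for class_id, score, *box in detections:
    if score == 0.0:
        break  # unfilled rows stay zeroed
    print(int(class_id), float(score), box)  # box is [-1, -1, -1, -1] for audio
shm.close()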
frigate/util.py

@@ -915,7 +915,7 @@ class FrameManager(ABC):
         pass
 
     @abstractmethod
-    def get(self, name, timeout_ms=0):
+    def get(self, name):
         pass
 
     @abstractmethod
@@ -956,13 +956,13 @@ class SharedMemoryFrameManager(FrameManager):
         self.shm_store[name] = shm
         return shm.buf
 
-    def get(self, name, shape):
+    def get(self, name, shape, dtype=np.uint8):
         if name in self.shm_store:
            shm = self.shm_store[name]
         else:
             shm = shared_memory.SharedMemory(name=name)
             self.shm_store[name] = shm
-        return np.ndarray(shape, dtype=np.uint8, buffer=shm.buf)
+        return np.ndarray(shape, dtype=dtype, buffer=shm.buf)
 
     def close(self, name):
         if name in self.shm_store:
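The new dtype parameter lets a single frame manager hand back uint8 image tensors and int16 PCM chunks from the same shared-memory store. For example (sketch; the names are illustrative and the segments are assumed to exist):

import numpy as np
from frigate.util import SharedMemoryFrameManager

frame_manager = SharedMemoryFrameManager()

# Object detection input, as run_detector() requests it (dtype defaults to uint8)
tensor = frame_manager.get("some-connection-id", (1, 320, 320, 3))

# Audio input, as process_audio() requests it: raw PCM viewed as int16
audio = frame_manager.get("front_door-audio1234.5", (15600,), dtype=np.int16)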