From 51e1e5b7a599a5de89b952fc5c146e75de9d08b7 Mon Sep 17 00:00:00 2001 From: Nick Mowen Date: Sat, 17 Jun 2023 09:49:45 -0600 Subject: [PATCH] Keep audio labelmap local --- Dockerfile | 4 +- audio-labelmap.txt | 521 +++++++++++++++++++++++++++ frigate/detectors/detector_config.py | 136 ++++--- 3 files changed, 609 insertions(+), 52 deletions(-) create mode 100644 audio-labelmap.txt diff --git a/Dockerfile b/Dockerfile index ffde307cb..f40d812bb 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,7 +12,7 @@ FROM debian:11-slim AS slim-base FROM slim-base AS wget ARG DEBIAN_FRONTEND RUN apt-get update \ - && apt-get install -y wget xz-utils unzip \ + && apt-get install -y wget xz-utils \ && rm -rf /var/lib/apt/lists/* WORKDIR /rootfs @@ -96,7 +96,7 @@ RUN wget -q https://github.com/openvinotoolkit/open_model_zoo/raw/master/data/da # Get Audio Model and labels RUN wget -qO edgetpu_audio_model.tflite https://tfhub.dev/google/coral-model/yamnet/classification/coral/1?coral-format=tflite RUN wget -qO cpu_audio_model.tflite https://tfhub.dev/google/lite-model/yamnet/classification/tflite/1?lite-format=tflite -RUN unzip -q edgetpu_audio_model.tflite yamnet_label_list.txt && chmod +r yamnet_label_list.txt +COPY audio-labelmap.txt . FROM wget AS s6-overlay diff --git a/audio-labelmap.txt b/audio-labelmap.txt new file mode 100644 index 000000000..c3e864e7d --- /dev/null +++ b/audio-labelmap.txt @@ -0,0 +1,521 @@ +Speech +Child speech, kid speaking +Conversation +Narration, monologue +Babbling +Speech synthesizer +Shout +Bellow +Whoop +Yell +Children shouting +Screaming +Whispering +Laughter +Baby laughter +Giggle +Snicker +Belly laugh +Chuckle, chortle +Crying, sobbing +Baby cry, infant cry +Whimper +Wail, moan +Sigh +Singing +Choir +Yodeling +Chant +Mantra +Child singing +Synthetic singing +Rapping +Humming +Groan +Grunt +Whistling +Breathing +Wheeze +Snoring +Gasp +Pant +Snort +Cough +Throat clearing +Sneeze +Sniff +Run +Shuffle +Walk, footsteps +Chewing, mastication +Biting +Gargling +Stomach rumble +Burping, eructation +Hiccup +Fart +Hands +Finger snapping +Clapping +Heart sounds, heartbeat +Heart murmur +Cheering +Applause +Chatter +Crowd +Hubbub, speech noise, speech babble +Children playing +Animal +Domestic animals, pets +Dog +Bark +Yip +Howl +Bow-wow +Growling +Whimper (dog) +Cat +Purr +Meow +Hiss +Caterwaul +Livestock, farm animals, working animals +Horse +Clip-clop +Neigh, whinny +Cattle, bovinae +Moo +Cowbell +Pig +Oink +Goat +Bleat +Sheep +Fowl +Chicken, rooster +Cluck +Crowing, cock-a-doodle-doo +Turkey +Gobble +Duck +Quack +Goose +Honk +Wild animals +Roaring cats (lions, tigers) +Roar +Bird +Bird vocalization, bird call, bird song +Chirp, tweet +Squawk +Pigeon, dove +Coo +Crow +Caw +Owl +Hoot +Bird flight, flapping wings +Canidae, dogs, wolves +Rodents, rats, mice +Mouse +Patter +Insect +Cricket +Mosquito +Fly, housefly +Buzz +Bee, wasp, etc. +Frog +Croak +Snake +Rattle +Whale vocalization +Music +Musical instrument +Plucked string instrument +Guitar +Electric guitar +Bass guitar +Acoustic guitar +Steel guitar, slide guitar +Tapping (guitar technique) +Strum +Banjo +Sitar +Mandolin +Zither +Ukulele +Keyboard (musical) +Piano +Electric piano +Organ +Electronic organ +Hammond organ +Synthesizer +Sampler +Harpsichord +Percussion +Drum kit +Drum machine +Drum +Snare drum +Rimshot +Drum roll +Bass drum +Timpani +Tabla +Cymbal +Hi-hat +Wood block +Tambourine +Rattle (instrument) +Maraca +Gong +Tubular bells +Mallet percussion +Marimba, xylophone +Glockenspiel +Vibraphone +Steelpan +Orchestra +Brass instrument +French horn +Trumpet +Trombone +Bowed string instrument +String section +Violin, fiddle +Pizzicato +Cello +Double bass +Wind instrument, woodwind instrument +Flute +Saxophone +Clarinet +Harp +Bell +Church bell +Jingle bell +Bicycle bell +Tuning fork +Chime +Wind chime +Change ringing (campanology) +Harmonica +Accordion +Bagpipes +Didgeridoo +Shofar +Theremin +Singing bowl +Scratching (performance technique) +Pop music +Hip hop music +Beatboxing +Rock music +Heavy metal +Punk rock +Grunge +Progressive rock +Rock and roll +Psychedelic rock +Rhythm and blues +Soul music +Reggae +Country +Swing music +Bluegrass +Funk +Folk music +Middle Eastern music +Jazz +Disco +Classical music +Opera +Electronic music +House music +Techno +Dubstep +Drum and bass +Electronica +Electronic dance music +Ambient music +Trance music +Music of Latin America +Salsa music +Flamenco +Blues +Music for children +New-age music +Vocal music +A capella +Music of Africa +Afrobeat +Christian music +Gospel music +Music of Asia +Carnatic music +Music of Bollywood +Ska +Traditional music +Independent music +Song +Background music +Theme music +Jingle (music) +Soundtrack music +Lullaby +Video game music +Christmas music +Dance music +Wedding music +Happy music +Sad music +Tender music +Exciting music +Angry music +Scary music +Wind +Rustling leaves +Wind noise (microphone) +Thunderstorm +Thunder +Water +Rain +Raindrop +Rain on surface +Stream +Waterfall +Ocean +Waves, surf +Steam +Gurgling +Fire +Crackle +Vehicle +Boat, Water vehicle +Sailboat, sailing ship +Rowboat, canoe, kayak +Motorboat, speedboat +Ship +Motor vehicle (road) +Car +Vehicle horn, car horn, honking +Toot +Car alarm +Power windows, electric windows +Skidding +Tire squeal +Car passing by +Race car, auto racing +Truck +Air brake +Air horn, truck horn +Reversing beeps +Ice cream truck, ice cream van +Bus +Emergency vehicle +Police car (siren) +Ambulance (siren) +Fire engine, fire truck (siren) +Motorcycle +Traffic noise, roadway noise +Rail transport +Train +Train whistle +Train horn +Railroad car, train wagon +Train wheels squealing +Subway, metro, underground +Aircraft +Aircraft engine +Jet engine +Propeller, airscrew +Helicopter +Fixed-wing aircraft, airplane +Bicycle +Skateboard +Engine +Light engine (high frequency) +Dental drill, dentist's drill +Lawn mower +Chainsaw +Medium engine (mid frequency) +Heavy engine (low frequency) +Engine knocking +Engine starting +Idling +Accelerating, revving, vroom +Door +Doorbell +Ding-dong +Sliding door +Slam +Knock +Tap +Squeak +Cupboard open or close +Drawer open or close +Dishes, pots, and pans +Cutlery, silverware +Chopping (food) +Frying (food) +Microwave oven +Blender +Water tap, faucet +Sink (filling or washing) +Bathtub (filling or washing) +Hair dryer +Toilet flush +Toothbrush +Electric toothbrush +Vacuum cleaner +Zipper (clothing) +Keys jangling +Coin (dropping) +Scissors +Electric shaver, electric razor +Shuffling cards +Typing +Typewriter +Computer keyboard +Writing +Alarm +Telephone +Telephone bell ringing +Ringtone +Telephone dialing, DTMF +Dial tone +Busy signal +Alarm clock +Siren +Civil defense siren +Buzzer +Smoke detector, smoke alarm +Fire alarm +Foghorn +Whistle +Steam whistle +Mechanisms +Ratchet, pawl +Clock +Tick +Tick-tock +Gears +Pulleys +Sewing machine +Mechanical fan +Air conditioning +Cash register +Printer +Camera +Single-lens reflex camera +Tools +Hammer +Jackhammer +Sawing +Filing (rasp) +Sanding +Power tool +Drill +Explosion +Gunshot, gunfire +Machine gun +Fusillade +Artillery fire +Cap gun +Fireworks +Firecracker +Burst, pop +Eruption +Boom +Wood +Chop +Splinter +Crack +Glass +Chink, clink +Shatter +Liquid +Splash, splatter +Slosh +Squish +Drip +Pour +Trickle, dribble +Gush +Fill (with liquid) +Spray +Pump (liquid) +Stir +Boiling +Sonar +Arrow +Whoosh, swoosh, swish +Thump, thud +Thunk +Electronic tuner +Effects unit +Chorus effect +Basketball bounce +Bang +Slap, smack +Whack, thwack +Smash, crash +Breaking +Bouncing +Whip +Flap +Scratch +Scrape +Rub +Roll +Crushing +Crumpling, crinkling +Tearing +Beep, bleep +Ping +Ding +Clang +Squeal +Creak +Rustle +Whir +Clatter +Sizzle +Clicking +Clickety-clack +Rumble +Plop +Jingle, tinkle +Hum +Zing +Boing +Crunch +Silence +Sine wave +Harmonic +Chirp tone +Sound effect +Pulse +Inside, small room +Inside, large room or hall +Inside, public space +Outside, urban or manmade +Outside, rural or natural +Reverberation +Echo +Noise +Environmental noise +Static +Mains hum +Distortion +Sidetone +Cacophony +White noise +Pink noise +Throbbing +Vibration +Television +Radio +Field recording diff --git a/frigate/detectors/detector_config.py b/frigate/detectors/detector_config.py index f3d3bb37c..f65826a57 100644 --- a/frigate/detectors/detector_config.py +++ b/frigate/detectors/detector_config.py @@ -1,23 +1,21 @@ +import hashlib +import json import logging +import os from enum import Enum -from typing import Dict, List, Optional, Tuple, Union, Literal -from typing_extensions import Annotated +from typing import Dict, Optional, Tuple import matplotlib.pyplot as plt -from pydantic import BaseModel, Extra, Field, validator +import requests +from pydantic import BaseModel, Extra, Field from pydantic.fields import PrivateAttr +from frigate.plus import PlusApi from frigate.util import load_labels - logger = logging.getLogger(__name__) -class ModelTypeEnum(str, Enum): - object = "object" - audio = "audio" - - class PixelFormatEnum(str, Enum): rgb = "rgb" bgr = "bgr" @@ -29,15 +27,33 @@ class InputTensorEnum(str, Enum): nhwc = "nhwc" -class BaseModelConfig(BaseModel): - type: str = Field(default="object", title="Model Type") - path: Optional[str] = Field(title="Custom model path.") - labelmap_path: Optional[str] = Field(title="Label map for custom model.") +class ModelTypeEnum(str, Enum): + ssd = "ssd" + yolox = "yolox" + yolov5 = "yolov5" + yolov8 = "yolov8" + + +class ModelConfig(BaseModel): + path: Optional[str] = Field(title="Custom Object detection model path.") + labelmap_path: Optional[str] = Field(title="Label map for custom object detector.") + width: int = Field(default=320, title="Object detection model input width.") + height: int = Field(default=320, title="Object detection model input height.") labelmap: Dict[int, str] = Field( default_factory=dict, title="Labelmap customization." ) + input_tensor: InputTensorEnum = Field( + default=InputTensorEnum.nhwc, title="Model Input Tensor Shape" + ) + input_pixel_format: PixelFormatEnum = Field( + default=PixelFormatEnum.rgb, title="Model Input Pixel Color Format" + ) + model_type: ModelTypeEnum = Field( + default=ModelTypeEnum.ssd, title="Object Detection Model Type" + ) _merged_labelmap: Optional[Dict[int, str]] = PrivateAttr() _colormap: Dict[int, Tuple[int, int, int]] = PrivateAttr() + _model_hash: str = PrivateAttr() @property def merged_labelmap(self) -> Dict[int, str]: @@ -47,6 +63,10 @@ class BaseModelConfig(BaseModel): def colormap(self) -> Dict[int, Tuple[int, int, int]]: return self._colormap + @property + def model_hash(self) -> str: + return self._model_hash + def __init__(self, **config): super().__init__(**config) @@ -56,6 +76,55 @@ class BaseModelConfig(BaseModel): } self._colormap = {} + def check_and_load_plus_model( + self, plus_api: PlusApi, detector: str = None + ) -> None: + if not self.path or not self.path.startswith("plus://"): + return + + model_id = self.path[7:] + self.path = f"/config/model_cache/{model_id}" + model_info_path = f"{self.path}.json" + + # download the model if it doesn't exist + if not os.path.isfile(self.path): + download_url = plus_api.get_model_download_url(model_id) + r = requests.get(download_url) + with open(self.path, "wb") as f: + f.write(r.content) + + # download the model info if it doesn't exist + if not os.path.isfile(model_info_path): + model_info = plus_api.get_model_info(model_id) + with open(model_info_path, "w") as f: + json.dump(model_info, f) + else: + with open(model_info_path, "r") as f: + model_info = json.load(f) + + if detector and detector not in model_info["supportedDetectors"]: + raise ValueError(f"Model does not support detector type of {detector}") + + self.width = model_info["width"] + self.height = model_info["height"] + self.input_tensor = model_info["inputShape"] + self.input_pixel_format = model_info["pixelFormat"] + self.model_type = model_info["type"] + self._merged_labelmap = { + **{int(key): val for key, val in model_info["labelMap"].items()}, + **self.labelmap, + } + + def compute_model_hash(self) -> None: + if not self.path or not os.path.exists(self.path): + self._model_hash = hashlib.md5(b"unknown").hexdigest() + else: + with open(self.path, "rb") as f: + file_hash = hashlib.md5() + while chunk := f.read(8192): + file_hash.update(chunk) + self._model_hash = file_hash.hexdigest() + def create_colormap(self, enabled_labels: set[str]) -> None: """Get a list of colors for enabled labels.""" cmap = plt.cm.get_cmap("tab10", len(enabled_labels)) @@ -64,48 +133,15 @@ class BaseModelConfig(BaseModel): self._colormap[val] = tuple(int(round(255 * c)) for c in cmap(key)[:3]) class Config: - extra = Extra.allow - arbitrary_types_allowed = True - - -class ObjectModelConfig(BaseModelConfig): - type: Literal["object"] = "object" - width: int = Field(default=320, title="Object detection model input width.") - height: int = Field(default=320, title="Object detection model input height.") - input_tensor: InputTensorEnum = Field( - default=InputTensorEnum.nhwc, title="Model Input Tensor Shape" - ) - input_pixel_format: PixelFormatEnum = Field( - default=PixelFormatEnum.rgb, title="Model Input Pixel Color Format" - ) - - -class AudioModelConfig(BaseModelConfig): - type: Literal["audio"] = "audio" - duration: float = Field(default=0.975, title="Model Input Audio Duration") - format: str = Field(default="s16le", title="Model Input Audio Format") - sample_rate: int = Field(default=16000, title="Model Input Sample Rate") - channels: int = Field(default=1, title="Model Input Number of Channels") - - def __init__(self, **config): - super().__init__(**config) - - self._merged_labelmap = { - **load_labels(config.get("labelmap_path", "/yamnet_label_list.txt")), - **config.get("labelmap", {}), - } - - -ModelConfig = Annotated[ - Union[tuple(BaseModelConfig.__subclasses__())], - Field(discriminator="type"), -] + extra = Extra.forbid class BaseDetectorConfig(BaseModel): # the type field must be defined in all subclasses type: str = Field(default="cpu", title="Detector Type") - model: Optional[ModelConfig] + model: ModelConfig = Field( + default=None, title="Detector specific model configuration." + ) class Config: extra = Extra.allow