mirror of
https://github.com/blakeblackshear/frigate.git
synced 2026-02-05 10:45:21 +03:00
Keep audio labelmap local
This commit is contained in:
parent
73118fd620
commit
51e1e5b7a5
@ -12,7 +12,7 @@ FROM debian:11-slim AS slim-base
|
||||
FROM slim-base AS wget
|
||||
ARG DEBIAN_FRONTEND
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y wget xz-utils unzip \
|
||||
&& apt-get install -y wget xz-utils \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
WORKDIR /rootfs
|
||||
|
||||
@ -96,7 +96,7 @@ RUN wget -q https://github.com/openvinotoolkit/open_model_zoo/raw/master/data/da
|
||||
# Get Audio Model and labels
|
||||
RUN wget -qO edgetpu_audio_model.tflite https://tfhub.dev/google/coral-model/yamnet/classification/coral/1?coral-format=tflite
|
||||
RUN wget -qO cpu_audio_model.tflite https://tfhub.dev/google/lite-model/yamnet/classification/tflite/1?lite-format=tflite
|
||||
RUN unzip -q edgetpu_audio_model.tflite yamnet_label_list.txt && chmod +r yamnet_label_list.txt
|
||||
COPY audio-labelmap.txt .
|
||||
|
||||
|
||||
FROM wget AS s6-overlay
|
||||
|
||||
521
audio-labelmap.txt
Normal file
521
audio-labelmap.txt
Normal file
@ -0,0 +1,521 @@
|
||||
Speech
|
||||
Child speech, kid speaking
|
||||
Conversation
|
||||
Narration, monologue
|
||||
Babbling
|
||||
Speech synthesizer
|
||||
Shout
|
||||
Bellow
|
||||
Whoop
|
||||
Yell
|
||||
Children shouting
|
||||
Screaming
|
||||
Whispering
|
||||
Laughter
|
||||
Baby laughter
|
||||
Giggle
|
||||
Snicker
|
||||
Belly laugh
|
||||
Chuckle, chortle
|
||||
Crying, sobbing
|
||||
Baby cry, infant cry
|
||||
Whimper
|
||||
Wail, moan
|
||||
Sigh
|
||||
Singing
|
||||
Choir
|
||||
Yodeling
|
||||
Chant
|
||||
Mantra
|
||||
Child singing
|
||||
Synthetic singing
|
||||
Rapping
|
||||
Humming
|
||||
Groan
|
||||
Grunt
|
||||
Whistling
|
||||
Breathing
|
||||
Wheeze
|
||||
Snoring
|
||||
Gasp
|
||||
Pant
|
||||
Snort
|
||||
Cough
|
||||
Throat clearing
|
||||
Sneeze
|
||||
Sniff
|
||||
Run
|
||||
Shuffle
|
||||
Walk, footsteps
|
||||
Chewing, mastication
|
||||
Biting
|
||||
Gargling
|
||||
Stomach rumble
|
||||
Burping, eructation
|
||||
Hiccup
|
||||
Fart
|
||||
Hands
|
||||
Finger snapping
|
||||
Clapping
|
||||
Heart sounds, heartbeat
|
||||
Heart murmur
|
||||
Cheering
|
||||
Applause
|
||||
Chatter
|
||||
Crowd
|
||||
Hubbub, speech noise, speech babble
|
||||
Children playing
|
||||
Animal
|
||||
Domestic animals, pets
|
||||
Dog
|
||||
Bark
|
||||
Yip
|
||||
Howl
|
||||
Bow-wow
|
||||
Growling
|
||||
Whimper (dog)
|
||||
Cat
|
||||
Purr
|
||||
Meow
|
||||
Hiss
|
||||
Caterwaul
|
||||
Livestock, farm animals, working animals
|
||||
Horse
|
||||
Clip-clop
|
||||
Neigh, whinny
|
||||
Cattle, bovinae
|
||||
Moo
|
||||
Cowbell
|
||||
Pig
|
||||
Oink
|
||||
Goat
|
||||
Bleat
|
||||
Sheep
|
||||
Fowl
|
||||
Chicken, rooster
|
||||
Cluck
|
||||
Crowing, cock-a-doodle-doo
|
||||
Turkey
|
||||
Gobble
|
||||
Duck
|
||||
Quack
|
||||
Goose
|
||||
Honk
|
||||
Wild animals
|
||||
Roaring cats (lions, tigers)
|
||||
Roar
|
||||
Bird
|
||||
Bird vocalization, bird call, bird song
|
||||
Chirp, tweet
|
||||
Squawk
|
||||
Pigeon, dove
|
||||
Coo
|
||||
Crow
|
||||
Caw
|
||||
Owl
|
||||
Hoot
|
||||
Bird flight, flapping wings
|
||||
Canidae, dogs, wolves
|
||||
Rodents, rats, mice
|
||||
Mouse
|
||||
Patter
|
||||
Insect
|
||||
Cricket
|
||||
Mosquito
|
||||
Fly, housefly
|
||||
Buzz
|
||||
Bee, wasp, etc.
|
||||
Frog
|
||||
Croak
|
||||
Snake
|
||||
Rattle
|
||||
Whale vocalization
|
||||
Music
|
||||
Musical instrument
|
||||
Plucked string instrument
|
||||
Guitar
|
||||
Electric guitar
|
||||
Bass guitar
|
||||
Acoustic guitar
|
||||
Steel guitar, slide guitar
|
||||
Tapping (guitar technique)
|
||||
Strum
|
||||
Banjo
|
||||
Sitar
|
||||
Mandolin
|
||||
Zither
|
||||
Ukulele
|
||||
Keyboard (musical)
|
||||
Piano
|
||||
Electric piano
|
||||
Organ
|
||||
Electronic organ
|
||||
Hammond organ
|
||||
Synthesizer
|
||||
Sampler
|
||||
Harpsichord
|
||||
Percussion
|
||||
Drum kit
|
||||
Drum machine
|
||||
Drum
|
||||
Snare drum
|
||||
Rimshot
|
||||
Drum roll
|
||||
Bass drum
|
||||
Timpani
|
||||
Tabla
|
||||
Cymbal
|
||||
Hi-hat
|
||||
Wood block
|
||||
Tambourine
|
||||
Rattle (instrument)
|
||||
Maraca
|
||||
Gong
|
||||
Tubular bells
|
||||
Mallet percussion
|
||||
Marimba, xylophone
|
||||
Glockenspiel
|
||||
Vibraphone
|
||||
Steelpan
|
||||
Orchestra
|
||||
Brass instrument
|
||||
French horn
|
||||
Trumpet
|
||||
Trombone
|
||||
Bowed string instrument
|
||||
String section
|
||||
Violin, fiddle
|
||||
Pizzicato
|
||||
Cello
|
||||
Double bass
|
||||
Wind instrument, woodwind instrument
|
||||
Flute
|
||||
Saxophone
|
||||
Clarinet
|
||||
Harp
|
||||
Bell
|
||||
Church bell
|
||||
Jingle bell
|
||||
Bicycle bell
|
||||
Tuning fork
|
||||
Chime
|
||||
Wind chime
|
||||
Change ringing (campanology)
|
||||
Harmonica
|
||||
Accordion
|
||||
Bagpipes
|
||||
Didgeridoo
|
||||
Shofar
|
||||
Theremin
|
||||
Singing bowl
|
||||
Scratching (performance technique)
|
||||
Pop music
|
||||
Hip hop music
|
||||
Beatboxing
|
||||
Rock music
|
||||
Heavy metal
|
||||
Punk rock
|
||||
Grunge
|
||||
Progressive rock
|
||||
Rock and roll
|
||||
Psychedelic rock
|
||||
Rhythm and blues
|
||||
Soul music
|
||||
Reggae
|
||||
Country
|
||||
Swing music
|
||||
Bluegrass
|
||||
Funk
|
||||
Folk music
|
||||
Middle Eastern music
|
||||
Jazz
|
||||
Disco
|
||||
Classical music
|
||||
Opera
|
||||
Electronic music
|
||||
House music
|
||||
Techno
|
||||
Dubstep
|
||||
Drum and bass
|
||||
Electronica
|
||||
Electronic dance music
|
||||
Ambient music
|
||||
Trance music
|
||||
Music of Latin America
|
||||
Salsa music
|
||||
Flamenco
|
||||
Blues
|
||||
Music for children
|
||||
New-age music
|
||||
Vocal music
|
||||
A capella
|
||||
Music of Africa
|
||||
Afrobeat
|
||||
Christian music
|
||||
Gospel music
|
||||
Music of Asia
|
||||
Carnatic music
|
||||
Music of Bollywood
|
||||
Ska
|
||||
Traditional music
|
||||
Independent music
|
||||
Song
|
||||
Background music
|
||||
Theme music
|
||||
Jingle (music)
|
||||
Soundtrack music
|
||||
Lullaby
|
||||
Video game music
|
||||
Christmas music
|
||||
Dance music
|
||||
Wedding music
|
||||
Happy music
|
||||
Sad music
|
||||
Tender music
|
||||
Exciting music
|
||||
Angry music
|
||||
Scary music
|
||||
Wind
|
||||
Rustling leaves
|
||||
Wind noise (microphone)
|
||||
Thunderstorm
|
||||
Thunder
|
||||
Water
|
||||
Rain
|
||||
Raindrop
|
||||
Rain on surface
|
||||
Stream
|
||||
Waterfall
|
||||
Ocean
|
||||
Waves, surf
|
||||
Steam
|
||||
Gurgling
|
||||
Fire
|
||||
Crackle
|
||||
Vehicle
|
||||
Boat, Water vehicle
|
||||
Sailboat, sailing ship
|
||||
Rowboat, canoe, kayak
|
||||
Motorboat, speedboat
|
||||
Ship
|
||||
Motor vehicle (road)
|
||||
Car
|
||||
Vehicle horn, car horn, honking
|
||||
Toot
|
||||
Car alarm
|
||||
Power windows, electric windows
|
||||
Skidding
|
||||
Tire squeal
|
||||
Car passing by
|
||||
Race car, auto racing
|
||||
Truck
|
||||
Air brake
|
||||
Air horn, truck horn
|
||||
Reversing beeps
|
||||
Ice cream truck, ice cream van
|
||||
Bus
|
||||
Emergency vehicle
|
||||
Police car (siren)
|
||||
Ambulance (siren)
|
||||
Fire engine, fire truck (siren)
|
||||
Motorcycle
|
||||
Traffic noise, roadway noise
|
||||
Rail transport
|
||||
Train
|
||||
Train whistle
|
||||
Train horn
|
||||
Railroad car, train wagon
|
||||
Train wheels squealing
|
||||
Subway, metro, underground
|
||||
Aircraft
|
||||
Aircraft engine
|
||||
Jet engine
|
||||
Propeller, airscrew
|
||||
Helicopter
|
||||
Fixed-wing aircraft, airplane
|
||||
Bicycle
|
||||
Skateboard
|
||||
Engine
|
||||
Light engine (high frequency)
|
||||
Dental drill, dentist's drill
|
||||
Lawn mower
|
||||
Chainsaw
|
||||
Medium engine (mid frequency)
|
||||
Heavy engine (low frequency)
|
||||
Engine knocking
|
||||
Engine starting
|
||||
Idling
|
||||
Accelerating, revving, vroom
|
||||
Door
|
||||
Doorbell
|
||||
Ding-dong
|
||||
Sliding door
|
||||
Slam
|
||||
Knock
|
||||
Tap
|
||||
Squeak
|
||||
Cupboard open or close
|
||||
Drawer open or close
|
||||
Dishes, pots, and pans
|
||||
Cutlery, silverware
|
||||
Chopping (food)
|
||||
Frying (food)
|
||||
Microwave oven
|
||||
Blender
|
||||
Water tap, faucet
|
||||
Sink (filling or washing)
|
||||
Bathtub (filling or washing)
|
||||
Hair dryer
|
||||
Toilet flush
|
||||
Toothbrush
|
||||
Electric toothbrush
|
||||
Vacuum cleaner
|
||||
Zipper (clothing)
|
||||
Keys jangling
|
||||
Coin (dropping)
|
||||
Scissors
|
||||
Electric shaver, electric razor
|
||||
Shuffling cards
|
||||
Typing
|
||||
Typewriter
|
||||
Computer keyboard
|
||||
Writing
|
||||
Alarm
|
||||
Telephone
|
||||
Telephone bell ringing
|
||||
Ringtone
|
||||
Telephone dialing, DTMF
|
||||
Dial tone
|
||||
Busy signal
|
||||
Alarm clock
|
||||
Siren
|
||||
Civil defense siren
|
||||
Buzzer
|
||||
Smoke detector, smoke alarm
|
||||
Fire alarm
|
||||
Foghorn
|
||||
Whistle
|
||||
Steam whistle
|
||||
Mechanisms
|
||||
Ratchet, pawl
|
||||
Clock
|
||||
Tick
|
||||
Tick-tock
|
||||
Gears
|
||||
Pulleys
|
||||
Sewing machine
|
||||
Mechanical fan
|
||||
Air conditioning
|
||||
Cash register
|
||||
Printer
|
||||
Camera
|
||||
Single-lens reflex camera
|
||||
Tools
|
||||
Hammer
|
||||
Jackhammer
|
||||
Sawing
|
||||
Filing (rasp)
|
||||
Sanding
|
||||
Power tool
|
||||
Drill
|
||||
Explosion
|
||||
Gunshot, gunfire
|
||||
Machine gun
|
||||
Fusillade
|
||||
Artillery fire
|
||||
Cap gun
|
||||
Fireworks
|
||||
Firecracker
|
||||
Burst, pop
|
||||
Eruption
|
||||
Boom
|
||||
Wood
|
||||
Chop
|
||||
Splinter
|
||||
Crack
|
||||
Glass
|
||||
Chink, clink
|
||||
Shatter
|
||||
Liquid
|
||||
Splash, splatter
|
||||
Slosh
|
||||
Squish
|
||||
Drip
|
||||
Pour
|
||||
Trickle, dribble
|
||||
Gush
|
||||
Fill (with liquid)
|
||||
Spray
|
||||
Pump (liquid)
|
||||
Stir
|
||||
Boiling
|
||||
Sonar
|
||||
Arrow
|
||||
Whoosh, swoosh, swish
|
||||
Thump, thud
|
||||
Thunk
|
||||
Electronic tuner
|
||||
Effects unit
|
||||
Chorus effect
|
||||
Basketball bounce
|
||||
Bang
|
||||
Slap, smack
|
||||
Whack, thwack
|
||||
Smash, crash
|
||||
Breaking
|
||||
Bouncing
|
||||
Whip
|
||||
Flap
|
||||
Scratch
|
||||
Scrape
|
||||
Rub
|
||||
Roll
|
||||
Crushing
|
||||
Crumpling, crinkling
|
||||
Tearing
|
||||
Beep, bleep
|
||||
Ping
|
||||
Ding
|
||||
Clang
|
||||
Squeal
|
||||
Creak
|
||||
Rustle
|
||||
Whir
|
||||
Clatter
|
||||
Sizzle
|
||||
Clicking
|
||||
Clickety-clack
|
||||
Rumble
|
||||
Plop
|
||||
Jingle, tinkle
|
||||
Hum
|
||||
Zing
|
||||
Boing
|
||||
Crunch
|
||||
Silence
|
||||
Sine wave
|
||||
Harmonic
|
||||
Chirp tone
|
||||
Sound effect
|
||||
Pulse
|
||||
Inside, small room
|
||||
Inside, large room or hall
|
||||
Inside, public space
|
||||
Outside, urban or manmade
|
||||
Outside, rural or natural
|
||||
Reverberation
|
||||
Echo
|
||||
Noise
|
||||
Environmental noise
|
||||
Static
|
||||
Mains hum
|
||||
Distortion
|
||||
Sidetone
|
||||
Cacophony
|
||||
White noise
|
||||
Pink noise
|
||||
Throbbing
|
||||
Vibration
|
||||
Television
|
||||
Radio
|
||||
Field recording
|
||||
@ -1,23 +1,21 @@
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from enum import Enum
|
||||
from typing import Dict, List, Optional, Tuple, Union, Literal
|
||||
from typing_extensions import Annotated
|
||||
from typing import Dict, Optional, Tuple
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
from pydantic import BaseModel, Extra, Field, validator
|
||||
import requests
|
||||
from pydantic import BaseModel, Extra, Field
|
||||
from pydantic.fields import PrivateAttr
|
||||
|
||||
from frigate.plus import PlusApi
|
||||
from frigate.util import load_labels
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Kind of model a configuration entry describes. The ``str`` mixin makes
# each member compare equal to its raw string value.
class ModelTypeEnum(str, Enum):
    object = "object"
    audio = "audio"
|
||||
|
||||
|
||||
class PixelFormatEnum(str, Enum):
|
||||
rgb = "rgb"
|
||||
bgr = "bgr"
|
||||
@ -29,15 +27,33 @@ class InputTensorEnum(str, Enum):
|
||||
nhwc = "nhwc"
|
||||
|
||||
|
||||
class BaseModelConfig(BaseModel):
|
||||
type: str = Field(default="object", title="Model Type")
|
||||
path: Optional[str] = Field(title="Custom model path.")
|
||||
labelmap_path: Optional[str] = Field(title="Label map for custom model.")
|
||||
# Object-detection model families selectable through ``model_type``. The
# ``str`` mixin makes each member compare equal to its raw string value.
class ModelTypeEnum(str, Enum):
    ssd = "ssd"
    yolox = "yolox"
    yolov5 = "yolov5"
    yolov8 = "yolov8"
|
||||
|
||||
|
||||
class ModelConfig(BaseModel):
|
||||
path: Optional[str] = Field(title="Custom Object detection model path.")
|
||||
labelmap_path: Optional[str] = Field(title="Label map for custom object detector.")
|
||||
width: int = Field(default=320, title="Object detection model input width.")
|
||||
height: int = Field(default=320, title="Object detection model input height.")
|
||||
labelmap: Dict[int, str] = Field(
|
||||
default_factory=dict, title="Labelmap customization."
|
||||
)
|
||||
input_tensor: InputTensorEnum = Field(
|
||||
default=InputTensorEnum.nhwc, title="Model Input Tensor Shape"
|
||||
)
|
||||
input_pixel_format: PixelFormatEnum = Field(
|
||||
default=PixelFormatEnum.rgb, title="Model Input Pixel Color Format"
|
||||
)
|
||||
model_type: ModelTypeEnum = Field(
|
||||
default=ModelTypeEnum.ssd, title="Object Detection Model Type"
|
||||
)
|
||||
_merged_labelmap: Optional[Dict[int, str]] = PrivateAttr()
|
||||
_colormap: Dict[int, Tuple[int, int, int]] = PrivateAttr()
|
||||
_model_hash: str = PrivateAttr()
|
||||
|
||||
@property
|
||||
def merged_labelmap(self) -> Dict[int, str]:
|
||||
@ -47,6 +63,10 @@ class BaseModelConfig(BaseModel):
|
||||
def colormap(self) -> Dict[int, Tuple[int, int, int]]:
|
||||
return self._colormap
|
||||
|
||||
@property
def model_hash(self) -> str:
    """Return the hash currently stored in ``self._model_hash``."""
    return self._model_hash
|
||||
|
||||
def __init__(self, **config):
|
||||
super().__init__(**config)
|
||||
|
||||
@ -56,6 +76,55 @@ class BaseModelConfig(BaseModel):
|
||||
}
|
||||
self._colormap = {}
|
||||
|
||||
def check_and_load_plus_model(
    self, plus_api: PlusApi, detector: Optional[str] = None
) -> None:
    """Resolve a ``plus://`` model path to a cached file and apply its metadata.

    Does nothing unless ``self.path`` starts with ``plus://``. Otherwise
    ``self.path`` is rewritten to ``/config/model_cache/<model_id>``; the
    model file and its ``<path>.json`` info file are fetched through
    ``plus_api`` when they are not already on disk; and the width, height,
    input tensor shape, pixel format, model type and label map found in the
    info are copied onto this config. Entries in ``self.labelmap`` override
    the labels supplied by the model info.

    Args:
        plus_api: Client providing ``get_model_download_url(model_id)`` and
            ``get_model_info(model_id)``.
        detector: Detector type checked against the model's
            ``supportedDetectors`` list; the check is skipped when falsy.

    Raises:
        ValueError: If ``detector`` is given and the model does not list it.
        requests.HTTPError: If the model download returns an error status.
    """
    plus_prefix = "plus://"
    if not self.path or not self.path.startswith(plus_prefix):
        return

    model_id = self.path[len(plus_prefix):]
    self.path = f"/config/model_cache/{model_id}"
    model_info_path = f"{self.path}.json"

    # download the model if it doesn't exist
    if not os.path.isfile(self.path):
        download_url = plus_api.get_model_download_url(model_id)
        # A timeout keeps a stalled server from hanging startup forever.
        response = requests.get(download_url, timeout=60)
        # Fail loudly rather than caching an HTTP error body as the model;
        # once cached, the isfile() check above would never re-download it.
        response.raise_for_status()
        # Write under a temporary name and rename, so an interrupted write
        # never leaves a truncated file at the final cache path.
        partial_model_path = f"{self.path}.part"
        with open(partial_model_path, "wb") as f:
            f.write(response.content)
        os.replace(partial_model_path, self.path)

    # download the model info if it doesn't exist
    if os.path.isfile(model_info_path):
        with open(model_info_path, "r") as f:
            model_info = json.load(f)
    else:
        model_info = plus_api.get_model_info(model_id)
        # Same temp-file-then-rename approach as for the model itself.
        partial_info_path = f"{model_info_path}.part"
        with open(partial_info_path, "w") as f:
            json.dump(model_info, f)
        os.replace(partial_info_path, model_info_path)

    if detector and detector not in model_info["supportedDetectors"]:
        raise ValueError(f"Model does not support detector type of {detector}")

    self.width = model_info["width"]
    self.height = model_info["height"]
    self.input_tensor = model_info["inputShape"]
    self.input_pixel_format = model_info["pixelFormat"]
    self.model_type = model_info["type"]
    # Label map keys are converted from strings to ints; user-configured
    # labels are merged last so they take precedence.
    self._merged_labelmap = {
        **{int(key): val for key, val in model_info["labelMap"].items()},
        **self.labelmap,
    }
|
||||
|
||||
def compute_model_hash(self) -> None:
    """Store an MD5 hex digest identifying the model in ``self._model_hash``.

    The file at ``self.path`` is hashed in 8192-byte chunks so it is never
    read into memory whole. When ``self.path`` is unset or does not point
    to a regular file, the digest of the literal bytes ``b"unknown"`` is
    stored instead.
    """
    # isfile() rather than exists(): a directory path exists but cannot be
    # opened for reading, and should fall back to the placeholder hash.
    if not self.path or not os.path.isfile(self.path):
        self._model_hash = hashlib.md5(b"unknown").hexdigest()
        return

    file_hash = hashlib.md5()
    with open(self.path, "rb") as f:
        while chunk := f.read(8192):
            file_hash.update(chunk)
    self._model_hash = file_hash.hexdigest()
|
||||
|
||||
def create_colormap(self, enabled_labels: set[str]) -> None:
|
||||
"""Get a list of colors for enabled labels."""
|
||||
cmap = plt.cm.get_cmap("tab10", len(enabled_labels))
|
||||
@ -64,48 +133,15 @@ class BaseModelConfig(BaseModel):
|
||||
self._colormap[val] = tuple(int(round(255 * c)) for c in cmap(key)[:3])
|
||||
|
||||
class Config:
|
||||
extra = Extra.allow
|
||||
arbitrary_types_allowed = True
|
||||
|
||||
|
||||
# Model configuration variant selected when ``type`` is "object". Adds the
# input dimensions, tensor layout and pixel format on top of the base
# model configuration fields.
class ObjectModelConfig(BaseModelConfig):
    # Fixed tag value identifying this variant.
    type: Literal["object"] = "object"

    # Model input size.
    width: int = Field(default=320, title="Object detection model input width.")
    height: int = Field(default=320, title="Object detection model input height.")

    # Input tensor layout and pixel color format.
    input_tensor: InputTensorEnum = Field(
        default=InputTensorEnum.nhwc,
        title="Model Input Tensor Shape",
    )
    input_pixel_format: PixelFormatEnum = Field(
        default=PixelFormatEnum.rgb,
        title="Model Input Pixel Color Format",
    )
|
||||
|
||||
|
||||
# Model configuration variant selected when ``type`` is "audio". Adds the
# audio input parameters and builds the merged label map at construction.
class AudioModelConfig(BaseModelConfig):
    # Fixed tag value identifying this variant.
    type: Literal["audio"] = "audio"
    duration: float = Field(default=0.975, title="Model Input Audio Duration")
    format: str = Field(default="s16le", title="Model Input Audio Format")
    sample_rate: int = Field(default=16000, title="Model Input Sample Rate")
    channels: int = Field(default=1, title="Model Input Number of Channels")

    def __init__(self, **config):
        """Validate ``config`` and build the merged audio label map.

        Labels are loaded from ``labelmap_path`` (default
        ``/yamnet_label_list.txt``) and then overridden by any entries in
        the ``labelmap`` config key.
        """
        super().__init__(**config)

        # ``labelmap_path`` is Optional, so it may be present but None;
        # ``or`` falls back to the default in that case as well as when
        # the key is missing. The same applies to an explicit
        # ``labelmap: None``, which cannot be unpacked with ``**``.
        labels_file = config.get("labelmap_path") or "/yamnet_label_list.txt"
        label_overrides = config.get("labelmap") or {}

        # NOTE(review): load_labels is assumed to return a mapping, since
        # its result is unpacked with ``**`` — confirm in frigate.util.
        self._merged_labelmap = {
            **load_labels(labels_file),
            **label_overrides,
        }
|
||||
|
||||
|
||||
ModelConfig = Annotated[
|
||||
Union[tuple(BaseModelConfig.__subclasses__())],
|
||||
Field(discriminator="type"),
|
||||
]
|
||||
extra = Extra.forbid
|
||||
|
||||
|
||||
class BaseDetectorConfig(BaseModel):
|
||||
# the type field must be defined in all subclasses
|
||||
type: str = Field(default="cpu", title="Detector Type")
|
||||
model: Optional[ModelConfig]
|
||||
model: ModelConfig = Field(
|
||||
default=None, title="Detector specific model configuration."
|
||||
)
|
||||
|
||||
class Config:
|
||||
extra = Extra.allow
|
||||
|
||||
Loading…
Reference in New Issue
Block a user