diff --git a/frigate/config/camera/camera.py b/frigate/config/camera/camera.py
index 3b24dabac..33ad312a2 100644
--- a/frigate/config/camera/camera.py
+++ b/frigate/config/camera/camera.py
@@ -19,6 +19,7 @@ from frigate.util.builtin import (
 
 from ..base import FrigateBaseModel
 from ..classification import (
+    AudioTranscriptionConfig,
     CameraFaceRecognitionConfig,
     CameraLicensePlateRecognitionConfig,
 )
@@ -56,6 +57,9 @@ class CameraConfig(FrigateBaseModel):
     audio: AudioConfig = Field(
         default_factory=AudioConfig, title="Audio events configuration."
     )
+    audio_transcription: AudioTranscriptionConfig = Field(
+        default_factory=AudioTranscriptionConfig, title="Audio transcription config."
+    )
     birdseye: BirdseyeCameraConfig = Field(
         default_factory=BirdseyeCameraConfig, title="Birdseye camera configuration."
     )
diff --git a/frigate/config/camera/updater.py b/frigate/config/camera/updater.py
index 140e02207..5ddc26d44 100644
--- a/frigate/config/camera/updater.py
+++ b/frigate/config/camera/updater.py
@@ -12,6 +12,7 @@ class CameraConfigUpdateEnum(str, Enum):
     """Supported camera config update types."""
 
     audio = "audio"
+    audio_transcription = "audio_transcription"
     birdseye = "birdseye"
     detect = "detect"
     enabled = "enabled"
@@ -74,6 +75,8 @@ class CameraConfigUpdateSubscriber:
 
         if update_type == CameraConfigUpdateEnum.audio:
             config.audio = updated_config
+        elif update_type == CameraConfigUpdateEnum.audio_transcription:
+            config.audio_transcription = updated_config
         elif update_type == CameraConfigUpdateEnum.birdseye:
             config.birdseye = updated_config
         elif update_type == CameraConfigUpdateEnum.detect:
diff --git a/frigate/config/classification.py b/frigate/config/classification.py
index 4af60df4f..29568f5cd 100644
--- a/frigate/config/classification.py
+++ b/frigate/config/classification.py
@@ -19,11 +19,34 @@ class SemanticSearchModelEnum(str, Enum):
     jinav2 = "jinav2"
 
 
-class LPRDeviceEnum(str, Enum):
+class EnrichmentsDeviceEnum(str, Enum):
     GPU = "GPU"
     CPU = "CPU"
 
 
+# Settings for audio transcription; used both as a top-level section of
+# FrigateConfig and as a per-camera section of CameraConfig.
+class AudioTranscriptionConfig(FrigateBaseModel):
+    enabled: bool = Field(default=False, title="Enable audio transcription.")
+    language: str = Field(
+        default="en",
+        title="Language abbreviation to use for audio event transcription/translation.",
+    )
+    device: Optional[EnrichmentsDeviceEnum] = Field(
+        default=EnrichmentsDeviceEnum.CPU,
+        title="The device used for audio transcription.",
+    )
+    model_size: str = Field(
+        default="small", title="The size of the embeddings model used."
+    )
+    enabled_in_config: Optional[bool] = Field(
+        default=None, title="Keep track of original state of camera."
+    )
+    live_enabled: Optional[bool] = Field(
+        default=False, title="Enable live transcriptions."
+    )
+
+
 class BirdClassificationConfig(FrigateBaseModel):
     enabled: bool = Field(default=False, title="Enable bird classification.")
     threshold: float = Field(
@@ -144,8 +167,8 @@ class CameraFaceRecognitionConfig(FrigateBaseModel):
 
 class LicensePlateRecognitionConfig(FrigateBaseModel):
     enabled: bool = Field(default=False, title="Enable license plate recognition.")
-    device: Optional[LPRDeviceEnum] = Field(
-        default=LPRDeviceEnum.CPU,
+    device: Optional[EnrichmentsDeviceEnum] = Field(
+        default=EnrichmentsDeviceEnum.CPU,
         title="The device used for license plate recognition.",
     )
     model_size: str = Field(
diff --git a/frigate/config/config.py b/frigate/config/config.py
index 58427f5d5..7c7eeb1e7 100644
--- a/frigate/config/config.py
+++ b/frigate/config/config.py
@@ -54,6 +54,7 @@ from .camera.snapshots import SnapshotsConfig
 from .camera.timestamp import TimestampStyleConfig
 from .camera_group import CameraGroupConfig
 from .classification import (
+    AudioTranscriptionConfig,
     ClassificationConfig,
     FaceRecognitionConfig,
     LicensePlateRecognitionConfig,
@@ -419,6 +420,9 @@ class FrigateConfig(FrigateBaseModel):
     )
 
     # Classification Config
+    audio_transcription: AudioTranscriptionConfig = Field(
+        default_factory=AudioTranscriptionConfig, title="Audio transcription config."
+    )
     classification: ClassificationConfig = Field(
         default_factory=ClassificationConfig, title="Object classification config."
     )
@@ -472,6 +476,7 @@ class FrigateConfig(FrigateBaseModel):
         global_config = self.model_dump(
             include={
                 "audio": ...,
+                "audio_transcription": ...,
                 "birdseye": ...,
                 "face_recognition": ...,
                 "lpr": ...,
@@ -528,6 +533,7 @@ class FrigateConfig(FrigateBaseModel):
             allowed_fields_map = {
                 "face_recognition": ["enabled", "min_area"],
                 "lpr": ["enabled", "expire_time", "min_area", "enhancement"],
+                "audio_transcription": ["enabled", "live_enabled"],
             }
 
             for section in allowed_fields_map:
@@ -609,6 +615,9 @@ class FrigateConfig(FrigateBaseModel):
             # set config pre-value
             camera_config.enabled_in_config = camera_config.enabled
             camera_config.audio.enabled_in_config = camera_config.audio.enabled
+            camera_config.audio_transcription.enabled_in_config = (
+                camera_config.audio_transcription.enabled
+            )
             camera_config.record.enabled_in_config = camera_config.record.enabled
             camera_config.notifications.enabled_in_config = (
                 camera_config.notifications.enabled