diff --git a/docker-compose.yml b/docker-compose.yml
index a4d349194..5781f1ff1 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -15,13 +15,13 @@ services:
       # Use target devcontainer-trt for TensorRT dev
       target: devcontainer
     ## Uncomment this block for nvidia gpu support
-    # deploy:
-    #   resources:
-    #     reservations:
-    #       devices:
-    #         - driver: nvidia
-    #           count: 1
-    #           capabilities: [gpu]
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: 1
+              capabilities: [gpu]
     environment:
       YOLO_MODELS: yolov7-320
     devices:
diff --git a/docker/main/requirements-wheels.txt b/docker/main/requirements-wheels.txt
index f4167744e..0591a8c0d 100644
--- a/docker/main/requirements-wheels.txt
+++ b/docker/main/requirements-wheels.txt
@@ -27,3 +27,5 @@ unidecode == 1.3.*
 # Openvino Library - Custom built with MYRIAD support
 openvino @ https://github.com/NateMeyer/openvino-wheels/releases/download/multi-arch_2022.3.1/openvino-2022.3.1-1-cp39-cp39-manylinux_2_31_x86_64.whl; platform_machine == 'x86_64'
 openvino @ https://github.com/NateMeyer/openvino-wheels/releases/download/multi-arch_2022.3.1/openvino-2022.3.1-1-cp39-cp39-linux_aarch64.whl; platform_machine == 'aarch64'
+# Google Generative AI
+google-generativeai == 0.3.*
\ No newline at end of file
diff --git a/frigate/app.py b/frigate/app.py
index 1535eeaf6..43d1533c5 100644
--- a/frigate/app.py
+++ b/frigate/app.py
@@ -35,6 +35,7 @@ from frigate.events.audio import listen_to_audio
 from frigate.events.cleanup import EventCleanup
 from frigate.events.external import ExternalEventProcessor
 from frigate.events.maintainer import EventProcessor
+from frigate.gemini import GeminiProcessor
 from frigate.http import create_app
 from frigate.log import log_process, root_configurer
 from frigate.models import Event, Recordings, RecordingsToDelete, Regions, Timeline
@@ -266,6 +267,9 @@ class FrigateApp:
         # Queue for timeline events
         self.timeline_queue: Queue = mp.Queue()

+        # Queue for Google Gemini events
+        self.gemini_queue: Queue = mp.Queue()
+
         # Queue for inter process communication
         self.inter_process_queue: Queue = mp.Queue()

@@ -576,6 +580,12 @@ class FrigateApp:
         )
         self.timeline_processor.start()

+    def start_gemini_processor(self) -> None:
+        self.gemini_processor = GeminiProcessor(
+            self.config, self.gemini_queue, self.stop_event
+        )
+        self.gemini_processor.start()
+
     def start_event_processor(self) -> None:
         self.event_processor = EventProcessor(
             self.config,
@@ -583,6 +593,7 @@
             self.event_queue,
             self.event_processed_queue,
             self.timeline_queue,
+            self.gemini_queue,
             self.stop_event,
         )
         self.event_processor.start()
@@ -692,6 +703,7 @@
         self.init_external_event_processor()
         self.init_web_server()
         self.start_timeline_processor()
+        self.start_gemini_processor()
         self.start_event_processor()
         self.start_event_cleanup()
         self.start_record_cleanup()
@@ -734,6 +746,7 @@
         self.record_cleanup.join()
         self.stats_emitter.join()
         self.frigate_watchdog.join()
+        self.gemini_processor.join()
         self.db.stop()

         while len(self.detection_shms) > 0:
diff --git a/frigate/config.py b/frigate/config.py
index 6760ea5e6..9ea44189a 100644
--- a/frigate/config.py
+++ b/frigate/config.py
@@ -382,6 +382,26 @@ class DetectConfig(FrigateBaseModel):
     )


+class GeminiConfig(FrigateBaseModel):
+    enabled: bool = Field(default=False, title="Enable Google Gemini captioning.")
+    override_existing: bool = Field(
+        default=False, title="Override existing sub labels."
+    )
+    api_key: str = Field(default="", title="Google AI Studio API Key.")
+    model: str = Field(default="gemini-pro-vision", title="Google AI Studio Model.")
+    prompt: str = Field(
+        default="Caption this image with as much detail as possible. Make sure the response is under 90 characters.",
+        title="Default caption prompt.",
+    )
+    object_prompts: Dict[str, str] = Field(
+        default={
+            "person": "Describe the main person in the image (gender, age, clothing, activity, etc). Do not include where the activity is occurring (sidewalk, concrete, driveway, etc). If delivering a package, include the company the package is from. Make sure the response is under 90 characters.",
+            "car": "Label the primary vehicle in the image with just the name of the company if it is a delivery vehicle, or the color make and model.",
+        },
+        title="Object specific prompts.",
+    )
+
+
 class FilterConfig(FrigateBaseModel):
     min_area: int = Field(
         default=0, title="Minimum area of bounding box for object to be counted."
@@ -780,6 +800,9 @@ class CameraConfig(FrigateBaseModel):
     onvif: OnvifConfig = Field(
         default_factory=OnvifConfig, title="Camera Onvif Configuration."
     )
+    gemini: GeminiConfig = Field(
+        default_factory=GeminiConfig, title="Google Gemini Configuration."
+    )
     ui: CameraUiConfig = Field(
         default_factory=CameraUiConfig, title="Camera UI Modifications."
     )
@@ -1092,6 +1115,9 @@ class FrigateConfig(FrigateBaseModel):
     detect: DetectConfig = Field(
         default_factory=DetectConfig, title="Global object tracking configuration."
     )
+    gemini: GeminiConfig = Field(
+        default_factory=GeminiConfig, title="Global Google Gemini Configuration."
+    )
     cameras: Dict[str, CameraConfig] = Field(title="Camera configuration.")
     timestamp_style: TimestampStyleConfig = Field(
         default_factory=TimestampStyleConfig,
@@ -1107,6 +1133,10 @@ class FrigateConfig(FrigateBaseModel):
             config.mqtt.user = config.mqtt.user.format(**FRIGATE_ENV_VARS)
             config.mqtt.password = config.mqtt.password.format(**FRIGATE_ENV_VARS)

+        # Gemini API Key substitutions
+        if config.gemini.api_key:
+            config.gemini.api_key = config.gemini.api_key.format(**FRIGATE_ENV_VARS)
+
         # set default min_score for object attributes
         for attribute in ALL_ATTRIBUTE_LABELS:
             if not config.objects.filters.get(attribute):
@@ -1128,6 +1158,7 @@ class FrigateConfig(FrigateBaseModel):
                 "detect": ...,
                 "ffmpeg": ...,
                 "timestamp_style": ...,
+                "gemini": ...,
             },
             exclude_unset=True,
         )
@@ -1194,6 +1225,13 @@ class FrigateConfig(FrigateBaseModel):
                 camera_config.onvif.password = camera_config.onvif.password.format(
                     **FRIGATE_ENV_VARS
                 )
+
+            # Gemini substitution
+            if camera_config.gemini.api_key:
+                camera_config.gemini.api_key = camera_config.gemini.api_key.format(
+                    **FRIGATE_ENV_VARS
+                )
+
             # set config pre-value
             camera_config.record.enabled_in_config = camera_config.record.enabled
             camera_config.audio.enabled_in_config = camera_config.audio.enabled
diff --git a/frigate/events/maintainer.py b/frigate/events/maintainer.py
index db8341656..685f47979 100644
--- a/frigate/events/maintainer.py
+++ b/frigate/events/maintainer.py
@@ -32,6 +32,7 @@ def should_update_db(prev_event: Event, current_event: Event) -> bool:
         or prev_event["entered_zones"] != current_event["entered_zones"]
         or prev_event["thumbnail"] != current_event["thumbnail"]
         or prev_event["end_time"] != current_event["end_time"]
+        or prev_event["sub_label"] != current_event["sub_label"]
     ):
         return True
     return False
@@ -56,6 +57,7 @@ class EventProcessor(threading.Thread):
         event_queue: Queue,
         event_processed_queue: Queue,
         timeline_queue: Queue,
+        gemini_queue: Queue,
         stop_event: MpEvent,
     ):
         threading.Thread.__init__(self)
@@ -65,6 +67,7 @@
         self.event_queue = event_queue
         self.event_processed_queue = event_processed_queue
         self.timeline_queue = timeline_queue
+        self.gemini_queue = gemini_queue
         self.events_in_process: Dict[str, Event] = {}
         self.stop_event = stop_event

@@ -102,6 +105,14 @@
                 continue

             self.handle_object_detection(event_type, camera, event_data)
+
+            if event_type == "end" and self.config.cameras[camera].gemini.enabled:
+                self.gemini_queue.put(
+                    (
+                        camera,
+                        event_data,
+                    )
+                )
         elif source_type == EventTypeEnum.api:
             self.handle_external_detection(event_type, event_data)

diff --git a/web/src/routes/Events.jsx b/web/src/routes/Events.jsx
index 2b2b546ef..4bcfc4ba1 100644
--- a/web/src/routes/Events.jsx
+++ b/web/src/routes/Events.jsx
@@ -797,11 +797,10 @@ function Event({
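
Two reference sketches follow; neither is part of the diff itself.

The config changes add a `gemini` block both globally (on `FrigateConfig`, which is also added to the merged global-config include list) and per camera (on `CameraConfig`), with `{FRIGATE_*}` environment-variable substitution applied to `api_key`. A hypothetical YAML snippet using those fields is below; the camera name and the `FRIGATE_GEMINI_API_KEY` variable name are made up for illustration, and any omitted key falls back to the `GeminiConfig` defaults shown above.

```yaml
# Hypothetical example only: key names mirror GeminiConfig; the camera name
# and the FRIGATE_GEMINI_API_KEY environment variable are illustrative.
gemini:
  api_key: "{FRIGATE_GEMINI_API_KEY}"
  model: gemini-pro-vision
  override_existing: False

cameras:
  front_door:
    gemini:
      enabled: True  # maintainer.py checks cameras[camera].gemini.enabled
      object_prompts:
        person: "Describe the main person in the image. Make sure the response is under 90 characters."
```

Because `"gemini": ...,` is added to the include list used to build the merged camera config, the camera-level block above should inherit the global `api_key` and `model` while overriding `enabled` and `object_prompts`.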
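The `frigate/gemini.py` module that `frigate/app.py` imports (`GeminiProcessor`) is not included in this portion of the diff. Below is only a rough sketch of a thread matching the constructor call in `start_gemini_processor`, under several assumptions: that the queued `event_data` dict carries `id`, `label`, `sub_label`, and a base64-encoded JPEG `thumbnail` (the keys `should_update_db` compares), that the caption is written to `Event.sub_label` via the peewee model, and that the `google-generativeai == 0.3.*` client added to the requirements is used through its documented `GenerativeModel.generate_content` call. The real module may differ on all of these points.

```python
# Hypothetical sketch of frigate/gemini.py -- this module is NOT shown in the
# diff; field names and behavior here are assumptions based on the config and
# queue wiring above.
import base64
import io
import logging
import queue
import threading
from multiprocessing import Queue
from multiprocessing.synchronize import Event as MpEvent

import google.generativeai as genai
from PIL import Image

from frigate.config import FrigateConfig
from frigate.models import Event

logger = logging.getLogger(__name__)


class GeminiProcessor(threading.Thread):
    """Consume finished tracked objects and caption their thumbnails with Gemini."""

    def __init__(
        self, config: FrigateConfig, gemini_queue: Queue, stop_event: MpEvent
    ) -> None:
        super().__init__(name="gemini_processor")
        self.config = config
        self.gemini_queue = gemini_queue
        self.stop_event = stop_event

    def run(self) -> None:
        while not self.stop_event.is_set():
            try:
                camera, event_data = self.gemini_queue.get(timeout=1)
            except queue.Empty:
                continue

            gemini_config = self.config.cameras[camera].gemini

            # Respect an existing sub label unless override_existing is set.
            if event_data.get("sub_label") and not gemini_config.override_existing:
                continue

            # Prefer an object-specific prompt when one is configured.
            prompt = gemini_config.object_prompts.get(
                event_data.get("label"), gemini_config.prompt
            )

            # Assumes event_data["thumbnail"] is a base64-encoded JPEG.
            image = Image.open(io.BytesIO(base64.b64decode(event_data["thumbnail"])))

            try:
                genai.configure(api_key=gemini_config.api_key)
                model = genai.GenerativeModel(gemini_config.model)
                caption = model.generate_content([prompt, image]).text.strip()
            except Exception as e:
                logger.warning(f"Gemini captioning failed for {event_data['id']}: {e}")
                continue

            # Store the caption as the event's sub label; truncate defensively,
            # since the prompts above ask for responses under 90 characters.
            Event.update(sub_label=caption[:100]).where(
                Event.id == event_data["id"]
            ).execute()

        logger.info("Exiting Gemini processor...")
```

Running this as a `threading.Thread` in the main process mirrors how `EventProcessor` is structured and how `frigate/app.py` starts and joins the processor above.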