mirror of
https://github.com/blakeblackshear/frigate.git
synced 2026-02-07 11:45:24 +03:00
initial implementation of Google Gemini captions
This commit is contained in:
parent
e390533760
commit
af7cfee82d
@ -15,13 +15,13 @@ services:
|
|||||||
# Use target devcontainer-trt for TensorRT dev
|
# Use target devcontainer-trt for TensorRT dev
|
||||||
target: devcontainer
|
target: devcontainer
|
||||||
## Uncomment this block for nvidia gpu support
|
## Uncomment this block for nvidia gpu support
|
||||||
# deploy:
|
deploy:
|
||||||
# resources:
|
resources:
|
||||||
# reservations:
|
reservations:
|
||||||
# devices:
|
devices:
|
||||||
# - driver: nvidia
|
- driver: nvidia
|
||||||
# count: 1
|
count: 1
|
||||||
# capabilities: [gpu]
|
capabilities: [gpu]
|
||||||
environment:
|
environment:
|
||||||
YOLO_MODELS: yolov7-320
|
YOLO_MODELS: yolov7-320
|
||||||
devices:
|
devices:
|
||||||
|
|||||||
@ -27,3 +27,5 @@ unidecode == 1.3.*
|
|||||||
# Openvino Library - Custom built with MYRIAD support
|
# Openvino Library - Custom built with MYRIAD support
|
||||||
openvino @ https://github.com/NateMeyer/openvino-wheels/releases/download/multi-arch_2022.3.1/openvino-2022.3.1-1-cp39-cp39-manylinux_2_31_x86_64.whl; platform_machine == 'x86_64'
|
openvino @ https://github.com/NateMeyer/openvino-wheels/releases/download/multi-arch_2022.3.1/openvino-2022.3.1-1-cp39-cp39-manylinux_2_31_x86_64.whl; platform_machine == 'x86_64'
|
||||||
openvino @ https://github.com/NateMeyer/openvino-wheels/releases/download/multi-arch_2022.3.1/openvino-2022.3.1-1-cp39-cp39-linux_aarch64.whl; platform_machine == 'aarch64'
|
openvino @ https://github.com/NateMeyer/openvino-wheels/releases/download/multi-arch_2022.3.1/openvino-2022.3.1-1-cp39-cp39-linux_aarch64.whl; platform_machine == 'aarch64'
|
||||||
|
# Google Generative AI
|
||||||
|
google-generativeai == 0.3.*
|
||||||
@ -35,6 +35,7 @@ from frigate.events.audio import listen_to_audio
|
|||||||
from frigate.events.cleanup import EventCleanup
|
from frigate.events.cleanup import EventCleanup
|
||||||
from frigate.events.external import ExternalEventProcessor
|
from frigate.events.external import ExternalEventProcessor
|
||||||
from frigate.events.maintainer import EventProcessor
|
from frigate.events.maintainer import EventProcessor
|
||||||
|
from frigate.gemini import GeminiProcessor
|
||||||
from frigate.http import create_app
|
from frigate.http import create_app
|
||||||
from frigate.log import log_process, root_configurer
|
from frigate.log import log_process, root_configurer
|
||||||
from frigate.models import Event, Recordings, RecordingsToDelete, Regions, Timeline
|
from frigate.models import Event, Recordings, RecordingsToDelete, Regions, Timeline
|
||||||
@ -266,6 +267,9 @@ class FrigateApp:
|
|||||||
# Queue for timeline events
|
# Queue for timeline events
|
||||||
self.timeline_queue: Queue = mp.Queue()
|
self.timeline_queue: Queue = mp.Queue()
|
||||||
|
|
||||||
|
# Queue for Google Gemini events
|
||||||
|
self.gemini_queue: Queue = mp.Queue()
|
||||||
|
|
||||||
# Queue for inter process communication
|
# Queue for inter process communication
|
||||||
self.inter_process_queue: Queue = mp.Queue()
|
self.inter_process_queue: Queue = mp.Queue()
|
||||||
|
|
||||||
@ -576,6 +580,12 @@ class FrigateApp:
|
|||||||
)
|
)
|
||||||
self.timeline_processor.start()
|
self.timeline_processor.start()
|
||||||
|
|
||||||
|
def start_gemini_processor(self) -> None:
    """Create the Google Gemini processor and start it.

    The processor is built from the application config, the Gemini event
    queue and the stop event, stored on ``self.gemini_processor`` and then
    started.
    """
    processor = GeminiProcessor(self.config, self.gemini_queue, self.stop_event)
    # Keep a handle on the instance so it can be joined later.
    self.gemini_processor = processor
    processor.start()
|
||||||
|
|
||||||
def start_event_processor(self) -> None:
|
def start_event_processor(self) -> None:
|
||||||
self.event_processor = EventProcessor(
|
self.event_processor = EventProcessor(
|
||||||
self.config,
|
self.config,
|
||||||
@ -583,6 +593,7 @@ class FrigateApp:
|
|||||||
self.event_queue,
|
self.event_queue,
|
||||||
self.event_processed_queue,
|
self.event_processed_queue,
|
||||||
self.timeline_queue,
|
self.timeline_queue,
|
||||||
|
self.gemini_queue,
|
||||||
self.stop_event,
|
self.stop_event,
|
||||||
)
|
)
|
||||||
self.event_processor.start()
|
self.event_processor.start()
|
||||||
@ -692,6 +703,7 @@ class FrigateApp:
|
|||||||
self.init_external_event_processor()
|
self.init_external_event_processor()
|
||||||
self.init_web_server()
|
self.init_web_server()
|
||||||
self.start_timeline_processor()
|
self.start_timeline_processor()
|
||||||
|
self.start_gemini_processor()
|
||||||
self.start_event_processor()
|
self.start_event_processor()
|
||||||
self.start_event_cleanup()
|
self.start_event_cleanup()
|
||||||
self.start_record_cleanup()
|
self.start_record_cleanup()
|
||||||
@ -734,6 +746,7 @@ class FrigateApp:
|
|||||||
self.record_cleanup.join()
|
self.record_cleanup.join()
|
||||||
self.stats_emitter.join()
|
self.stats_emitter.join()
|
||||||
self.frigate_watchdog.join()
|
self.frigate_watchdog.join()
|
||||||
|
self.gemini_processor.join()
|
||||||
self.db.stop()
|
self.db.stop()
|
||||||
|
|
||||||
while len(self.detection_shms) > 0:
|
while len(self.detection_shms) > 0:
|
||||||
|
|||||||
@ -382,6 +382,26 @@ class DetectConfig(FrigateBaseModel):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class GeminiConfig(FrigateBaseModel):
    """Settings for Google Gemini captioning.

    Every field has a default, so the model can be constructed with no
    arguments; captioning stays disabled unless ``enabled`` is set.
    """

    # Master switch; captioning is off by default.
    enabled: bool = Field(title="Enable Google Gemini captioning.", default=False)

    # Whether an already-present sub label may be replaced.
    override_existing: bool = Field(
        title="Override existing sub labels.",
        default=False,
    )

    # Google AI Studio credentials and model name; the key is empty by default.
    api_key: str = Field(title="Google AI Studio API Key.", default="")
    model: str = Field(title="Google AI Studio Model.", default="gemini-pro-vision")

    # Generic caption prompt.
    # NOTE(review): presumably used when no entry in object_prompts matches
    # the object label -- confirm against the processor implementation.
    prompt: str = Field(
        title="Default caption prompt.",
        default="Caption this image with as much detail as possible. Make sure the response is under 90 characters.",
    )

    # Prompts keyed by object label; defaults are provided for "person" and "car".
    object_prompts: Dict[str, str] = Field(
        title="Object specific prompts.",
        default={
            "person": "Describe the main person in the image (gender, age, clothing, activity, etc). Do not include where the activity is occurring (sidewalk, concrete, driveway, etc). If delivering a package, include the company the package is from. Make sure the response is under 90 characters.",
            "car": "Label the primary vehicle in the image with just the name of the company if it is a delivery vehicle, or the color make and model.",
        },
    )
|
||||||
|
|
||||||
|
|
||||||
class FilterConfig(FrigateBaseModel):
|
class FilterConfig(FrigateBaseModel):
|
||||||
min_area: int = Field(
|
min_area: int = Field(
|
||||||
default=0, title="Minimum area of bounding box for object to be counted."
|
default=0, title="Minimum area of bounding box for object to be counted."
|
||||||
@ -780,6 +800,9 @@ class CameraConfig(FrigateBaseModel):
|
|||||||
onvif: OnvifConfig = Field(
|
onvif: OnvifConfig = Field(
|
||||||
default_factory=OnvifConfig, title="Camera Onvif Configuration."
|
default_factory=OnvifConfig, title="Camera Onvif Configuration."
|
||||||
)
|
)
|
||||||
|
gemini: GeminiConfig = Field(
|
||||||
|
default_factory=GeminiConfig, title="Google Gemini Configuration."
|
||||||
|
)
|
||||||
ui: CameraUiConfig = Field(
|
ui: CameraUiConfig = Field(
|
||||||
default_factory=CameraUiConfig, title="Camera UI Modifications."
|
default_factory=CameraUiConfig, title="Camera UI Modifications."
|
||||||
)
|
)
|
||||||
@ -1092,6 +1115,9 @@ class FrigateConfig(FrigateBaseModel):
|
|||||||
detect: DetectConfig = Field(
|
detect: DetectConfig = Field(
|
||||||
default_factory=DetectConfig, title="Global object tracking configuration."
|
default_factory=DetectConfig, title="Global object tracking configuration."
|
||||||
)
|
)
|
||||||
|
gemini: GeminiConfig = Field(
|
||||||
|
default_factory=GeminiConfig, title="Global Google Gemini Configuration."
|
||||||
|
)
|
||||||
cameras: Dict[str, CameraConfig] = Field(title="Camera configuration.")
|
cameras: Dict[str, CameraConfig] = Field(title="Camera configuration.")
|
||||||
timestamp_style: TimestampStyleConfig = Field(
|
timestamp_style: TimestampStyleConfig = Field(
|
||||||
default_factory=TimestampStyleConfig,
|
default_factory=TimestampStyleConfig,
|
||||||
@ -1107,6 +1133,10 @@ class FrigateConfig(FrigateBaseModel):
|
|||||||
config.mqtt.user = config.mqtt.user.format(**FRIGATE_ENV_VARS)
|
config.mqtt.user = config.mqtt.user.format(**FRIGATE_ENV_VARS)
|
||||||
config.mqtt.password = config.mqtt.password.format(**FRIGATE_ENV_VARS)
|
config.mqtt.password = config.mqtt.password.format(**FRIGATE_ENV_VARS)
|
||||||
|
|
||||||
|
# Gemini API Key substitutions
|
||||||
|
if config.gemini.api_key:
|
||||||
|
config.gemini.api_key = config.gemini.api_key.format(**FRIGATE_ENV_VARS)
|
||||||
|
|
||||||
# set default min_score for object attributes
|
# set default min_score for object attributes
|
||||||
for attribute in ALL_ATTRIBUTE_LABELS:
|
for attribute in ALL_ATTRIBUTE_LABELS:
|
||||||
if not config.objects.filters.get(attribute):
|
if not config.objects.filters.get(attribute):
|
||||||
@ -1128,6 +1158,7 @@ class FrigateConfig(FrigateBaseModel):
|
|||||||
"detect": ...,
|
"detect": ...,
|
||||||
"ffmpeg": ...,
|
"ffmpeg": ...,
|
||||||
"timestamp_style": ...,
|
"timestamp_style": ...,
|
||||||
|
"gemini": ...,
|
||||||
},
|
},
|
||||||
exclude_unset=True,
|
exclude_unset=True,
|
||||||
)
|
)
|
||||||
@ -1194,6 +1225,13 @@ class FrigateConfig(FrigateBaseModel):
|
|||||||
camera_config.onvif.password = camera_config.onvif.password.format(
|
camera_config.onvif.password = camera_config.onvif.password.format(
|
||||||
**FRIGATE_ENV_VARS
|
**FRIGATE_ENV_VARS
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Gemini substitution
|
||||||
|
if camera_config.gemini.api_key:
|
||||||
|
camera_config.gemini.api_key = camera_config.gemini.api_key.format(
|
||||||
|
**FRIGATE_ENV_VARS
|
||||||
|
)
|
||||||
|
|
||||||
# set config pre-value
|
# set config pre-value
|
||||||
camera_config.record.enabled_in_config = camera_config.record.enabled
|
camera_config.record.enabled_in_config = camera_config.record.enabled
|
||||||
camera_config.audio.enabled_in_config = camera_config.audio.enabled
|
camera_config.audio.enabled_in_config = camera_config.audio.enabled
|
||||||
|
|||||||
@ -32,6 +32,7 @@ def should_update_db(prev_event: Event, current_event: Event) -> bool:
|
|||||||
or prev_event["entered_zones"] != current_event["entered_zones"]
|
or prev_event["entered_zones"] != current_event["entered_zones"]
|
||||||
or prev_event["thumbnail"] != current_event["thumbnail"]
|
or prev_event["thumbnail"] != current_event["thumbnail"]
|
||||||
or prev_event["end_time"] != current_event["end_time"]
|
or prev_event["end_time"] != current_event["end_time"]
|
||||||
|
or prev_event["sub_label"] != current_event["sub_label"]
|
||||||
):
|
):
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
@ -56,6 +57,7 @@ class EventProcessor(threading.Thread):
|
|||||||
event_queue: Queue,
|
event_queue: Queue,
|
||||||
event_processed_queue: Queue,
|
event_processed_queue: Queue,
|
||||||
timeline_queue: Queue,
|
timeline_queue: Queue,
|
||||||
|
gemini_queue: Queue,
|
||||||
stop_event: MpEvent,
|
stop_event: MpEvent,
|
||||||
):
|
):
|
||||||
threading.Thread.__init__(self)
|
threading.Thread.__init__(self)
|
||||||
@ -65,6 +67,7 @@ class EventProcessor(threading.Thread):
|
|||||||
self.event_queue = event_queue
|
self.event_queue = event_queue
|
||||||
self.event_processed_queue = event_processed_queue
|
self.event_processed_queue = event_processed_queue
|
||||||
self.timeline_queue = timeline_queue
|
self.timeline_queue = timeline_queue
|
||||||
|
self.gemini_queue = gemini_queue
|
||||||
self.events_in_process: Dict[str, Event] = {}
|
self.events_in_process: Dict[str, Event] = {}
|
||||||
self.stop_event = stop_event
|
self.stop_event = stop_event
|
||||||
|
|
||||||
@ -102,6 +105,14 @@ class EventProcessor(threading.Thread):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
self.handle_object_detection(event_type, camera, event_data)
|
self.handle_object_detection(event_type, camera, event_data)
|
||||||
|
|
||||||
|
if event_type == "end" and self.config.cameras[camera].gemini.enabled:
|
||||||
|
self.gemini_queue.put(
|
||||||
|
(
|
||||||
|
camera,
|
||||||
|
event_data,
|
||||||
|
)
|
||||||
|
)
|
||||||
elif source_type == EventTypeEnum.api:
|
elif source_type == EventTypeEnum.api:
|
||||||
self.handle_external_detection(event_type, event_data)
|
self.handle_external_detection(event_type, event_data)
|
||||||
|
|
||||||
|
|||||||
@ -797,11 +797,10 @@ function Event({
|
|||||||
</div>
|
</div>
|
||||||
<div className="m-2 flex grow">
|
<div className="m-2 flex grow">
|
||||||
<div className="flex flex-col grow">
|
<div className="flex flex-col grow">
|
||||||
<div className="capitalize text-lg font-bold">
|
<div className="capitalize text-lg font-bold">{event.label.replaceAll('_', ' ')}</div>
|
||||||
{event.label.replaceAll('_', ' ')}
|
<div className="text-sm flex pb-2">
|
||||||
{event.sub_label ? `: ${event.sub_label.replaceAll('_', ' ')}` : null}
|
{event.sub_label ? `${event.sub_label.replaceAll('_', ' ')}` : null}
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div className="text-sm flex">
|
<div className="text-sm flex">
|
||||||
<Clock className="h-5 w-5 mr-2 inline" />
|
<Clock className="h-5 w-5 mr-2 inline" />
|
||||||
{formatUnixTimestampToDateTime(event.start_time, { ...config.ui })}
|
{formatUnixTimestampToDateTime(event.start_time, { ...config.ui })}
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user