mirror of
https://github.com/blakeblackshear/frigate.git
synced 2026-02-07 11:45:24 +03:00
initial implementation of Google Gemini captions
This commit is contained in:
parent
e390533760
commit
af7cfee82d
@ -15,13 +15,13 @@ services:
|
||||
# Use target devcontainer-trt for TensorRT dev
|
||||
target: devcontainer
|
||||
## Uncomment this block for nvidia gpu support
|
||||
# deploy:
|
||||
# resources:
|
||||
# reservations:
|
||||
# devices:
|
||||
# - driver: nvidia
|
||||
# count: 1
|
||||
# capabilities: [gpu]
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
devices:
|
||||
- driver: nvidia
|
||||
count: 1
|
||||
capabilities: [gpu]
|
||||
environment:
|
||||
YOLO_MODELS: yolov7-320
|
||||
devices:
|
||||
|
||||
@ -27,3 +27,5 @@ unidecode == 1.3.*
|
||||
# Openvino Library - Custom built with MYRIAD support
|
||||
openvino @ https://github.com/NateMeyer/openvino-wheels/releases/download/multi-arch_2022.3.1/openvino-2022.3.1-1-cp39-cp39-manylinux_2_31_x86_64.whl; platform_machine == 'x86_64'
|
||||
openvino @ https://github.com/NateMeyer/openvino-wheels/releases/download/multi-arch_2022.3.1/openvino-2022.3.1-1-cp39-cp39-linux_aarch64.whl; platform_machine == 'aarch64'
|
||||
# Google Generative AI
|
||||
google-generativeai == 0.3.*
|
||||
@ -35,6 +35,7 @@ from frigate.events.audio import listen_to_audio
|
||||
from frigate.events.cleanup import EventCleanup
|
||||
from frigate.events.external import ExternalEventProcessor
|
||||
from frigate.events.maintainer import EventProcessor
|
||||
from frigate.gemini import GeminiProcessor
|
||||
from frigate.http import create_app
|
||||
from frigate.log import log_process, root_configurer
|
||||
from frigate.models import Event, Recordings, RecordingsToDelete, Regions, Timeline
|
||||
@ -266,6 +267,9 @@ class FrigateApp:
|
||||
# Queue for timeline events
|
||||
self.timeline_queue: Queue = mp.Queue()
|
||||
|
||||
# Queue for Google Gemini events
|
||||
self.gemini_queue: Queue = mp.Queue()
|
||||
|
||||
# Queue for inter process communication
|
||||
self.inter_process_queue: Queue = mp.Queue()
|
||||
|
||||
@ -576,6 +580,12 @@ class FrigateApp:
|
||||
)
|
||||
self.timeline_processor.start()
|
||||
|
||||
def start_gemini_processor(self) -> None:
    """Create the Gemini processor and start it.

    The processor is built from the application config, the Gemini
    event queue and the shared stop event, stored on the instance as
    ``self.gemini_processor``, and then started.
    """
    processor = GeminiProcessor(self.config, self.gemini_queue, self.stop_event)
    self.gemini_processor = processor
    processor.start()
|
||||
|
||||
def start_event_processor(self) -> None:
|
||||
self.event_processor = EventProcessor(
|
||||
self.config,
|
||||
@ -583,6 +593,7 @@ class FrigateApp:
|
||||
self.event_queue,
|
||||
self.event_processed_queue,
|
||||
self.timeline_queue,
|
||||
self.gemini_queue,
|
||||
self.stop_event,
|
||||
)
|
||||
self.event_processor.start()
|
||||
@ -692,6 +703,7 @@ class FrigateApp:
|
||||
self.init_external_event_processor()
|
||||
self.init_web_server()
|
||||
self.start_timeline_processor()
|
||||
self.start_gemini_processor()
|
||||
self.start_event_processor()
|
||||
self.start_event_cleanup()
|
||||
self.start_record_cleanup()
|
||||
@ -734,6 +746,7 @@ class FrigateApp:
|
||||
self.record_cleanup.join()
|
||||
self.stats_emitter.join()
|
||||
self.frigate_watchdog.join()
|
||||
self.gemini_processor.join()
|
||||
self.db.stop()
|
||||
|
||||
while len(self.detection_shms) > 0:
|
||||
|
||||
@ -382,6 +382,26 @@ class DetectConfig(FrigateBaseModel):
|
||||
)
|
||||
|
||||
|
||||
class GeminiConfig(FrigateBaseModel):
    # Settings for Google Gemini captioning. Documented with comments rather
    # than a class docstring so the generated model schema is left unchanged.

    # Captioning is off unless explicitly enabled.
    enabled: bool = Field(title="Enable Google Gemini captioning.", default=False)

    # Whether an already-present sub label may be overridden.
    override_existing: bool = Field(
        title="Override existing sub labels.",
        default=False,
    )

    # Google AI Studio credentials and model name; the key defaults to empty.
    api_key: str = Field(title="Google AI Studio API Key.", default="")
    model: str = Field(title="Google AI Studio Model.", default="gemini-pro-vision")

    # Default caption prompt.
    prompt: str = Field(
        title="Default caption prompt.",
        default=(
            "Caption this image with as much detail as possible. "
            "Make sure the response is under 90 characters."
        ),
    )

    # Prompts keyed by object label
    # (presumably preferred over `prompt` for matching labels; confirm in the consumer).
    object_prompts: Dict[str, str] = Field(
        title="Object specific prompts.",
        default={
            "person": (
                "Describe the main person in the image (gender, age, clothing, activity, etc). "
                "Do not include where the activity is occurring (sidewalk, concrete, driveway, etc). "
                "If delivering a package, include the company the package is from. "
                "Make sure the response is under 90 characters."
            ),
            "car": (
                "Label the primary vehicle in the image with just the name of the company "
                "if it is a delivery vehicle, or the color make and model."
            ),
        },
    )
|
||||
|
||||
|
||||
class FilterConfig(FrigateBaseModel):
|
||||
min_area: int = Field(
|
||||
default=0, title="Minimum area of bounding box for object to be counted."
|
||||
@ -780,6 +800,9 @@ class CameraConfig(FrigateBaseModel):
|
||||
onvif: OnvifConfig = Field(
|
||||
default_factory=OnvifConfig, title="Camera Onvif Configuration."
|
||||
)
|
||||
gemini: GeminiConfig = Field(
|
||||
default_factory=GeminiConfig, title="Google Gemini Configuration."
|
||||
)
|
||||
ui: CameraUiConfig = Field(
|
||||
default_factory=CameraUiConfig, title="Camera UI Modifications."
|
||||
)
|
||||
@ -1092,6 +1115,9 @@ class FrigateConfig(FrigateBaseModel):
|
||||
detect: DetectConfig = Field(
|
||||
default_factory=DetectConfig, title="Global object tracking configuration."
|
||||
)
|
||||
gemini: GeminiConfig = Field(
|
||||
default_factory=GeminiConfig, title="Global Google Gemini Configuration."
|
||||
)
|
||||
cameras: Dict[str, CameraConfig] = Field(title="Camera configuration.")
|
||||
timestamp_style: TimestampStyleConfig = Field(
|
||||
default_factory=TimestampStyleConfig,
|
||||
@ -1107,6 +1133,10 @@ class FrigateConfig(FrigateBaseModel):
|
||||
config.mqtt.user = config.mqtt.user.format(**FRIGATE_ENV_VARS)
|
||||
config.mqtt.password = config.mqtt.password.format(**FRIGATE_ENV_VARS)
|
||||
|
||||
# Gemini API Key substitutions
|
||||
if config.gemini.api_key:
|
||||
config.gemini.api_key = config.gemini.api_key.format(**FRIGATE_ENV_VARS)
|
||||
|
||||
# set default min_score for object attributes
|
||||
for attribute in ALL_ATTRIBUTE_LABELS:
|
||||
if not config.objects.filters.get(attribute):
|
||||
@ -1128,6 +1158,7 @@ class FrigateConfig(FrigateBaseModel):
|
||||
"detect": ...,
|
||||
"ffmpeg": ...,
|
||||
"timestamp_style": ...,
|
||||
"gemini": ...,
|
||||
},
|
||||
exclude_unset=True,
|
||||
)
|
||||
@ -1194,6 +1225,13 @@ class FrigateConfig(FrigateBaseModel):
|
||||
camera_config.onvif.password = camera_config.onvif.password.format(
|
||||
**FRIGATE_ENV_VARS
|
||||
)
|
||||
|
||||
# Gemini substitution
|
||||
if camera_config.gemini.api_key:
|
||||
camera_config.gemini.api_key = camera_config.gemini.api_key.format(
|
||||
**FRIGATE_ENV_VARS
|
||||
)
|
||||
|
||||
# set config pre-value
|
||||
camera_config.record.enabled_in_config = camera_config.record.enabled
|
||||
camera_config.audio.enabled_in_config = camera_config.audio.enabled
|
||||
|
||||
@ -32,6 +32,7 @@ def should_update_db(prev_event: Event, current_event: Event) -> bool:
|
||||
or prev_event["entered_zones"] != current_event["entered_zones"]
|
||||
or prev_event["thumbnail"] != current_event["thumbnail"]
|
||||
or prev_event["end_time"] != current_event["end_time"]
|
||||
or prev_event["sub_label"] != current_event["sub_label"]
|
||||
):
|
||||
return True
|
||||
return False
|
||||
@ -56,6 +57,7 @@ class EventProcessor(threading.Thread):
|
||||
event_queue: Queue,
|
||||
event_processed_queue: Queue,
|
||||
timeline_queue: Queue,
|
||||
gemini_queue: Queue,
|
||||
stop_event: MpEvent,
|
||||
):
|
||||
threading.Thread.__init__(self)
|
||||
@ -65,6 +67,7 @@ class EventProcessor(threading.Thread):
|
||||
self.event_queue = event_queue
|
||||
self.event_processed_queue = event_processed_queue
|
||||
self.timeline_queue = timeline_queue
|
||||
self.gemini_queue = gemini_queue
|
||||
self.events_in_process: Dict[str, Event] = {}
|
||||
self.stop_event = stop_event
|
||||
|
||||
@ -102,6 +105,14 @@ class EventProcessor(threading.Thread):
|
||||
continue
|
||||
|
||||
self.handle_object_detection(event_type, camera, event_data)
|
||||
|
||||
if event_type == "end" and self.config.cameras[camera].gemini.enabled:
|
||||
self.gemini_queue.put(
|
||||
(
|
||||
camera,
|
||||
event_data,
|
||||
)
|
||||
)
|
||||
elif source_type == EventTypeEnum.api:
|
||||
self.handle_external_detection(event_type, event_data)
|
||||
|
||||
|
||||
@ -797,11 +797,10 @@ function Event({
|
||||
</div>
|
||||
<div className="m-2 flex grow">
|
||||
<div className="flex flex-col grow">
|
||||
<div className="capitalize text-lg font-bold">
|
||||
{event.label.replaceAll('_', ' ')}
|
||||
{event.sub_label ? `: ${event.sub_label.replaceAll('_', ' ')}` : null}
|
||||
<div className="capitalize text-lg font-bold">{event.label.replaceAll('_', ' ')}</div>
|
||||
<div className="text-sm flex pb-2">
|
||||
{event.sub_label ? `${event.sub_label.replaceAll('_', ' ')}` : null}
|
||||
</div>
|
||||
|
||||
<div className="text-sm flex">
|
||||
<Clock className="h-5 w-5 mr-2 inline" />
|
||||
{formatUnixTimestampToDateTime(event.start_time, { ...config.ui })}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user