2024-06-22 00:30:19 +03:00
""" Generative AI module for Frigate. """
2025-08-13 01:27:35 +03:00
import datetime
2024-06-22 00:30:19 +03:00
import importlib
2024-11-16 00:24:17 +03:00
import logging
2024-06-22 00:30:19 +03:00
import os
2025-08-10 14:57:54 +03:00
import re
from typing import Any , Optional
2024-06-22 00:30:19 +03:00
2024-10-14 15:23:10 +03:00
from playhouse . shortcuts import model_to_dict
2024-11-09 16:48:53 +03:00
from frigate . config import CameraConfig , FrigateConfig , GenAIConfig , GenAIProviderEnum
2025-08-13 01:27:35 +03:00
from frigate . const import CLIPS_DIR
2025-08-10 14:57:54 +03:00
from frigate . data_processing . post . types import ReviewMetadata
2024-10-12 15:19:24 +03:00
from frigate . models import Event
2024-06-22 00:30:19 +03:00
2024-11-16 00:24:17 +03:00
logger = logging . getLogger ( __name__ )
2024-06-22 00:30:19 +03:00
PROVIDERS = { }
def register_genai_provider(key: GenAIProviderEnum):
    """Return a class decorator that records the decorated class in PROVIDERS.

    Args:
        key: Provider identifier under which the class is stored. A later
            registration with the same key overwrites the earlier one.

    Returns:
        A decorator that stores the class in ``PROVIDERS[key]`` and hands the
        class back unchanged, so it can be stacked on a class definition.
    """

    def _register(provider_cls):
        # Registration is a plain dict assignment; the class itself is untouched.
        PROVIDERS[key] = provider_cls
        return provider_cls

    return _register
class GenAIClient:
    """Generative AI client for Frigate.

    Builds the prompts for review descriptions, review summaries and object
    descriptions and hands them to ``_send``. In this base class
    ``_init_provider`` and ``_send`` both return ``None``; provider classes
    (presumably the ones registered in ``PROVIDERS``) are expected to override
    them.
    """

    def __init__(self, genai_config: GenAIConfig, timeout: int = 120) -> None:
        """Store the configuration and initialize the provider.

        Args:
            genai_config: GenAI configuration section.
            timeout: Timeout value kept on the instance for provider
                implementations to use.
        """
        self.genai_config: GenAIConfig = genai_config
        self.timeout = timeout
        self.provider = self._init_provider()

    @staticmethod
    def _save_debug_file(request_id: str, filename: str, content: str) -> None:
        """Write a debug artifact to ``CLIPS_DIR/genai-requests/<request_id>/``.

        The directory is created when missing, and the file is written as
        UTF-8 because the prompts contain non-ASCII characters.
        """
        request_dir = os.path.join(CLIPS_DIR, "genai-requests", request_id)
        os.makedirs(request_dir, exist_ok=True)

        with open(os.path.join(request_dir, filename), "w", encoding="utf-8") as f:
            f.write(content)

    @staticmethod
    def _strip_code_fence(text: str) -> str:
        """Remove a surrounding markdown code fence (```` ```json ... ``` ````)."""
        # Strip first so the ^ / $ anchors still match when the model pads the
        # reply with leading or trailing whitespace.
        text = text.strip()
        text = re.sub(r"^```[a-zA-Z0-9]*\n?", "", text)
        return re.sub(r"\n?```$", "", text)

    def generate_review_description(
        self,
        review_data: dict[str, Any],
        thumbnails: list[bytes],
        concerns: list[str],
        preferred_language: str | None,
        debug_save: bool,
        activity_context_prompt: str,
    ) -> ReviewMetadata | None:
        """Generate a description for the review item activity.

        Args:
            review_data: Review item data. This method reads the keys
                ``id``, ``camera``, ``start``, ``duration``, ``zones`` and
                ``unified_objects``.
            thumbnails: Images sent with the prompt, in chronological order.
            concerns: Extra concerns the model is asked to report; when empty
                the ``other_concerns`` field is left out of the prompt.
            preferred_language: Language the answer is requested in, if any.
            debug_save: When true, the prompt and any response are written to
                the ``genai-requests`` debug directory.
            activity_context_prompt: Text inserted under the "Normal Activity
                Patterns for This Property" heading.

        Returns:
            The parsed ``ReviewMetadata``, or ``None`` when the provider
            returned nothing or the response could not be parsed.
        """
        camera = review_data["camera"]
        frame_count = len(thumbnails)
        unified_objects = review_data["unified_objects"]

        if concerns:
            concern_list = "\n    - ".join(concerns)
            concern_prompt = f"""- `other_concerns` (list of strings): Include a list of any of the following concerns that are occurring:
    - {concern_list}"""
        else:
            concern_prompt = ""

        if preferred_language:
            language_prompt = f"Provide your answer in {preferred_language}"
        else:
            language_prompt = ""

        if unified_objects:
            objects_list = "\n- " + "\n- ".join(unified_objects)
        else:
            objects_list = "\n- (No objects detected)"

        zones = ", ".join(review_data["zones"]) if review_data["zones"] else "None"

        context_prompt = f"""
Your task is to analyze the sequence of images ({frame_count} total) taken in chronological order from the perspective of the {camera} security camera.

## Normal Activity Patterns for This Property

{activity_context_prompt}

## Task Instructions

Your task is to provide a clear, accurate description of the scene that:
1. States exactly what is happening based on observable actions and movements.
2. Evaluates the activity against the Normal and Suspicious Activity Indicators above.
3. Assigns a potential_threat_level (0, 1, or 2) based on the threat level indicators defined above, applying them consistently.

**Use the activity patterns above as guidance to calibrate your assessment. Match the activity against both normal and suspicious indicators, then use your judgment based on the complete context.**

## Analysis Guidelines

When forming your description:
- **CRITICAL: Only describe objects explicitly listed in "Objects in Scene" below.** Do not infer or mention additional people, vehicles, or objects not present in this list, even if visual patterns suggest them. If only a car is listed, do not describe a person interacting with it unless "person" is also in the objects list.
- **Only describe actions actually visible in the frames.** Do not assume or infer actions that you don't observe happening. If someone walks toward furniture but you never see them sit, do not say they sat. Stick to what you can see across the sequence.
- Describe what you observe: actions, movements, interactions with objects and the environment. Include any observable environmental changes (e.g., lighting changes triggered by activity).
- Note visible details such as clothing, items being carried or placed, tools or equipment present, and how they interact with the property or objects.
- Consider the full sequence chronologically: what happens from start to finish, how duration and actions relate to the location and objects involved.
- **Use the actual timestamp provided in "Activity started at"** below for time of day context — do not infer time from image brightness or darkness. Unusual hours (late night/early morning) should increase suspicion when the observable behavior itself appears questionable. However, recognize that some legitimate activities can occur at any hour.
- **Consider duration as a primary factor**: Apply the duration thresholds defined in the activity patterns above. Brief sequences during normal hours with apparent purpose typically indicate normal activity unless explicit suspicious actions are visible.
- **Weigh all evidence holistically**: Match the activity against the normal and suspicious patterns defined above, then evaluate based on the complete context (zone, objects, time, actions, duration). Apply the threat level indicators consistently. Use your judgment for edge cases.

## Response Format

Your response MUST be a flat JSON object with:
- `title` (string): A concise, direct title that describes the primary action or event in the sequence, not just what you literally see. Use spatial context when available to make titles more meaningful. When multiple objects/actions are present, prioritize whichever is most prominent or occurs first. Use names from "Objects in Scene" based on what you visually observe. If you see both a name and an unidentified object of the same type but visually observe only one person/object, use ONLY the name. Examples: "Joe walking dog", "Person taking out trash", "Vehicle arriving in driveway", "Joe accessing vehicle", "Person leaving porch for driveway".
- `scene` (string): A narrative description of what happens across the sequence from start to finish, in chronological order. Start by describing how the sequence begins, then describe the progression of events. **Describe all significant movements and actions in the order they occur.** For example, if a vehicle arrives and then a person exits, describe both actions sequentially. **Only describe actions you can actually observe happening in the frames provided.** Do not infer or assume actions that aren't visible (e.g., if you see someone walking but never see them sit, don't say they sat down). Include setting, detected objects, and their observable actions. Avoid speculation or filling in assumed behaviors. Your description should align with and support the threat level you assign.
- `confidence` (float): 0-1 confidence in your analysis. Higher confidence when objects/actions are clearly visible and context is unambiguous. Lower confidence when the sequence is unclear, objects are partially obscured, or context is ambiguous.
- `potential_threat_level` (integer): 0, 1, or 2 as defined in "Normal Activity Patterns for This Property" above. Your threat level must be consistent with your scene description and the guidance above.
{concern_prompt}

## Sequence Details

- Frame 1 = earliest, Frame {frame_count} = latest
- Activity started at {review_data["start"]} and lasted {review_data["duration"]} seconds
- Zones involved: {zones}

## Objects in Scene

Each line represents a detection state, not necessarily unique individuals. Parentheses indicate object type or category, use only the name/label in your response, not the parentheses.

**CRITICAL: When you see both recognized and unrecognized entries of the same type (e.g., "Joe (person)" and "Person"), visually count how many distinct people/objects you actually see based on appearance and clothing. If you observe only ONE person throughout the sequence, use ONLY the recognized name (e.g., "Joe"). The same person may be recognized in some frames but not others. Only describe both if you visually see MULTIPLE distinct people with clearly different appearances.**

**Note: Unidentified objects (without names) are NOT indicators of suspicious activity — they simply mean the system hasn't identified that object.**
{objects_list}

## Important Notes
- Values must be plain strings, floats, or integers — no nested objects, no extra commentary.
- Only describe objects from the "Objects in Scene" list above. Do not hallucinate additional objects.
- When describing people or vehicles, use the exact names provided.
{language_prompt}
"""

        logger.debug(
            f"Sending {frame_count} images to create review description on {camera}"
        )

        if debug_save:
            self._save_debug_file(review_data["id"], "prompt.txt", context_prompt)

        response = self._send(context_prompt, thumbnails)

        if not response:
            return None

        if debug_save:
            self._save_debug_file(review_data["id"], "response.txt", response)

        clean_json = self._strip_code_fence(response)

        try:
            metadata = ReviewMetadata.model_validate_json(clean_json)

            # If any verified objects (contain parentheses with name), set to 0
            if any("(" in obj for obj in unified_objects):
                metadata.potential_threat_level = 0

            metadata.time = review_data["start"]
            return metadata
        except Exception as e:
            # rarely LLMs can fail to follow directions on output format
            logger.warning(
                f"Failed to parse review description as the response did not match expected format. {e}"
            )
            return None

    def generate_review_summary(
        self,
        start_ts: float,
        end_ts: float,
        segments: list[dict[str, Any]],
        debug_save: bool,
    ) -> str | None:
        """Generate a summary of review item descriptions over a period of time.

        Args:
            start_ts: Start of the period as a POSIX timestamp.
            end_ts: End of the period as a POSIX timestamp.
            segments: Review item descriptions; each one is appended to the
                prompt on its own line using its ``str()`` form.
            debug_save: When true, the prompt and any response are written to
                the ``genai-requests`` debug directory.

        Returns:
            Whatever ``_send`` returns: the report text, or ``None``.
        """
        time_format = "%B %d, %Y at %I:%M %p"
        period_start = datetime.datetime.fromtimestamp(start_ts).strftime(time_format)
        period_end = datetime.datetime.fromtimestamp(end_ts).strftime(time_format)
        time_range = f"{period_start} to {period_end}"

        timeline_summary_prompt = f"""
You are a security officer.
Time range: {time_range}.
Input: JSON list with "title", "scene", "confidence", "potential_threat_level" (0-2), "other_concerns", "_is_primary", "_camera".

Task: Write a concise, human-presentable security report in markdown format.

CRITICAL - Understanding Primary vs Contextual Items:
- Items with "_is_primary": true are events that REQUIRE REVIEW and MUST be included in the report
- Items with "_is_primary": false are additional context from other camera perspectives that overlap in time
- **DO NOT create separate bullet points or sections for contextual items**
- **ONLY use contextual items to enrich and inform the description of primary items**
- The "_camera" field indicates which camera captured each event
- **When a contextual item provides relevant background, you MUST incorporate it directly into the primary event's bullet point**
- Contextual information often explains or de-escalates seemingly suspicious primary events

Rules for the report:

- Title & overview
  - Start with:
    # Security Summary - {time_range}
  - Write a 1-2 sentence situational overview capturing the general pattern of the period.
  - Keep the overview high-level; specific details will be in the event bullets below.

- Event details
  - **ONLY create bullet points for PRIMARY items (_is_primary: true)**
  - **Do NOT create sections or bullets for events that don't exist**
  - Do NOT create separate bullets for contextual items
  - Present primary events in chronological order as a bullet list.
  - **CRITICAL: When contextual items overlap with a primary event, you MUST weave that information directly into the same bullet point**
  - Format: **[Timestamp]** - [Description incorporating any contextual information]. [Camera info]. (threat level: X)
  - If contextual information provides an explanation (e.g., delivery truck → person is likely delivery driver), reflect this understanding in your description and potentially adjust the perceived threat level
  - If multiple PRIMARY events occur within the same minute, combine them into a single bullet with sub-points.
  - Use bold timestamps for clarity.
  - Camera format: "Camera: [camera name]" or mention contextual cameras inline when relevant
  - Group bullets under subheadings ONLY when you have actual PRIMARY events to list (e.g., Porch Activity, Unusual Behavior).

- Threat levels
  - Show the threat level for PRIMARY events using these labels:
    - Threat level 0: "Normal"
    - Threat level 1: "Needs review"
    - Threat level 2: "Security concern"
  - Format as (threat level: Normal), (threat level: Needs review), or (threat level: Security concern).
  - **When contextual items clearly explain a primary event (e.g., delivery truck explains person at door), you should describe it as normal activity and note the explanation**
  - **Your description and tone should reflect the fuller understanding provided by contextual information**
  - Example: Primary event says "unidentified person with face covering" but context shows delivery truck → describe as "delivery person (truck visible on Front Driveway Cam)" rather than emphasizing suspicious elements
  - The stored threat level remains as originally classified, but your narrative should reflect the contextual understanding
  - If multiple PRIMARY events at the same time share the same threat level, only state it once.

- Final assessment
  - End with a Final Assessment section.
  - If all primary events are threat level 0 or explained by contextual items:
    Final assessment: Only normal residential activity observed during this period.
  - If threat level 1 events are present:
    Final assessment: Some activity requires review but no security concerns identified.
  - If threat level 2 events are present, clearly summarize them as Security concerns requiring immediate attention.
  - Keep this section brief - do not repeat details from the event descriptions above.

- Conciseness
  - Do not repeat benign clothing/appearance details unless they distinguish individuals.
  - Summarize similar routine events instead of restating full scene descriptions.
  - When incorporating contextual information, do so briefly and naturally within the primary event description.
  - Avoid lengthy explanatory notes - integrate context seamlessly into the narrative.
"""

        # Append every segment on its own line in one pass instead of
        # growing the string with repeated +=.
        timeline_summary_prompt += "".join(f"\n{item}" for item in segments)

        request_id = f"{start_ts}-{end_ts}"

        if debug_save:
            self._save_debug_file(request_id, "prompt.txt", timeline_summary_prompt)

        response = self._send(timeline_summary_prompt, [])

        if debug_save and response:
            self._save_debug_file(request_id, "response.txt", response)

        return response

    def generate_object_description(
        self,
        camera_config: CameraConfig,
        thumbnails: list[bytes],
        event: Event,
    ) -> Optional[str]:
        """Generate a description for the frame.

        The prompt template is looked up by ``event.label`` in the camera's
        ``object_prompts`` and falls back to the camera's generic prompt. It
        is then filled with the event's fields via ``str.format``.

        Args:
            camera_config: Camera configuration holding the prompt templates.
            thumbnails: Images sent with the prompt.
            event: Event whose fields are available to the template.

        Returns:
            Whatever ``_send`` returns, or ``None`` when the template cannot
            be formatted.
        """
        genai_settings = camera_config.objects.genai
        template = genai_settings.object_prompts.get(
            event.label,
            genai_settings.prompt,
        )
        fields = model_to_dict(event)

        # The template is user-configurable, so any str.format failure is a
        # configuration problem to report, not a reason to crash the caller.
        try:
            prompt = template.format(**fields)
        except KeyError as e:
            logger.error(f"Invalid key in GenAI prompt: {e}")
            return None
        except (AttributeError, IndexError, ValueError) as e:
            logger.error(f"Invalid GenAI prompt format: {e}")
            return None

        logger.debug(f"Sending images to genai provider with prompt: {prompt}")
        return self._send(prompt, thumbnails)

    def _init_provider(self):
        """Initialize the client. The base implementation returns ``None``."""
        return None

    def _send(self, prompt: str, images: list[bytes]) -> Optional[str]:
        """Submit a request to the provider.

        The base implementation sends nothing and returns ``None``.
        """
        return None

    def get_context_size(self) -> int:
        """Get the context window size for this provider in tokens."""
        return 4096
2024-06-22 00:30:19 +03:00
2024-11-09 16:48:53 +03:00
def get_genai_client(config: FrigateConfig) -> Optional[GenAIClient]:
    """Get the GenAI client.

    Args:
        config: Frigate configuration; ``config.genai.provider`` selects the
            provider and ``config.genai`` is passed to its constructor.

    Returns:
        An instance of the class registered in ``PROVIDERS`` for the
        configured provider, or ``None`` when no provider is configured or
        none is registered under that key.
    """
    if not config.genai.provider:
        return None

    # Provider modules register themselves on import, so import them before
    # consulting the registry.
    load_providers()

    provider_cls = PROVIDERS.get(config.genai.provider)
    if not provider_cls:
        return None

    return provider_cls(config.genai)
def load_providers():
    """Import every ``.py`` module that sits next to this file.

    Each file other than ``__init__.py`` is imported as
    ``frigate.genai.<module>``. Nothing is returned; the call matters only
    for whatever the imported modules do at import time.
    """
    here = os.path.dirname(__file__)

    for entry in os.listdir(here):
        if entry == "__init__.py" or not entry.endswith(".py"):
            continue

        stem = entry.removesuffix(".py")
        importlib.import_module(f"frigate.genai.{stem}")