Improve review summary performance (#20328)

* Undo vite

* Balance the prompt

* Round duration

* Calculate context size to determine number of images

* Increase number of images
This commit is contained in:
Nicolas Mowen 2025-10-02 09:17:25 -06:00 committed by GitHub
parent 2030809a6d
commit 37999abbe6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 51 additions and 11 deletions

View File

@ -43,6 +43,21 @@ class ReviewDescriptionProcessor(PostProcessorApi):
self.review_descs_dps = EventsPerSecond() self.review_descs_dps = EventsPerSecond()
self.review_descs_dps.start() self.review_descs_dps.start()
def calculate_frame_count(self) -> int:
    """Pick how many preview frames to send based on the provider's context size.

    Preview images are 180px tall and are estimated at roughly 100 tokens
    each. The tiers are deliberately conservative so query times do not
    grow too long when many images are attached.

    Returns:
        20, 16, 12 or 8, depending on the value reported by
        ``self.genai_client.get_context_size()``.
    """
    available_tokens = self.genai_client.get_context_size()

    # (exclusive lower bound on context size, frames to use), largest first
    tiers = ((10000, 20), (6000, 16), (4000, 12))

    for min_tokens, frames in tiers:
        if available_tokens > min_tokens:
            return frames

    # Smallest context windows get the minimum number of frames.
    return 8
def process_data(self, data, data_type): def process_data(self, data, data_type):
self.metrics.review_desc_dps.value = self.review_descs_dps.eps() self.metrics.review_desc_dps.value = self.review_descs_dps.eps()
@ -176,7 +191,6 @@ class ReviewDescriptionProcessor(PostProcessorApi):
camera: str, camera: str,
start_time: float, start_time: float,
end_time: float, end_time: float,
desired_frame_count: int = 12,
) -> list[str]: ) -> list[str]:
preview_dir = os.path.join(CACHE_DIR, "preview_frames") preview_dir = os.path.join(CACHE_DIR, "preview_frames")
file_start = f"preview_{camera}" file_start = f"preview_{camera}"
@ -203,6 +217,8 @@ class ReviewDescriptionProcessor(PostProcessorApi):
all_frames.append(os.path.join(preview_dir, file)) all_frames.append(os.path.join(preview_dir, file))
frame_count = len(all_frames) frame_count = len(all_frames)
desired_frame_count = self.calculate_frame_count()
if frame_count <= desired_frame_count: if frame_count <= desired_frame_count:
return all_frames return all_frames
@ -235,7 +251,7 @@ def run_analysis(
"start": datetime.datetime.fromtimestamp(final_data["start_time"]).strftime( "start": datetime.datetime.fromtimestamp(final_data["start_time"]).strftime(
"%A, %I:%M %p" "%A, %I:%M %p"
), ),
"duration": final_data["end_time"] - final_data["start_time"], "duration": round(final_data["end_time"] - final_data["start_time"]),
} }
objects = [] objects = []

View File

@ -66,12 +66,15 @@ class GenAIClient:
context_prompt = f""" context_prompt = f"""
Please analyze the sequence of images ({len(thumbnails)} total) taken in chronological order from the perspective of the {review_data["camera"].replace("_", " ")} security camera. Please analyze the sequence of images ({len(thumbnails)} total) taken in chronological order from the perspective of the {review_data["camera"].replace("_", " ")} security camera.
**Normal activity patterns for this property:**
{activity_context_prompt}
Your task is to provide a clear, accurate description of the scene that: Your task is to provide a clear, accurate description of the scene that:
1. States exactly what is happening based on observable actions and movements. 1. States exactly what is happening based on observable actions and movements.
2. Evaluates whether the observable evidence suggests normal activity for this property or genuine security concerns. 2. Evaluates whether the observable evidence suggests normal activity for this property or genuine security concerns.
3. Assigns a potential_threat_level based on the definitions below, applying them consistently. 3. Assigns a potential_threat_level based on the definitions below, applying them consistently.
Provide an objective assessment. The goal is accuracy—neither missing genuine threats nor over-flagging routine activity for this property. **IMPORTANT: Start by checking if the activity matches the normal patterns above. If it does, assign Level 0. Only consider higher threat levels if the activity clearly deviates from normal patterns or shows genuine security concerns.**
When forming your description: When forming your description:
- **CRITICAL: Only describe objects explicitly listed in "Detected objects" below.** Do not infer or mention additional people, vehicles, or objects not present in the detected objects list, even if visual patterns suggest them. If only a car is detected, do not describe a person interacting with it unless "person" is also in the detected objects list. - **CRITICAL: Only describe objects explicitly listed in "Detected objects" below.** Do not infer or mention additional people, vehicles, or objects not present in the detected objects list, even if visual patterns suggest them. If only a car is detected, do not describe a person interacting with it unless "person" is also in the detected objects list.
@ -81,10 +84,7 @@ When forming your description:
- Consider the full sequence chronologically: what happens from start to finish, how duration and actions relate to the location and objects involved. - Consider the full sequence chronologically: what happens from start to finish, how duration and actions relate to the location and objects involved.
- **Use the actual timestamp provided in "Activity started at"** below for time of day context—do not infer time from image brightness or darkness. Unusual hours (late night/early morning) should increase suspicion when the observable behavior itself appears questionable. However, recognize that some legitimate activities can occur at any hour. - **Use the actual timestamp provided in "Activity started at"** below for time of day context—do not infer time from image brightness or darkness. Unusual hours (late night/early morning) should increase suspicion when the observable behavior itself appears questionable. However, recognize that some legitimate activities can occur at any hour.
- Identify patterns that suggest genuine security concerns: testing doors/windows on vehicles or buildings, accessing unauthorized areas, attempting to conceal actions, extended loitering without apparent purpose, taking items, behavior that clearly doesn't align with the zone context and detected objects. - Identify patterns that suggest genuine security concerns: testing doors/windows on vehicles or buildings, accessing unauthorized areas, attempting to conceal actions, extended loitering without apparent purpose, taking items, behavior that clearly doesn't align with the zone context and detected objects.
- **Weigh all evidence holistically**: Consider the complete picture including zone, objects, time, and actions together. A single ambiguous action should not override strong contextual evidence of normal activity. The overall pattern determines the threat level. - **Weigh all evidence holistically**: Start by checking if the activity matches the normal patterns above. If it does, assign Level 0. Only consider Level 1 if the activity clearly deviates from normal patterns or shows genuine security concerns that warrant attention.
**Normal activity patterns for this property:**
{activity_context_prompt}
Your response MUST be a flat JSON object with: Your response MUST be a flat JSON object with:
- `scene` (string): A narrative description of what happens across the sequence from start to finish. **Only describe actions you can actually observe happening in the frames provided.** Do not infer or assume actions that aren't visible (e.g., if you see someone walking but never see them sit, don't say they sat down). Include setting, detected objects, and their observable actions. Avoid speculation or filling in assumed behaviors. Your description should align with and support the threat level you assign. - `scene` (string): A narrative description of what happens across the sequence from start to finish. **Only describe actions you can actually observe happening in the frames provided.** Do not infer or assume actions that aren't visible (e.g., if you see someone walking but never see them sit, don't say they sat down). Include setting, detected objects, and their observable actions. Avoid speculation or filling in assumed behaviors. Your description should align with and support the threat level you assign.
@ -93,9 +93,9 @@ Your response MUST be a flat JSON object with:
{get_concern_prompt()} {get_concern_prompt()}
Threat-level definitions: Threat-level definitions:
- 0 Normal activity: What you observe is consistent with expected activity for this property type. The observable evidence—considering zone context, detected objects, and timing together—supports a benign explanation. Use this for routine activities even if minor ambiguous elements exist. - 0 **Normal activity (DEFAULT)**: What you observe matches the normal activity patterns above or is consistent with expected activity for this property type. The observable evidence—considering zone context, detected objects, and timing together—supports a benign explanation. **Use this level for routine activities even if minor ambiguous elements exist.**
- 1 Potentially suspicious: Observable behavior raises genuine security concerns that warrant human review. The evidence doesn't support a routine explanation when you consider the zone, objects, and actions together. Examples: testing doors/windows on vehicles or structures, accessing areas that don't align with the activity, taking items that likely don't belong to them, behavior clearly inconsistent with the zone and context, or activity that lacks any visible legitimate indicators. Reserve this level for situations that actually merit closer attention—not routine activities for this property. - 1 **Potentially suspicious**: Observable behavior raises genuine security concerns that warrant human review. The evidence doesn't support a routine explanation and clearly deviates from the normal patterns above. Examples: testing doors/windows on vehicles or structures, accessing areas that don't align with the activity, taking items that likely don't belong to them, behavior clearly inconsistent with the zone and context, or activity that lacks any visible legitimate indicators. **Only use this level when the activity clearly doesn't match normal patterns.**
- 2 Immediate threat: Clear evidence of forced entry, break-in, vandalism, aggression, weapons, theft in progress, or active property damage. - 2 **Immediate threat**: Clear evidence of forced entry, break-in, vandalism, aggression, weapons, theft in progress, or active property damage.
Sequence details: Sequence details:
- Frame 1 = earliest, Frame {len(thumbnails)} = latest - Frame 1 = earliest, Frame {len(thumbnails)} = latest
@ -253,6 +253,10 @@ Rules for the report:
"""Submit a request to the provider.""" """Submit a request to the provider."""
return None return None
def get_context_size(self) -> int:
    """Return the context window size, in tokens, assumed for this provider.

    This implementation always reports a fixed 4096 tokens.
    """
    default_context_tokens = 4096
    return default_context_tokens
def get_genai_client(config: FrigateConfig) -> Optional[GenAIClient]: def get_genai_client(config: FrigateConfig) -> Optional[GenAIClient]:
"""Get the GenAI client.""" """Get the GenAI client."""

View File

@ -71,3 +71,7 @@ class OpenAIClient(GenAIClient):
if len(result.choices) > 0: if len(result.choices) > 0:
return result.choices[0].message.content.strip() return result.choices[0].message.content.strip()
return None return None
def get_context_size(self) -> int:
    """Return the context window size, in tokens, reported for Azure OpenAI.

    The value is a fixed 128,000 tokens; nothing is queried from the service.
    """
    return 128_000

View File

@ -53,3 +53,8 @@ class GeminiClient(GenAIClient):
# No description was generated # No description was generated
return None return None
return description return description
def get_context_size(self) -> int:
    """Return the context window size, in tokens, reported for Gemini.

    A fixed 1,000,000 tokens is returned regardless of the configured model.
    NOTE(review): the original comment attributes the 1M token window to
    "Gemini Pro Vision" -- confirm this holds for the models actually in use.
    """
    return 1_000_000

View File

@ -54,3 +54,9 @@ class OllamaClient(GenAIClient):
except (TimeoutException, ResponseError) as e: except (TimeoutException, ResponseError) as e:
logger.warning("Ollama returned an error: %s", str(e)) logger.warning("Ollama returned an error: %s", str(e))
return None return None
def get_context_size(self) -> int:
    """Return the context window size, in tokens, configured for Ollama.

    The value is read from ``provider_options["options"]["num_ctx"]``.

    Returns:
        The configured ``num_ctx`` as an int, or 4096 when the setting is
        absent, when ``options`` is empty or not a mapping (for example an
        ``options`` key whose value is ``None``), or when ``num_ctx``
        cannot be interpreted as an integer.
    """
    default_context_tokens = 4096

    # ``.get("options", {})`` would still return None when the key exists
    # with a null value, so normalise falsy values to an empty dict.
    options = self.genai_config.provider_options.get("options") or {}

    try:
        # Coerce so callers can safely compare the result against ints.
        return int(options.get("num_ctx", default_context_tokens))
    except (AttributeError, TypeError, ValueError):
        # options is not a mapping, or num_ctx is None / non-numeric.
        return default_context_tokens

View File

@ -66,3 +66,8 @@ class OpenAIClient(GenAIClient):
except (TimeoutException, Exception) as e: except (TimeoutException, Exception) as e:
logger.warning("OpenAI returned an error: %s", str(e)) logger.warning("OpenAI returned an error: %s", str(e))
return None return None
def get_context_size(self) -> int:
    """Return the context window size, in tokens, reported for OpenAI.

    A fixed 128,000 tokens is returned, matching the 128K window the
    original comment cites for OpenAI GPT-4 Vision models.
    """
    return 128_000

View File

@ -4,7 +4,7 @@ import { defineConfig } from "vite";
import react from "@vitejs/plugin-react-swc"; import react from "@vitejs/plugin-react-swc";
import monacoEditorPlugin from "vite-plugin-monaco-editor"; import monacoEditorPlugin from "vite-plugin-monaco-editor";
const proxyHost = process.env.PROXY_HOST || "192.168.50.106:5002"; const proxyHost = process.env.PROXY_HOST || "localhost:5000";
// https://vitejs.dev/config/ // https://vitejs.dev/config/
export default defineConfig({ export default defineConfig({