From cf8638f260b9c6f4b3dec437a4b1eb5e7de4eeb2 Mon Sep 17 00:00:00 2001
From: felalex <felalex@gmail.com>
Date: Sat, 2 May 2026 23:51:39 -0700
Subject: [PATCH] fix: explicitly set enable_mem_pattern for fixed-size models

Previously relied on ORT's implicit default (True) for fixed-size models,
only flipping the flag in the variable-length branch. Set it explicitly
in both branches to be robust against ORT default changes. Also replace
non-ASCII characters in nearby comments with ASCII equivalents.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 frigate/detectors/detection_runners.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/frigate/detectors/detection_runners.py b/frigate/detectors/detection_runners.py
index e397d73fb..6e9eeca76 100644
--- a/frigate/detectors/detection_runners.py
+++ b/frigate/detectors/detection_runners.py
@@ -32,7 +32,7 @@ def get_ort_session_options(
         is_complex_model: Whether the model needs basic optimization to avoid graph fusion issues.
         variable_length_inputs: Whether the model receives variable-length inputs (e.g. text
             embeddings).  When True, disables memory-pattern caching, which otherwise builds
-            a plan per unique input shape and holds onto mmap regions indefinitely — a major
+            a plan per unique input shape and holds onto mmap regions indefinitely - a major
             source of RSS growth in the embeddings_manager process.
 
     Returns:
@@ -40,15 +40,18 @@ def get_ort_session_options(
     """
     sess_options = ort.SessionOptions()
     # Disable the CPU BFC arena for all sessions.  With the arena enabled ORT pools
-    # host-side staging buffers for GPU↔CPU transfers and never releases them back to
+    # host-side staging buffers for GPU <-> CPU transfers and never releases them back to
     # the OS, causing RSS to grow without bound in long-running embedding processes.
     sess_options.enable_cpu_mem_arena = False
     if variable_length_inputs:
         # Disable per-shape memory-layout plan caching for models with variable-length
         # inputs (Jina CLIP text, PaddleOCR).  Each unique sequence length creates a
         # new mmap-backed plan that is never freed, leading to unbounded anon-mmap growth.
-        # Fixed-size models (YOLO at 640×640) should keep this enabled for buffer aliasing.
         sess_options.enable_mem_pattern = False
+    else:
+        # Fixed-size models (like YOLO) keep mem_pattern on for buffer aliasing.
+        # Set explicitly to be robust against ORT default changes.
+        sess_options.enable_mem_pattern = True
     if is_complex_model:
         sess_options.graph_optimization_level = (
             ort.GraphOptimizationLevel.ORT_ENABLE_BASIC