mirror of
https://github.com/blakeblackshear/frigate.git
synced 2026-05-07 22:15:28 +03:00
fix: explicitly set enable_mem_pattern for fixed-size models
Previously the code relied on ORT's implicit default (True) for fixed-size models, only flipping the flag in the variable-length branch. Set it explicitly in both branches to be robust against ORT default changes.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
1717f21f69
commit
cf8638f260
@ -32,7 +32,7 @@ def get_ort_session_options(
|
|||||||
is_complex_model: Whether the model needs basic optimization to avoid graph fusion issues.
|
is_complex_model: Whether the model needs basic optimization to avoid graph fusion issues.
|
||||||
variable_length_inputs: Whether the model receives variable-length inputs (e.g. text
|
variable_length_inputs: Whether the model receives variable-length inputs (e.g. text
|
||||||
embeddings). When True, disables memory-pattern caching, which otherwise builds
|
embeddings). When True, disables memory-pattern caching, which otherwise builds
|
||||||
a plan per unique input shape and holds onto mmap regions indefinitely — a major
|
a plan per unique input shape and holds onto mmap regions indefinitely - a major
|
||||||
source of RSS growth in the embeddings_manager process.
|
source of RSS growth in the embeddings_manager process.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
@ -40,15 +40,18 @@ def get_ort_session_options(
|
|||||||
"""
|
"""
|
||||||
sess_options = ort.SessionOptions()
|
sess_options = ort.SessionOptions()
|
||||||
# Disable the CPU BFC arena for all sessions. With the arena enabled ORT pools
|
# Disable the CPU BFC arena for all sessions. With the arena enabled ORT pools
|
||||||
# host-side staging buffers for GPU↔CPU transfers and never releases them back to
|
# host-side staging buffers for GPU <-> CPU transfers and never releases them back to
|
||||||
# the OS, causing RSS to grow without bound in long-running embedding processes.
|
# the OS, causing RSS to grow without bound in long-running embedding processes.
|
||||||
sess_options.enable_cpu_mem_arena = False
|
sess_options.enable_cpu_mem_arena = False
|
||||||
if variable_length_inputs:
|
if variable_length_inputs:
|
||||||
# Disable per-shape memory-layout plan caching for models with variable-length
|
# Disable per-shape memory-layout plan caching for models with variable-length
|
||||||
# inputs (Jina CLIP text, PaddleOCR). Each unique sequence length creates a
|
# inputs (Jina CLIP text, PaddleOCR). Each unique sequence length creates a
|
||||||
# new mmap-backed plan that is never freed, leading to unbounded anon-mmap growth.
|
# new mmap-backed plan that is never freed, leading to unbounded anon-mmap growth.
|
||||||
# Fixed-size models (YOLO at 640×640) should keep this enabled for buffer aliasing.
|
|
||||||
sess_options.enable_mem_pattern = False
|
sess_options.enable_mem_pattern = False
|
||||||
|
else:
|
||||||
|
# Fixed-size models (like YOLO) keep mem_pattern on for buffer aliasing.
|
||||||
|
# Set explicitly to be robust against ORT default changes.
|
||||||
|
sess_options.enable_mem_pattern = True
|
||||||
if is_complex_model:
|
if is_complex_model:
|
||||||
sess_options.graph_optimization_level = (
|
sess_options.graph_optimization_level = (
|
||||||
ort.GraphOptimizationLevel.ORT_ENABLE_BASIC
|
ort.GraphOptimizationLevel.ORT_ENABLE_BASIC
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user