From 62ad2b09f786de5f5ced55281a3065a36b1ad9ca Mon Sep 17 00:00:00 2001
From: felalex <felalex@gmail.com>
Date: Sun, 3 May 2026 21:59:40 -0700
Subject: [PATCH] docs: clarify why mallopt is preferred over MALLOC_ARENA_MAX
 env var
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The previous wording ("forkserver spawn does not inherit Docker env
vars") was technically inaccurate — multiprocessing's spawn/forkserver
does pass the parent's environment via execve.  The real reasons
in-process mallopt is the right fix:

- glibc reads MALLOC_ARENA_MAX only once, at malloc init, before the
  Python interpreter is up.  The variable therefore has to be in the
  process environment before the very first malloc call (in practice,
  at exec time); setting it later from Python has no effect.
- s6-overlay service supervision (s6-setuidgid / s6-envuidgid) can
  filter the env passed to the supervised process; relying on the
  variable surviving that chain is brittle.
- mallopt(M_ARENA_MAX, n_cpu) is the official runtime equivalent and
  works regardless of how the process was spawned.

Updates the comment in EmbeddingProcess.run() and the matching test
docstrings.  No behavior change.

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
---
 frigate/embeddings/__init__.py         | 11 +++++++----
 frigate/test/test_detection_runners.py | 12 +++++++-----
 2 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/frigate/embeddings/__init__.py b/frigate/embeddings/__init__.py
index 610f03596..4f964343c 100644
--- a/frigate/embeddings/__init__.py
+++ b/frigate/embeddings/__init__.py
@@ -47,10 +47,13 @@ class EmbeddingProcess(FrigateProcess):
         self.metrics = metrics
 
     def run(self) -> None:
-        # Forkserver spawn exec's a fresh Python interpreter that does not
-        # inherit Docker env vars, so MALLOC_ARENA_MAX set in docker-compose
-        # never reaches this process.  Set it here via mallopt so glibc caps
-        # the number of malloc arenas to N_CPU instead of the default 8×N_CPU,
+        # glibc reads MALLOC_ARENA_MAX only once, at malloc init - before this
+        # Python interpreter is even up.  Setting it via docker-compose is
+        # brittle: it has to survive the s6-overlay service-supervision chain
+        # (which can filter env via s6-setuidgid/s6-envuidgid) and arrive
+        # before the very first malloc call.  Calling mallopt(M_ARENA_MAX, n_cpu)
+        # here is the runtime equivalent and works regardless of how we were
+        # spawned, capping arenas at N_CPU instead of the default 8×N_CPU and
         # preventing heap fragmentation under the embeddings workload.
         try:
             ctypes.CDLL("libc.so.6").mallopt(-8, os.cpu_count())  # M_ARENA_MAX
diff --git a/frigate/test/test_detection_runners.py b/frigate/test/test_detection_runners.py
index 2c593f7a0..c0d640f0c 100644
--- a/frigate/test/test_detection_runners.py
+++ b/frigate/test/test_detection_runners.py
@@ -257,8 +257,9 @@ class TestOrtLeakFixRegression(unittest.TestCase):
          Must remain True for fixed-size models (YOLO) to preserve buffer aliasing.
 
       3. mallopt(M_ARENA_MAX) — must be called from inside EmbeddingProcess.run()
-         because forkserver spawn does not inherit Docker env vars, so setting
-         MALLOC_ARENA_MAX in docker-compose has no effect on the child process.
+         because glibc reads MALLOC_ARENA_MAX once at malloc init, and getting
+         the env var through s6-overlay supervision before that point is
+         brittle.  In-process mallopt is the runtime-safe equivalent.
     """
 
     def test_get_optimized_runner_passes_variable_length_for_jina(self):
@@ -367,9 +368,10 @@ class TestOrtLeakFixRegression(unittest.TestCase):
     def test_embedding_process_calls_mallopt(self):
         """EmbeddingProcess.run() must call mallopt(M_ARENA_MAX) to cap glibc arenas.
 
-        Forkserver spawn exec's a fresh Python interpreter that does not inherit
-        Docker env vars.  MALLOC_ARENA_MAX set in docker-compose never reaches
-        the child process, so mallopt() must be called explicitly from run().
+        glibc reads MALLOC_ARENA_MAX only at malloc init, before this Python
+        interpreter is up, and getting the env var through the s6-overlay
+        service-supervision chain before that point is brittle.  mallopt()
+        is the runtime-safe equivalent and must be called explicitly from run().
         """
         import frigate.embeddings as emb_module