GenAI Tweaks (#22968)

* Add debug logs
* Refresh embeddings maintainer genai clients on config update

---------

Co-authored-by: Josh Hawkins <32435876+hawkeye217@users.noreply.github.com>
2026-06-21 03:41:55 +03:00 · 2026-04-22 09:55:54 -06:00 · 2026-04-22 09:55:54 -06:00 · b7261c8e70
commit b7261c8e70
parent ad9092d0da
3 changed files with 47 additions and 3 deletions
--- a/frigate/embeddings/maintainer.py
+++ b/frigate/embeddings/maintainer.py
@ -310,6 +310,10 @@ class EmbeddingMaintainer(threading.Thread):
            self._handle_custom_classification_update(topic, payload)
            return
        if topic == "config/genai":
            self.config.genai = payload
            self.genai_manager.update_config(self.config)
        # Broadcast to all processors — each decides if the topic is relevant
        for processor in self.realtime_processors:
            processor.update_config(topic, payload)
--- a/frigate/genai/ollama.py
+++ b/frigate/genai/ollama.py
@ -113,6 +113,15 @@ class OllamaClient(GenAIClient):
                schema = response_format.get("json_schema", {}).get("schema")
                if schema:
                    ollama_options["format"] = self._clean_schema_for_ollama(schema)
            logger.debug(
                "Ollama generate request: model=%s, prompt_len=%s, image_count=%s, "
                "has_format=%s, options=%s",
                self.genai_config.model,
                len(prompt),
                len(images) if images else 0,
                "format" in ollama_options,
                {k: v for k, v in ollama_options.items() if k != "format"},
            )
            result = self.provider.generate(
                self.genai_config.model,
                prompt,
@ -120,9 +129,24 @@ class OllamaClient(GenAIClient):
                **ollama_options,
            )
            logger.debug(
-                f"Ollama tokens used: eval_count={result.get('eval_count')}, prompt_eval_count={result.get('prompt_eval_count')}"
+                "Ollama generate response: done=%s, done_reason=%s, eval_count=%s, "
                "prompt_eval_count=%s, response_len=%s",
                result.get("done"),
                result.get("done_reason"),
                result.get("eval_count"),
                result.get("prompt_eval_count"),
                len(result.get("response", "") or ""),
            )
-            return str(result["response"]).strip()
+            response_text = str(result["response"]).strip()
            if not response_text:
                logger.warning(
                    "Ollama returned a blank response for model %s (done_reason=%s, "
                    "eval_count=%s). Check model output, ensure thinking is disabled.",
                    self.genai_config.model,
                    result.get("done_reason"),
                    result.get("eval_count"),
                )
            return response_text
        except (
            TimeoutException,
            ResponseError,
--- a/frigate/genai/openai.py
+++ b/frigate/genai/openai.py
@ -80,7 +80,23 @@ class OpenAIClient(GenAIClient):
                and hasattr(result, "choices")
                and len(result.choices) > 0
            ):
-                return str(result.choices[0].message.content.strip())
+                message = result.choices[0].message
                content = message.content
                if not content:
                    # When reasoning is enabled for some OpenAI backends the actual response
                    # is incorrectly placed in reasoning_content instead of content.
                    # This is buggy/incorrect behavior — reasoning should not be
                    # enabled for these models.
                    reasoning_content = getattr(message, "reasoning_content", None)
                    if reasoning_content:
                        logger.warning(
                            "Response content was empty but reasoning_content was provided; "
                            "reasoning appears to be enabled and should be disabled for this model."
                        )
                        content = reasoning_content
                return str(content.strip()) if content else None
            return None
        except (TimeoutException, Exception) as e:
            logger.warning("OpenAI returned an error: %s", str(e))