From b7261c8e701ccc2e0af0ec5eb89aa1ccc973bf5a Mon Sep 17 00:00:00 2001
From: Nicolas Mowen
Date: Wed, 22 Apr 2026 09:55:54 -0600
Subject: [PATCH] GenAI Tweaks (#22968)

* Add debug logs

* refresh embeddings maintainer genai clients on config update

---------

Co-authored-by: Josh Hawkins <32435876+hawkeye217@users.noreply.github.com>
---
 frigate/embeddings/maintainer.py |  4 ++++
 frigate/genai/ollama.py          | 28 ++++++++++++++++++++++++++--
 frigate/genai/openai.py          | 18 +++++++++++++++++-
 3 files changed, 47 insertions(+), 3 deletions(-)

diff --git a/frigate/embeddings/maintainer.py b/frigate/embeddings/maintainer.py
index ea1c9a118..33bef38f2 100644
--- a/frigate/embeddings/maintainer.py
+++ b/frigate/embeddings/maintainer.py
@@ -310,6 +310,10 @@ class EmbeddingMaintainer(threading.Thread):
             self._handle_custom_classification_update(topic, payload)
             return
 
+        if topic == "config/genai":
+            self.config.genai = payload
+            self.genai_manager.update_config(self.config)
+
         # Broadcast to all processors — each decides if the topic is relevant
         for processor in self.realtime_processors:
             processor.update_config(topic, payload)
diff --git a/frigate/genai/ollama.py b/frigate/genai/ollama.py
index 7524d54e3..1c0e222d9 100644
--- a/frigate/genai/ollama.py
+++ b/frigate/genai/ollama.py
@@ -113,6 +113,15 @@ class OllamaClient(GenAIClient):
                 schema = response_format.get("json_schema", {}).get("schema")
                 if schema:
                     ollama_options["format"] = self._clean_schema_for_ollama(schema)
+            logger.debug(
+                "Ollama generate request: model=%s, prompt_len=%s, image_count=%s, "
+                "has_format=%s, options=%s",
+                self.genai_config.model,
+                len(prompt),
+                len(images) if images else 0,
+                "format" in ollama_options,
+                {k: v for k, v in ollama_options.items() if k != "format"},
+            )
             result = self.provider.generate(
                 self.genai_config.model,
                 prompt,
@@ -120,9 +129,24 @@
                 **ollama_options,
             )
             logger.debug(
-                f"Ollama tokens used: eval_count={result.get('eval_count')}, prompt_eval_count={result.get('prompt_eval_count')}"
+                "Ollama generate response: done=%s, done_reason=%s, eval_count=%s, "
+                "prompt_eval_count=%s, response_len=%s",
+                result.get("done"),
+                result.get("done_reason"),
+                result.get("eval_count"),
+                result.get("prompt_eval_count"),
+                len(result.get("response", "") or ""),
             )
-            return str(result["response"]).strip()
+            response_text = str(result["response"]).strip()
+            if not response_text:
+                logger.warning(
+                    "Ollama returned a blank response for model %s (done_reason=%s, "
+                    "eval_count=%s). Check model output, ensure thinking is disabled.",
+                    self.genai_config.model,
+                    result.get("done_reason"),
+                    result.get("eval_count"),
+                )
+            return response_text
         except (
             TimeoutException,
             ResponseError,
diff --git a/frigate/genai/openai.py b/frigate/genai/openai.py
index 88108e730..af94859de 100644
--- a/frigate/genai/openai.py
+++ b/frigate/genai/openai.py
@@ -80,7 +80,23 @@
                 and hasattr(result, "choices")
                 and len(result.choices) > 0
             ):
-                return str(result.choices[0].message.content.strip())
+                message = result.choices[0].message
+                content = message.content
+
+                if not content:
+                    # When reasoning is enabled for some OpenAI backends the actual response
+                    # is incorrectly placed in reasoning_content instead of content.
+                    # This is buggy/incorrect behavior — reasoning should not be
+                    # enabled for these models.
+                    reasoning_content = getattr(message, "reasoning_content", None)
+                    if reasoning_content:
+                        logger.warning(
+                            "Response content was empty but reasoning_content was provided; "
+                            "reasoning appears to be enabled and should be disabled for this model."
+                        )
+                        content = reasoning_content
+
+                return str(content.strip()) if content else None
             return None
         except (TimeoutException, Exception) as e:
             logger.warning("OpenAI returned an error: %s", str(e))