GenAI Tweaks (#22968)

* Add debug logs

* refresh embeddings maintainer genai clients on config update

---------

Co-authored-by: Josh Hawkins <32435876+hawkeye217@users.noreply.github.com>
This commit is contained in:
Nicolas Mowen 2026-04-22 09:55:54 -06:00 committed by GitHub
parent ad9092d0da
commit b7261c8e70
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 47 additions and 3 deletions

View File

@ -310,6 +310,10 @@ class EmbeddingMaintainer(threading.Thread):
self._handle_custom_classification_update(topic, payload)
return
if topic == "config/genai":
self.config.genai = payload
self.genai_manager.update_config(self.config)
# Broadcast to all processors — each decides if the topic is relevant
for processor in self.realtime_processors:
processor.update_config(topic, payload)

View File

@ -113,6 +113,15 @@ class OllamaClient(GenAIClient):
schema = response_format.get("json_schema", {}).get("schema")
if schema:
ollama_options["format"] = self._clean_schema_for_ollama(schema)
logger.debug(
"Ollama generate request: model=%s, prompt_len=%s, image_count=%s, "
"has_format=%s, options=%s",
self.genai_config.model,
len(prompt),
len(images) if images else 0,
"format" in ollama_options,
{k: v for k, v in ollama_options.items() if k != "format"},
)
result = self.provider.generate(
self.genai_config.model,
prompt,
@ -120,9 +129,24 @@ class OllamaClient(GenAIClient):
**ollama_options,
)
logger.debug(
f"Ollama tokens used: eval_count={result.get('eval_count')}, prompt_eval_count={result.get('prompt_eval_count')}"
"Ollama generate response: done=%s, done_reason=%s, eval_count=%s, "
"prompt_eval_count=%s, response_len=%s",
result.get("done"),
result.get("done_reason"),
result.get("eval_count"),
result.get("prompt_eval_count"),
len(result.get("response", "") or ""),
)
return str(result["response"]).strip()
response_text = str(result["response"]).strip()
if not response_text:
logger.warning(
"Ollama returned a blank response for model %s (done_reason=%s, "
"eval_count=%s). Check model output, ensure thinking is disabled.",
self.genai_config.model,
result.get("done_reason"),
result.get("eval_count"),
)
return response_text
except (
TimeoutException,
ResponseError,

View File

@ -80,7 +80,23 @@ class OpenAIClient(GenAIClient):
and hasattr(result, "choices")
and len(result.choices) > 0
):
return str(result.choices[0].message.content.strip())
message = result.choices[0].message
content = message.content
if not content:
# When reasoning is enabled for some OpenAI backends the actual response
# is incorrectly placed in reasoning_content instead of content.
# This is buggy/incorrect behavior — reasoning should not be
# enabled for these models.
reasoning_content = getattr(message, "reasoning_content", None)
if reasoning_content:
logger.warning(
"Response content was empty but reasoning_content was provided; "
"reasoning appears to be enabled and should be disabled for this model."
)
content = reasoning_content
return str(content.strip()) if content else None
return None
except (TimeoutException, Exception) as e:
logger.warning("OpenAI returned an error: %s", str(e))