Disable thinking for descriptions automatically

This commit is contained in:
Nicolas Mowen 2026-05-21 09:23:06 -06:00
parent 07c2872b08
commit 44ec45ab41
5 changed files with 18 additions and 2 deletions

View File

@@ -222,8 +222,15 @@ class GenAIClient:
prompt: str,
images: list[bytes],
response_format: Optional[dict] = None,
enable_thinking: bool = False,
) -> Optional[str]:
"""Submit a request to the provider."""
"""Submit a request to the provider.
``enable_thinking`` is honored only by providers that report
``supports_toggleable_thinking``. Description-style callers leave it
at the default (off) since synthesis tasks don't benefit from
reasoning traces.
"""
return None
@property

View File

@@ -62,6 +62,7 @@ class GeminiClient(GenAIClient):
prompt: str,
images: list[bytes],
response_format: Optional[dict] = None,
enable_thinking: bool = False,
) -> Optional[str]:
"""Submit a request to Gemini."""
contents = [prompt] + [

View File

@@ -312,6 +312,7 @@ class LlamaCppClient(GenAIClient):
prompt: str,
images: list[bytes],
response_format: Optional[dict] = None,
enable_thinking: bool = False,
) -> Optional[str]:
"""Submit a request to llama.cpp server."""
if self.provider is None:
@@ -339,7 +340,7 @@ class LlamaCppClient(GenAIClient):
)
# Build request payload with llama.cpp native options
payload = {
payload: dict[str, Any] = {
"model": self.genai_config.model,
"messages": [
{
@@ -353,6 +354,9 @@ class LlamaCppClient(GenAIClient):
if response_format:
payload["response_format"] = response_format
if self.supports_toggleable_thinking:
payload["chat_template_kwargs"] = {"enable_thinking": enable_thinking}
response = requests.post(
f"{self.provider}/v1/chat/completions",
json=payload,

View File

@@ -194,6 +194,7 @@ class OllamaClient(GenAIClient):
prompt: str,
images: list[bytes],
response_format: Optional[dict] = None,
enable_thinking: bool = False,
) -> Optional[str]:
"""Submit a request to Ollama"""
if self.provider is None:
@@ -210,6 +211,8 @@ class OllamaClient(GenAIClient):
schema = response_format.get("json_schema", {}).get("schema")
if schema:
ollama_options["format"] = self._clean_schema_for_ollama(schema)
if self.supports_toggleable_thinking:
ollama_options["think"] = enable_thinking
logger.debug(
"Ollama generate request: model=%s, prompt_len=%s, image_count=%s, "
"has_format=%s, options=%s",

View File

@@ -61,6 +61,7 @@ class OpenAIClient(GenAIClient):
prompt: str,
images: list[bytes],
response_format: Optional[dict] = None,
enable_thinking: bool = False,
) -> Optional[str]:
"""Submit a request to OpenAI."""
encoded_images = [base64.b64encode(image).decode("utf-8") for image in images]