Disable thinking for descriptions automatically

This commit is contained in:
Nicolas Mowen 2026-05-21 09:23:06 -06:00
parent 07c2872b08
commit 44ec45ab41
5 changed files with 18 additions and 2 deletions

View File

@ -222,8 +222,15 @@ class GenAIClient:
prompt: str, prompt: str,
images: list[bytes], images: list[bytes],
response_format: Optional[dict] = None, response_format: Optional[dict] = None,
enable_thinking: bool = False,
) -> Optional[str]: ) -> Optional[str]:
"""Submit a request to the provider.""" """Submit a request to the provider.
``enable_thinking`` is honored only by providers that report
``supports_toggleable_thinking``. Description-style callers leave it
at the default (off) since synthesis tasks don't benefit from
reasoning traces.
"""
return None return None
@property @property

View File

@ -62,6 +62,7 @@ class GeminiClient(GenAIClient):
prompt: str, prompt: str,
images: list[bytes], images: list[bytes],
response_format: Optional[dict] = None, response_format: Optional[dict] = None,
enable_thinking: bool = False,
) -> Optional[str]: ) -> Optional[str]:
"""Submit a request to Gemini.""" """Submit a request to Gemini."""
contents = [prompt] + [ contents = [prompt] + [

View File

@ -312,6 +312,7 @@ class LlamaCppClient(GenAIClient):
prompt: str, prompt: str,
images: list[bytes], images: list[bytes],
response_format: Optional[dict] = None, response_format: Optional[dict] = None,
enable_thinking: bool = False,
) -> Optional[str]: ) -> Optional[str]:
"""Submit a request to llama.cpp server.""" """Submit a request to llama.cpp server."""
if self.provider is None: if self.provider is None:
@ -339,7 +340,7 @@ class LlamaCppClient(GenAIClient):
) )
# Build request payload with llama.cpp native options # Build request payload with llama.cpp native options
payload = { payload: dict[str, Any] = {
"model": self.genai_config.model, "model": self.genai_config.model,
"messages": [ "messages": [
{ {
@ -353,6 +354,9 @@ class LlamaCppClient(GenAIClient):
if response_format: if response_format:
payload["response_format"] = response_format payload["response_format"] = response_format
if self.supports_toggleable_thinking:
payload["chat_template_kwargs"] = {"enable_thinking": enable_thinking}
response = requests.post( response = requests.post(
f"{self.provider}/v1/chat/completions", f"{self.provider}/v1/chat/completions",
json=payload, json=payload,

View File

@ -194,6 +194,7 @@ class OllamaClient(GenAIClient):
prompt: str, prompt: str,
images: list[bytes], images: list[bytes],
response_format: Optional[dict] = None, response_format: Optional[dict] = None,
enable_thinking: bool = False,
) -> Optional[str]: ) -> Optional[str]:
"""Submit a request to Ollama""" """Submit a request to Ollama"""
if self.provider is None: if self.provider is None:
@ -210,6 +211,8 @@ class OllamaClient(GenAIClient):
schema = response_format.get("json_schema", {}).get("schema") schema = response_format.get("json_schema", {}).get("schema")
if schema: if schema:
ollama_options["format"] = self._clean_schema_for_ollama(schema) ollama_options["format"] = self._clean_schema_for_ollama(schema)
if self.supports_toggleable_thinking:
ollama_options["think"] = enable_thinking
logger.debug( logger.debug(
"Ollama generate request: model=%s, prompt_len=%s, image_count=%s, " "Ollama generate request: model=%s, prompt_len=%s, image_count=%s, "
"has_format=%s, options=%s", "has_format=%s, options=%s",

View File

@ -61,6 +61,7 @@ class OpenAIClient(GenAIClient):
prompt: str, prompt: str,
images: list[bytes], images: list[bytes],
response_format: Optional[dict] = None, response_format: Optional[dict] = None,
enable_thinking: bool = False,
) -> Optional[str]: ) -> Optional[str]:
"""Submit a request to OpenAI.""" """Submit a request to OpenAI."""
encoded_images = [base64.b64encode(image).decode("utf-8") for image in images] encoded_images = [base64.b64encode(image).decode("utf-8") for image in images]