Support Dynamic Thinking Models (#23281)
Some checks are pending
CI / AMD64 Build (push) Waiting to run
CI / ARM Build (push) Waiting to run
CI / Jetson Jetpack 6 (push) Waiting to run
CI / AMD64 Extra Build (push) Blocked by required conditions
CI / ARM Extra Build (push) Blocked by required conditions
CI / Synaptics Build (push) Blocked by required conditions
CI / Assemble and push default build (push) Blocked by required conditions

* Add ability to toggle thinking

* Disable thinking for descriptions automatically

* mypy

* Cleanup
This commit is contained in:
Nicolas Mowen 2026-05-21 11:54:23 -06:00 committed by GitHub
parent 555ef89800
commit 66a2417229
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
16 changed files with 410 additions and 175 deletions

View File

@ -1173,6 +1173,7 @@ async def chat_completion(
messages=conversation, messages=conversation,
tools=tools if tools else None, tools=tools if tools else None,
tool_choice="auto", tool_choice="auto",
enable_thinking=body.enable_thinking,
): ):
if await request.is_disconnected(): if await request.is_disconnected():
logger.debug("Client disconnected, stopping chat stream") logger.debug("Client disconnected, stopping chat stream")
@ -1267,6 +1268,7 @@ async def chat_completion(
messages=conversation, messages=conversation,
tools=tools if tools else None, tools=tools if tools else None,
tool_choice="auto", tool_choice="auto",
enable_thinking=body.enable_thinking,
) )
if response.get("finish_reason") == "error": if response.get("finish_reason") == "error":

View File

@ -36,3 +36,10 @@ class ChatCompletionRequest(BaseModel):
default=False, default=False,
description="If true, stream the final assistant response in the body as newline-delimited JSON.", description="If true, stream the final assistant response in the body as newline-delimited JSON.",
) )
enable_thinking: Optional[bool] = Field(
default=None,
description=(
"Per-request thinking toggle. None means use the provider default. "
"Ignored by providers that do not expose a per-request thinking switch."
),
)

View File

@ -222,8 +222,15 @@ class GenAIClient:
prompt: str, prompt: str,
images: list[bytes], images: list[bytes],
response_format: Optional[dict] = None, response_format: Optional[dict] = None,
enable_thinking: bool = False,
) -> Optional[str]: ) -> Optional[str]:
"""Submit a request to the provider.""" """Submit a request to the provider.
``enable_thinking`` is honored only by providers that report
``supports_toggleable_thinking``. Description-style callers leave it
at the default (off) since synthesis tasks don't benefit from
reasoning traces.
"""
return None return None
@property @property
@ -235,6 +242,11 @@ class GenAIClient:
""" """
return True return True
@property
def supports_toggleable_thinking(self) -> bool:
"""Whether the configured model exposes a per-request thinking toggle."""
return False
def list_models(self) -> list[str]: def list_models(self) -> list[str]:
"""Return the list of model names available from this provider. """Return the list of model names available from this provider.
@ -278,6 +290,7 @@ class GenAIClient:
messages: list[dict[str, Any]], messages: list[dict[str, Any]],
tools: Optional[list[dict[str, Any]]] = None, tools: Optional[list[dict[str, Any]]] = None,
tool_choice: Optional[str] = "auto", tool_choice: Optional[str] = "auto",
enable_thinking: Optional[bool] = None,
) -> dict[str, Any]: ) -> dict[str, Any]:
""" """
Send chat messages to LLM with optional tool definitions. Send chat messages to LLM with optional tool definitions.
@ -301,7 +314,9 @@ class GenAIClient:
- 'none': Model must not call tools - 'none': Model must not call tools
- 'required': Model must call at least one tool - 'required': Model must call at least one tool
- Or a dict specifying a specific tool to call - Or a dict specifying a specific tool to call
**kwargs: Additional provider-specific parameters. enable_thinking: Per-request thinking toggle. None means use the
provider default. Ignored by providers without a per-request
toggle (see `supports_toggleable_thinking`).
Returns: Returns:
Dictionary with: Dictionary with:

View File

@ -6,7 +6,7 @@ no chat feature is active) are never initialized.
""" """
import logging import logging
from typing import TYPE_CHECKING, Optional from typing import TYPE_CHECKING, Any, Optional
from frigate.config import FrigateConfig from frigate.config import FrigateConfig
from frigate.config.camera.genai import GenAIConfig, GenAIRoleEnum from frigate.config.camera.genai import GenAIConfig, GenAIRoleEnum
@ -108,11 +108,16 @@ class GenAIClientManager:
name = self._role_map.get(GenAIRoleEnum.embeddings) name = self._role_map.get(GenAIRoleEnum.embeddings)
return self._get_client(name) if name else None return self._get_client(name) if name else None
def list_models(self) -> dict[str, list[str]]: def list_models(self) -> dict[str, dict[str, Any]]:
"""Return available models keyed by config entry name.""" """Return per-entry model lists and capabilities, keyed by config entry name."""
result: dict[str, list[str]] = {} result: dict[str, dict[str, Any]] = {}
for name in self._configs: for name, genai_cfg in self._configs.items():
client = self._get_client(name) client = self._get_client(name)
if client: if not client:
result[name] = client.list_models() continue
result[name] = {
"models": client.list_models(),
"roles": [r.value for r in genai_cfg.roles],
"supports_toggleable_thinking": client.supports_toggleable_thinking,
}
return result return result

View File

@ -62,6 +62,7 @@ class GeminiClient(GenAIClient):
prompt: str, prompt: str,
images: list[bytes], images: list[bytes],
response_format: Optional[dict] = None, response_format: Optional[dict] = None,
enable_thinking: bool = False,
) -> Optional[str]: ) -> Optional[str]:
"""Submit a request to Gemini.""" """Submit a request to Gemini."""
contents = [prompt] + [ contents = [prompt] + [
@ -119,11 +120,14 @@ class GeminiClient(GenAIClient):
messages: list[dict[str, Any]], messages: list[dict[str, Any]],
tools: Optional[list[dict[str, Any]]] = None, tools: Optional[list[dict[str, Any]]] = None,
tool_choice: Optional[str] = "auto", tool_choice: Optional[str] = "auto",
enable_thinking: Optional[bool] = None,
) -> dict[str, Any]: ) -> dict[str, Any]:
""" """
Send chat messages to Gemini with optional tool definitions. Send chat messages to Gemini with optional tool definitions.
Implements function calling/tool usage for Gemini models. Implements function calling/tool usage for Gemini models. Thinking is
configured at the model level for Gemini, so ``enable_thinking`` is
accepted for interface parity and ignored.
""" """
try: try:
# Convert messages to Gemini format # Convert messages to Gemini format

View File

@ -122,6 +122,7 @@ class LlamaCppClient(GenAIClient):
_supports_vision: bool _supports_vision: bool
_supports_audio: bool _supports_audio: bool
_supports_tools: bool _supports_tools: bool
_supports_reasoning: bool
_image_token_cache: dict[tuple[int, int], int] _image_token_cache: dict[tuple[int, int], int]
_text_baseline_tokens: int | None _text_baseline_tokens: int | None
_media_marker: str _media_marker: str
@ -135,6 +136,7 @@ class LlamaCppClient(GenAIClient):
self._supports_vision = False self._supports_vision = False
self._supports_audio = False self._supports_audio = False
self._supports_tools = False self._supports_tools = False
self._supports_reasoning = False
self._image_token_cache = {} self._image_token_cache = {}
self._text_baseline_tokens = None self._text_baseline_tokens = None
self._media_marker = "<__media__>" self._media_marker = "<__media__>"
@ -164,15 +166,17 @@ class LlamaCppClient(GenAIClient):
self._supports_vision = info["supports_vision"] self._supports_vision = info["supports_vision"]
self._supports_audio = info["supports_audio"] self._supports_audio = info["supports_audio"]
self._supports_tools = info["supports_tools"] self._supports_tools = info["supports_tools"]
self._supports_reasoning = info["supports_reasoning"]
self._media_marker = info["media_marker"] self._media_marker = info["media_marker"]
logger.info( logger.info(
"llama.cpp model '%s' initialized — context: %s, vision: %s, audio: %s, tools: %s", "llama.cpp model '%s' initialized — context: %s, vision: %s, audio: %s, tools: %s, reasoning: %s",
configured_model, configured_model,
self._context_size or "unknown", self._context_size or "unknown",
self._supports_vision, self._supports_vision,
self._supports_audio, self._supports_audio,
self._supports_tools, self._supports_tools,
self._supports_reasoning,
) )
return base_url return base_url
@ -200,6 +204,7 @@ class LlamaCppClient(GenAIClient):
"supports_vision": False, "supports_vision": False,
"supports_audio": False, "supports_audio": False,
"supports_tools": False, "supports_tools": False,
"supports_reasoning": False,
"media_marker": "<__media__>", "media_marker": "<__media__>",
} }
@ -279,10 +284,17 @@ class LlamaCppClient(GenAIClient):
info["supports_vision"] = bool(modalities.get("vision", False)) info["supports_vision"] = bool(modalities.get("vision", False))
info["supports_audio"] = bool(modalities.get("audio", False)) info["supports_audio"] = bool(modalities.get("audio", False))
chat_caps = props.get("chat_template_caps") or {}
if not info["supports_tools"]: if not info["supports_tools"]:
chat_caps = props.get("chat_template_caps", {})
info["supports_tools"] = bool(chat_caps.get("supports_tools", False)) info["supports_tools"] = bool(chat_caps.get("supports_tools", False))
# llama.cpp does not advertise per-template reasoning support, so
# detect it by looking for the `enable_thinking` toggle variable
# in the Jinja chat template itself.
chat_template = props.get("chat_template") or ""
info["supports_reasoning"] = "enable_thinking" in chat_template
media_marker = props.get("media_marker") media_marker = props.get("media_marker")
if isinstance(media_marker, str) and media_marker: if isinstance(media_marker, str) and media_marker:
info["media_marker"] = media_marker info["media_marker"] = media_marker
@ -300,6 +312,7 @@ class LlamaCppClient(GenAIClient):
prompt: str, prompt: str,
images: list[bytes], images: list[bytes],
response_format: Optional[dict] = None, response_format: Optional[dict] = None,
enable_thinking: bool = False,
) -> Optional[str]: ) -> Optional[str]:
"""Submit a request to llama.cpp server.""" """Submit a request to llama.cpp server."""
if self.provider is None: if self.provider is None:
@ -327,7 +340,7 @@ class LlamaCppClient(GenAIClient):
) )
# Build request payload with llama.cpp native options # Build request payload with llama.cpp native options
payload = { payload: dict[str, Any] = {
"model": self.genai_config.model, "model": self.genai_config.model,
"messages": [ "messages": [
{ {
@ -341,6 +354,9 @@ class LlamaCppClient(GenAIClient):
if response_format: if response_format:
payload["response_format"] = response_format payload["response_format"] = response_format
if self.supports_toggleable_thinking:
payload["chat_template_kwargs"] = {"enable_thinking": enable_thinking}
response = requests.post( response = requests.post(
f"{self.provider}/v1/chat/completions", f"{self.provider}/v1/chat/completions",
json=payload, json=payload,
@ -377,6 +393,10 @@ class LlamaCppClient(GenAIClient):
"""Whether the loaded model supports tool/function calling.""" """Whether the loaded model supports tool/function calling."""
return self._supports_tools return self._supports_tools
@property
def supports_toggleable_thinking(self) -> bool:
    """Whether the loaded model exposes a per-request thinking toggle.

    Returns the ``_supports_reasoning`` flag, which is populated from the
    server-reported model info; that info marks reasoning as supported when
    the Jinja chat template contains the ``enable_thinking`` variable.
    """
    return self._supports_reasoning
def list_models(self) -> list[str]: def list_models(self) -> list[str]:
"""Return available model IDs from the llama.cpp server.""" """Return available model IDs from the llama.cpp server."""
base_url = self.provider or ( base_url = self.provider or (
@ -504,6 +524,7 @@ class LlamaCppClient(GenAIClient):
tools: Optional[list[dict[str, Any]]], tools: Optional[list[dict[str, Any]]],
tool_choice: Optional[str], tool_choice: Optional[str],
stream: bool = False, stream: bool = False,
enable_thinking: Optional[bool] = None,
) -> dict[str, Any]: ) -> dict[str, Any]:
"""Build request payload for chat completions (sync or stream).""" """Build request payload for chat completions (sync or stream)."""
openai_tool_choice = None openai_tool_choice = None
@ -519,14 +540,21 @@ class LlamaCppClient(GenAIClient):
"messages": messages, "messages": messages,
"model": self.genai_config.model, "model": self.genai_config.model,
} }
if stream: if stream:
payload["stream"] = True payload["stream"] = True
payload["stream_options"] = {"include_usage": True} payload["stream_options"] = {"include_usage": True}
payload["timings_per_token"] = True payload["timings_per_token"] = True
if tools: if tools:
payload["tools"] = tools payload["tools"] = tools
if openai_tool_choice is not None: if openai_tool_choice is not None:
payload["tool_choice"] = openai_tool_choice payload["tool_choice"] = openai_tool_choice
if enable_thinking is not None and self._supports_reasoning:
payload["chat_template_kwargs"] = {"enable_thinking": enable_thinking}
provider_opts = { provider_opts = {
k: v for k, v in self.provider_options.items() if k != "context_size" k: v for k, v in self.provider_options.items() if k != "context_size"
} }
@ -732,6 +760,7 @@ class LlamaCppClient(GenAIClient):
messages: list[dict[str, Any]], messages: list[dict[str, Any]],
tools: Optional[list[dict[str, Any]]] = None, tools: Optional[list[dict[str, Any]]] = None,
tool_choice: Optional[str] = "auto", tool_choice: Optional[str] = "auto",
enable_thinking: Optional[bool] = None,
) -> dict[str, Any]: ) -> dict[str, Any]:
""" """
Send chat messages to llama.cpp server with optional tool definitions. Send chat messages to llama.cpp server with optional tool definitions.
@ -749,7 +778,13 @@ class LlamaCppClient(GenAIClient):
"finish_reason": "error", "finish_reason": "error",
} }
try: try:
payload = self._build_payload(messages, tools, tool_choice, stream=False) payload = self._build_payload(
messages,
tools,
tool_choice,
stream=False,
enable_thinking=enable_thinking,
)
response = requests.post( response = requests.post(
f"{self.provider}/v1/chat/completions", f"{self.provider}/v1/chat/completions",
json=payload, json=payload,
@ -797,6 +832,7 @@ class LlamaCppClient(GenAIClient):
messages: list[dict[str, Any]], messages: list[dict[str, Any]],
tools: Optional[list[dict[str, Any]]] = None, tools: Optional[list[dict[str, Any]]] = None,
tool_choice: Optional[str] = "auto", tool_choice: Optional[str] = "auto",
enable_thinking: Optional[bool] = None,
) -> AsyncGenerator[tuple[str, Any], None]: ) -> AsyncGenerator[tuple[str, Any], None]:
"""Stream chat with tools via OpenAI-compatible streaming API.""" """Stream chat with tools via OpenAI-compatible streaming API."""
if self.provider is None: if self.provider is None:
@ -813,7 +849,13 @@ class LlamaCppClient(GenAIClient):
) )
return return
try: try:
payload = self._build_payload(messages, tools, tool_choice, stream=True) payload = self._build_payload(
messages,
tools,
tool_choice,
stream=True,
enable_thinking=enable_thinking,
)
content_parts: list[str] = [] content_parts: list[str] = []
reasoning_parts: list[str] = [] reasoning_parts: list[str] = []
tool_calls_by_index: dict[int, dict[str, Any]] = {} tool_calls_by_index: dict[int, dict[str, Any]] = {}

View File

@ -98,6 +98,22 @@ class OllamaClient(GenAIClient):
provider: ApiClient | None provider: ApiClient | None
provider_options: dict[str, Any] provider_options: dict[str, Any]
_supports_thinking_cache: Optional[bool] = None
@property
def supports_toggleable_thinking(self) -> bool:
    """Whether the configured Ollama model accepts a per-request thinking toggle.

    The answer is derived from the ``capabilities`` list returned by the
    provider's ``show`` call for the configured model and is cached once a
    definitive answer has been obtained.

    Returns:
        ``True`` when ``"thinking"`` is among the model capabilities.
        ``False`` when it is not, when no provider is configured, or when
        the capability lookup fails. The latter two cases are not cached,
        so a later call can still succeed once the server becomes reachable.
    """
    if self._supports_thinking_cache is not None:
        return self._supports_thinking_cache
    if self.provider is None:
        return False
    try:
        response = self.provider.show(self.genai_config.model)
        capabilities = response.get("capabilities") or []
        supported = "thinking" in capabilities
    except Exception as e:
        # Best-effort lookup at the client boundary: report "unsupported" for
        # this call, but do not cache it -- a transient failure (server not
        # up yet, model still being pulled) must not disable the toggle for
        # the lifetime of the process.
        logger.debug("Failed to query Ollama model capabilities: %s", e)
        return False
    self._supports_thinking_cache = supported
    return supported
def _auth_headers(self) -> dict | None: def _auth_headers(self) -> dict | None:
if self.genai_config.api_key: if self.genai_config.api_key:
@ -178,6 +194,7 @@ class OllamaClient(GenAIClient):
prompt: str, prompt: str,
images: list[bytes], images: list[bytes],
response_format: Optional[dict] = None, response_format: Optional[dict] = None,
enable_thinking: bool = False,
) -> Optional[str]: ) -> Optional[str]:
"""Submit a request to Ollama""" """Submit a request to Ollama"""
if self.provider is None: if self.provider is None:
@ -194,6 +211,8 @@ class OllamaClient(GenAIClient):
schema = response_format.get("json_schema", {}).get("schema") schema = response_format.get("json_schema", {}).get("schema")
if schema: if schema:
ollama_options["format"] = self._clean_schema_for_ollama(schema) ollama_options["format"] = self._clean_schema_for_ollama(schema)
if self.supports_toggleable_thinking:
ollama_options["think"] = enable_thinking
logger.debug( logger.debug(
"Ollama generate request: model=%s, prompt_len=%s, image_count=%s, " "Ollama generate request: model=%s, prompt_len=%s, image_count=%s, "
"has_format=%s, options=%s", "has_format=%s, options=%s",
@ -274,6 +293,7 @@ class OllamaClient(GenAIClient):
tools: Optional[list[dict[str, Any]]], tools: Optional[list[dict[str, Any]]],
tool_choice: Optional[str], tool_choice: Optional[str],
stream: bool = False, stream: bool = False,
enable_thinking: Optional[bool] = None,
) -> dict[str, Any]: ) -> dict[str, Any]:
"""Build request_messages and params for chat (sync or stream).""" """Build request_messages and params for chat (sync or stream)."""
request_messages = [] request_messages = []
@ -318,6 +338,8 @@ class OllamaClient(GenAIClient):
request_params["stream"] = True request_params["stream"] = True
if tools: if tools:
request_params["tools"] = tools request_params["tools"] = tools
if enable_thinking is not None and self.supports_toggleable_thinking:
request_params["think"] = enable_thinking
return request_params return request_params
def _message_from_response(self, response: dict[str, Any]) -> dict[str, Any]: def _message_from_response(self, response: dict[str, Any]) -> dict[str, Any]:
@ -365,6 +387,7 @@ class OllamaClient(GenAIClient):
messages: list[dict[str, Any]], messages: list[dict[str, Any]],
tools: Optional[list[dict[str, Any]]] = None, tools: Optional[list[dict[str, Any]]] = None,
tool_choice: Optional[str] = "auto", tool_choice: Optional[str] = "auto",
enable_thinking: Optional[bool] = None,
) -> dict[str, Any]: ) -> dict[str, Any]:
if self.provider is None: if self.provider is None:
logger.warning( logger.warning(
@ -377,7 +400,11 @@ class OllamaClient(GenAIClient):
} }
try: try:
request_params = self._build_request_params( request_params = self._build_request_params(
messages, tools, tool_choice, stream=False messages,
tools,
tool_choice,
stream=False,
enable_thinking=enable_thinking,
) )
response = self.provider.chat(**request_params) response = self.provider.chat(**request_params)
return self._message_from_response(response) return self._message_from_response(response)
@ -401,6 +428,7 @@ class OllamaClient(GenAIClient):
messages: list[dict[str, Any]], messages: list[dict[str, Any]],
tools: Optional[list[dict[str, Any]]] = None, tools: Optional[list[dict[str, Any]]] = None,
tool_choice: Optional[str] = "auto", tool_choice: Optional[str] = "auto",
enable_thinking: Optional[bool] = None,
) -> AsyncGenerator[tuple[str, Any], None]: ) -> AsyncGenerator[tuple[str, Any], None]:
"""Stream chat with tools; yields content deltas then final message. """Stream chat with tools; yields content deltas then final message.
@ -430,7 +458,11 @@ class OllamaClient(GenAIClient):
"Ollama: tools provided, using non-streaming call for tool support" "Ollama: tools provided, using non-streaming call for tool support"
) )
request_params = self._build_request_params( request_params = self._build_request_params(
messages, tools, tool_choice, stream=False messages,
tools,
tool_choice,
stream=False,
enable_thinking=enable_thinking,
) )
async_client = OllamaAsyncClient( async_client = OllamaAsyncClient(
host=self.genai_config.base_url, host=self.genai_config.base_url,
@ -452,7 +484,11 @@ class OllamaClient(GenAIClient):
return return
request_params = self._build_request_params( request_params = self._build_request_params(
messages, tools, tool_choice, stream=True messages,
tools,
tool_choice,
stream=True,
enable_thinking=enable_thinking,
) )
async_client = OllamaAsyncClient( async_client = OllamaAsyncClient(
host=self.genai_config.base_url, host=self.genai_config.base_url,

View File

@ -61,6 +61,7 @@ class OpenAIClient(GenAIClient):
prompt: str, prompt: str,
images: list[bytes], images: list[bytes],
response_format: Optional[dict] = None, response_format: Optional[dict] = None,
enable_thinking: bool = False,
) -> Optional[str]: ) -> Optional[str]:
"""Submit a request to OpenAI.""" """Submit a request to OpenAI."""
encoded_images = [base64.b64encode(image).decode("utf-8") for image in images] encoded_images = [base64.b64encode(image).decode("utf-8") for image in images]
@ -187,11 +188,14 @@ class OpenAIClient(GenAIClient):
messages: list[dict[str, Any]], messages: list[dict[str, Any]],
tools: Optional[list[dict[str, Any]]] = None, tools: Optional[list[dict[str, Any]]] = None,
tool_choice: Optional[str] = "auto", tool_choice: Optional[str] = "auto",
enable_thinking: Optional[bool] = None,
) -> dict[str, Any]: ) -> dict[str, Any]:
""" """
Send chat messages to OpenAI with optional tool definitions. Send chat messages to OpenAI with optional tool definitions.
Implements function calling/tool usage for OpenAI models. Implements function calling/tool usage for OpenAI models. The OpenAI
chat completions API does not expose a per-request thinking toggle,
so ``enable_thinking`` is accepted for interface parity and ignored.
""" """
try: try:
openai_tool_choice = None openai_tool_choice = None

View File

@ -65,5 +65,8 @@
"active": "Reasoning…", "active": "Reasoning…",
"show": "Show reasoning", "show": "Show reasoning",
"hide": "Hide reasoning" "hide": "Hide reasoning"
},
"thinking": {
"toggle": "Toggle thinking"
} }
} }

View File

@ -0,0 +1,147 @@
import { Button } from "@/components/ui/button";
import { Input } from "@/components/ui/input";
import { FaArrowUpLong, FaStop } from "react-icons/fa6";
import { LuBrain } from "react-icons/lu";
import { useTranslation } from "react-i18next";
import { cn } from "@/lib/utils";
import {
Tooltip,
TooltipContent,
TooltipProvider,
TooltipTrigger,
} from "@/components/ui/tooltip";
import { ChatAttachmentChip } from "@/components/chat/ChatAttachmentChip";
import { ChatQuickReplies } from "@/components/chat/ChatQuickReplies";
import { ChatPaperclipButton } from "@/components/chat/ChatPaperclipButton";
/** Props accepted by the ChatComposer input row. */
type ChatComposerProps = {
  /** Current text shown in the input field. */
  input: string;
  /** Called with the new text on every input change. */
  setInput: (value: string) => void;
  /**
   * Sends a message. Called without arguments from the Enter key and the
   * send button, and with explicit text from the quick replies.
   */
  sendMessage: (textOverride?: string) => void;
  /** Placeholder text for the input field. */
  placeholder: string;
  /** When true, the thinking toggle button is rendered. */
  supportsThinking: boolean;
  /** Current toggle state; drives the button variant and aria-pressed. */
  thinkingEnabled: boolean;
  /** Called with the negated toggle state when the thinking button is clicked. */
  setThinkingEnabled: (value: boolean | undefined) => void;
  /** Disables the send, attach and thinking controls while true. */
  isLoading?: boolean;
  /** When provided and isLoading is true, a stop button replaces the send button. */
  onStop?: () => void;
  /** Event currently attached; shows the attachment chip and quick replies. */
  attachedEventId?: string | null;
  /** Remove handler for the attachment chip; the chip is only shown when set. */
  onClearAttachment?: () => void;
  /** When provided, the paperclip (attach) button is rendered. */
  onAttach?: (eventId: string) => void;
  /** Event ids passed to the paperclip button; an empty list is used when omitted. */
  recentEventIds?: string[];
  /** Applies the larger input sizing classes ("h-12 text-base"). */
  large?: boolean;
};
/**
 * Message composer row for the chat view: optional attachment chip and quick
 * replies, optional paperclip and thinking-toggle buttons, the text input,
 * and a send button that is swapped for a stop button while a response is
 * loading (when `onStop` is provided).
 */
export function ChatComposer({
  input,
  setInput,
  sendMessage,
  placeholder,
  supportsThinking,
  thinkingEnabled,
  setThinkingEnabled,
  isLoading = false,
  onStop,
  attachedEventId,
  onClearAttachment,
  onAttach,
  recentEventIds,
  large = false,
}: ChatComposerProps) {
  const { t } = useTranslation(["views/chat"]);

  const handleKeyDown = (e: React.KeyboardEvent<HTMLInputElement>) => {
    if (e.key !== "Enter" || e.shiftKey) {
      return;
    }

    // The Enter that confirms an IME composition (e.g. CJK input) must be
    // left to the input method and must not submit the half-composed text.
    if (e.nativeEvent.isComposing) {
      return;
    }

    e.preventDefault();

    // Mirror the send button, which is disabled while a response is loading.
    if (isLoading) {
      return;
    }

    sendMessage();
  };

  const showPaperclip = !!onAttach;
  const showStop = isLoading && !!onStop;

  return (
    <div className="flex w-full flex-col items-stretch justify-center gap-2 rounded-xl bg-secondary p-3">
      {attachedEventId && onClearAttachment && (
        <div className="flex items-center">
          <ChatAttachmentChip
            eventId={attachedEventId}
            mode="composer"
            onRemove={onClearAttachment}
          />
        </div>
      )}
      {attachedEventId && (
        <ChatQuickReplies
          onSend={(text) => sendMessage(text)}
          disabled={isLoading}
        />
      )}
      <div className="flex w-full flex-row items-center gap-2">
        {showPaperclip && (
          <ChatPaperclipButton
            recentEventIds={recentEventIds ?? []}
            onAttach={onAttach!}
            disabled={isLoading || attachedEventId != null}
          />
        )}
        {supportsThinking && (
          <TooltipProvider>
            <Tooltip>
              <TooltipTrigger asChild>
                <Button
                  type="button"
                  size="sm"
                  variant={thinkingEnabled ? "select" : "ghost"}
                  aria-pressed={thinkingEnabled}
                  aria-label={t("thinking.toggle")}
                  className={cn(
                    "flex size-9 shrink-0 items-center justify-center rounded-full p-0",
                    !thinkingEnabled && "text-secondary-foreground",
                  )}
                  onClick={() => setThinkingEnabled(!thinkingEnabled)}
                  disabled={isLoading}
                >
                  <LuBrain className="size-4" />
                </Button>
              </TooltipTrigger>
              <TooltipContent>{t("thinking.toggle")}</TooltipContent>
            </Tooltip>
          </TooltipProvider>
        )}
        <Input
          className={cn(
            "w-full flex-1 border-transparent bg-transparent shadow-none focus-visible:ring-0 dark:bg-transparent",
            large && "h-12 text-base",
          )}
          placeholder={placeholder}
          value={input}
          onChange={(e) => setInput(e.target.value)}
          onKeyDown={handleKeyDown}
          aria-busy={isLoading}
        />
        {showStop ? (
          <Button
            variant="destructive"
            className="size-10 shrink-0 rounded-full"
            onClick={onStop}
          >
            <FaStop className="size-3" />
          </Button>
        ) : (
          <Button
            variant="select"
            className="size-10 shrink-0 rounded-full"
            disabled={!input.trim() || isLoading}
            onClick={() => sendMessage()}
          >
            <FaArrowUpLong className="size-4" />
          </Button>
        )}
      </div>
    </div>
  );
}

View File

@ -1,15 +1,22 @@
import { Button } from "@/components/ui/button"; import { Button } from "@/components/ui/button";
import { Input } from "@/components/ui/input";
import { FaArrowUpLong } from "react-icons/fa6";
import { useTranslation } from "react-i18next"; import { useTranslation } from "react-i18next";
import { useState } from "react"; import { useState } from "react";
import type { StartingRequest } from "@/types/chat"; import type { StartingRequest } from "@/types/chat";
import { ChatComposer } from "@/components/chat/ChatComposer";
type ChatStartingStateProps = { type ChatStartingStateProps = {
onSendMessage: (message: string) => void; onSendMessage: (message: string) => void;
supportsThinking: boolean;
thinkingEnabled: boolean;
setThinkingEnabled: (value: boolean | undefined) => void;
}; };
export function ChatStartingState({ onSendMessage }: ChatStartingStateProps) { export function ChatStartingState({
onSendMessage,
supportsThinking,
thinkingEnabled,
setThinkingEnabled,
}: ChatStartingStateProps) {
const { t } = useTranslation(["views/chat"]); const { t } = useTranslation(["views/chat"]);
const [input, setInput] = useState(""); const [input, setInput] = useState("");
@ -36,20 +43,13 @@ export function ChatStartingState({ onSendMessage }: ChatStartingStateProps) {
onSendMessage(prompt); onSendMessage(prompt);
}; };
const handleSubmit = () => { const handleSend = (textOverride?: string) => {
const text = input.trim(); const text = (textOverride ?? input).trim();
if (!text) return; if (!text) return;
onSendMessage(text); onSendMessage(text);
setInput(""); setInput("");
}; };
const handleKeyDown = (e: React.KeyboardEvent<HTMLInputElement>) => {
if (e.key === "Enter" && !e.shiftKey) {
e.preventDefault();
handleSubmit();
}
};
return ( return (
<div className="flex size-full flex-col items-center justify-center gap-6 p-8"> <div className="flex size-full flex-col items-center justify-center gap-6 p-8">
<div className="flex flex-col items-center gap-2"> <div className="flex flex-col items-center gap-2">
@ -77,22 +77,17 @@ export function ChatStartingState({ onSendMessage }: ChatStartingStateProps) {
</div> </div>
</div> </div>
<div className="flex w-full max-w-2xl flex-row items-center gap-2 rounded-xl bg-secondary p-3"> <div className="w-full max-w-2xl">
<Input <ChatComposer
className="h-12 w-full flex-1 border-transparent bg-transparent text-base shadow-none focus-visible:ring-0 dark:bg-transparent" input={input}
setInput={setInput}
sendMessage={handleSend}
placeholder={t("placeholder")} placeholder={t("placeholder")}
value={input} supportsThinking={supportsThinking}
onChange={(e) => setInput(e.target.value)} thinkingEnabled={thinkingEnabled}
onKeyDown={handleKeyDown} setThinkingEnabled={setThinkingEnabled}
large
/> />
<Button
variant="select"
className="size-10 shrink-0 rounded-full"
disabled={!input.trim()}
onClick={handleSubmit}
>
<FaArrowUpLong size="18" />
</Button>
</div> </div>
</div> </div>
); );

View File

@ -8,6 +8,12 @@ import {
} from "@/components/ui/collapsible"; } from "@/components/ui/collapsible";
import { Button } from "@/components/ui/button"; import { Button } from "@/components/ui/button";
import { cn } from "@/lib/utils"; import { cn } from "@/lib/utils";
import {
Tooltip,
TooltipContent,
TooltipProvider,
TooltipTrigger,
} from "@/components/ui/tooltip";
type ReasoningBubbleProps = { type ReasoningBubbleProps = {
/** The accumulated reasoning text from the model. */ /** The accumulated reasoning text from the model. */
@ -54,34 +60,42 @@ export function ReasoningBubble({
return ( return (
<div className="self-start rounded-2xl bg-muted/60 px-3 py-2 text-muted-foreground"> <div className="self-start rounded-2xl bg-muted/60 px-3 py-2 text-muted-foreground">
<Collapsible open={open} onOpenChange={handleOpenChange}> <TooltipProvider>
<CollapsibleTrigger asChild> <Collapsible open={open} onOpenChange={handleOpenChange}>
<Button <CollapsibleTrigger asChild>
variant="ghost" <Button
size="sm" variant="ghost"
className="h-auto w-full min-w-0 justify-start gap-2 whitespace-normal p-0 text-left text-xs hover:bg-transparent" size="sm"
> className="h-auto w-full min-w-0 justify-start gap-2 whitespace-normal p-0 text-left text-xs hover:bg-transparent"
<LuBrain >
className={cn( <Tooltip>
"size-3 shrink-0", <TooltipTrigger asChild>
!answerStarted && "animate-pulse", <div className="flex items-center gap-2">
)} <LuBrain
/> className={cn(
<span className="break-words font-medium">{label}</span> "size-3 shrink-0",
{answerStarted && !answerStarted && "animate-pulse",
(open ? ( )}
<LuChevronDown className="ml-auto size-3 shrink-0" /> />
) : ( </div>
<LuChevronRight className="ml-auto size-3 shrink-0" /> </TooltipTrigger>
))} <TooltipContent>{label}</TooltipContent>
</Button> </Tooltip>
</CollapsibleTrigger> {answerStarted &&
<CollapsibleContent> (open ? (
<pre className="scrollbar-container mt-2 max-h-64 overflow-auto whitespace-pre-wrap break-words rounded bg-muted/50 p-2 font-sans text-xs leading-relaxed"> <LuChevronDown className="ml-auto size-3 shrink-0" />
{reasoning} ) : (
</pre> <LuChevronRight className="ml-auto size-3 shrink-0" />
</CollapsibleContent> ))}
</Collapsible> </Button>
</CollapsibleTrigger>
<CollapsibleContent>
<pre className="scrollbar-container mt-2 max-h-64 overflow-auto whitespace-pre-wrap break-words rounded bg-muted/50 p-2 font-sans text-xs leading-relaxed">
{reasoning}
</pre>
</CollapsibleContent>
</Collapsible>
</TooltipProvider>
</div> </div>
); );
} }

View File

@ -23,6 +23,7 @@ import {
PopoverTrigger, PopoverTrigger,
} from "@/components/ui/popover"; } from "@/components/ui/popover";
import type { ConfigFormContext, JsonObject } from "@/types/configForm"; import type { ConfigFormContext, JsonObject } from "@/types/configForm";
import type { GenAIModelsResponse } from "@/types/chat";
import { getSizedFieldClassName } from "../utils"; import { getSizedFieldClassName } from "../utils";
type ProbeResponse = type ProbeResponse =
@ -73,11 +74,12 @@ export function GenAIModelWidget(props: WidgetProps) {
return `${e.provider ?? ""}|${e.base_url ?? ""}`; return `${e.provider ?? ""}|${e.base_url ?? ""}`;
}, [providerKey, formContext?.fullConfig]); }, [providerKey, formContext?.fullConfig]);
const { data: allModels, mutate: mutateModels } = useSWR< const { data: allModels, mutate: mutateModels } = useSWR<GenAIModelsResponse>(
Record<string, string[]> "genai/models",
>("genai/models", { {
revalidateOnFocus: false, revalidateOnFocus: false,
}); },
);
// Revalidate models when the saved config fingerprint changes (e.g. after // Revalidate models when the saved config fingerprint changes (e.g. after
// switching provider or base_url and saving). // switching provider or base_url and saving).
@ -89,9 +91,9 @@ export function GenAIModelWidget(props: WidgetProps) {
} }
}, [configFingerprint, mutateModels]); }, [configFingerprint, mutateModels]);
const fetchedModels = useMemo(() => { const fetchedModels = useMemo<string[]>(() => {
if (!allModels || !providerKey) return []; if (!allModels || !providerKey) return [];
return allModels[providerKey] ?? []; return allModels[providerKey]?.models ?? [];
}, [allModels, providerKey]); }, [allModels, providerKey]);
const [probeStatus, setProbeStatus] = useState<ProbeStatus>("idle"); const [probeStatus, setProbeStatus] = useState<ProbeStatus>("idle");

View File

@ -1,20 +1,21 @@
import { Button } from "@/components/ui/button"; import { Button } from "@/components/ui/button";
import { Input } from "@/components/ui/input";
import { FaArrowUpLong, FaStop } from "react-icons/fa6";
import { LuCircleAlert, LuMessageSquarePlus } from "react-icons/lu"; import { LuCircleAlert, LuMessageSquarePlus } from "react-icons/lu";
import { useTranslation } from "react-i18next"; import { useTranslation } from "react-i18next";
import { useState, useCallback, useRef, useEffect, useMemo } from "react"; import { useState, useCallback, useRef, useEffect, useMemo } from "react";
import axios from "axios"; import axios from "axios";
import useSWR from "swr";
import { ChatEventThumbnailsRow } from "@/components/chat/ChatEventThumbnailsRow"; import { ChatEventThumbnailsRow } from "@/components/chat/ChatEventThumbnailsRow";
import { MessageBubble } from "@/components/chat/ChatMessage"; import { MessageBubble } from "@/components/chat/ChatMessage";
import { ReasoningBubble } from "@/components/chat/ReasoningBubble"; import { ReasoningBubble } from "@/components/chat/ReasoningBubble";
import { ToolCallsGroup } from "@/components/chat/ToolCallsGroup"; import { ToolCallsGroup } from "@/components/chat/ToolCallsGroup";
import { ChatStartingState } from "@/components/chat/ChatStartingState"; import { ChatStartingState } from "@/components/chat/ChatStartingState";
import { ChatAttachmentChip } from "@/components/chat/ChatAttachmentChip"; import { ChatComposer } from "@/components/chat/ChatComposer";
import { ChatQuickReplies } from "@/components/chat/ChatQuickReplies";
import { ChatPaperclipButton } from "@/components/chat/ChatPaperclipButton";
import ChatSettings from "@/components/chat/ChatSettings"; import ChatSettings from "@/components/chat/ChatSettings";
import type { ChatMessage, ShowStatsMode } from "@/types/chat"; import type {
ChatMessage,
GenAIModelsResponse,
ShowStatsMode,
} from "@/types/chat";
import { usePersistence } from "@/hooks/use-persistence"; import { usePersistence } from "@/hooks/use-persistence";
import { import {
getEventIdsFromSearchObjectsToolCalls, getEventIdsFromSearchObjectsToolCalls,
@ -38,9 +39,26 @@ export default function ChatPage() {
"chat-auto-scroll", "chat-auto-scroll",
true, true,
); );
const [thinkingEnabled, setThinkingEnabled] = usePersistence<boolean>(
"chat-thinking-enabled",
false,
);
const scrollRef = useRef<HTMLDivElement>(null); const scrollRef = useRef<HTMLDivElement>(null);
const abortRef = useRef<AbortController | null>(null); const abortRef = useRef<AbortController | null>(null);
const { data: genaiInfo } = useSWR<GenAIModelsResponse>("genai/models", {
revalidateOnFocus: false,
});
const supportsThinking = useMemo(() => {
if (!genaiInfo) return false;
for (const entry of Object.values(genaiInfo)) {
if (entry.roles?.includes("chat") && entry.supports_toggleable_thinking) {
return true;
}
}
return false;
}, [genaiInfo]);
useEffect(() => { useEffect(() => {
document.title = t("documentTitle"); document.title = t("documentTitle");
}, [t]); }, [t]);
@ -100,9 +118,10 @@ export default function ChatPage() {
defaultErrorMessage: t("error"), defaultErrorMessage: t("error"),
}, },
controller.signal, controller.signal,
supportsThinking ? { enableThinking: !!thinkingEnabled } : {},
); );
}, },
[isLoading, t], [isLoading, supportsThinking, t, thinkingEnabled],
); );
const recentEventIds = useMemo(() => { const recentEventIds = useMemo(() => {
@ -305,6 +324,9 @@ export default function ChatPage() {
setInput(""); setInput("");
submitConversation([{ role: "user", content: message }]); submitConversation([{ role: "user", content: message }]);
}} }}
supportsThinking={supportsThinking}
thinkingEnabled={!!thinkingEnabled}
setThinkingEnabled={setThinkingEnabled}
/> />
)} )}
</div> </div>
@ -313,7 +335,7 @@ export default function ChatPage() {
{hasStarted && ( {hasStarted && (
<div className="flex shrink-0 justify-center p-2 md:px-4 md:pb-4"> <div className="flex shrink-0 justify-center p-2 md:px-4 md:pb-4">
<div className="flex w-full xl:w-[50%] 3xl:w-[35%]"> <div className="flex w-full xl:w-[50%] 3xl:w-[35%]">
<ChatEntry <ChatComposer
input={input} input={input}
setInput={setInput} setInput={setInput}
sendMessage={sendMessage} sendMessage={sendMessage}
@ -324,6 +346,9 @@ export default function ChatPage() {
onAttach={setAttachedEventId} onAttach={setAttachedEventId}
onStop={stopGeneration} onStop={stopGeneration}
recentEventIds={recentEventIds} recentEventIds={recentEventIds}
supportsThinking={supportsThinking}
thinkingEnabled={!!thinkingEnabled}
setThinkingEnabled={setThinkingEnabled}
/> />
</div> </div>
</div> </div>
@ -331,89 +356,3 @@ export default function ChatPage() {
</div> </div>
); );
} }
/** Props accepted by the ChatEntry composer component. */
type ChatEntryProps = {
// Current text shown in the message input.
input: string;
// Called with the new text on every input change.
setInput: (value: string) => void;
// Sends a message; called without arguments from the input/send button and
// with explicit text from the quick replies.
sendMessage: (textOverride?: string) => void;
// While true the stop button replaces the send button, and the quick replies
// and paperclip button are disabled.
isLoading: boolean;
// Placeholder text for the message input.
placeholder: string;
// Event id rendered as an attachment chip; null when nothing is attached.
attachedEventId: string | null;
// Passed to the attachment chip as its remove handler.
onClearAttachment: () => void;
// Passed to the paperclip button as its attach handler.
onAttach: (eventId: string) => void;
// Click handler of the stop button shown while loading.
onStop: () => void;
// Event ids forwarded to the paperclip button.
recentEventIds: string[];
};
/**
 * Message composer for the chat page: optional attachment chip and quick
 * replies, a paperclip button, the text input, and a send/stop button.
 *
 * Pressing Enter (without Shift) sends the current message, except while an
 * IME composition is in progress, where Enter only confirms the candidate.
 */
function ChatEntry({
  input,
  setInput,
  sendMessage,
  isLoading,
  placeholder,
  attachedEventId,
  onClearAttachment,
  onAttach,
  onStop,
  recentEventIds,
}: ChatEntryProps) {
  const handleKeyDown = (e: React.KeyboardEvent<HTMLInputElement>) => {
    // While an IME (e.g. Chinese/Japanese/Korean input) is composing, Enter
    // commits the candidate text; it must not submit the half-typed message.
    if (e.nativeEvent.isComposing) {
      return;
    }
    if (e.key === "Enter" && !e.shiftKey) {
      e.preventDefault();
      sendMessage();
    }
  };

  return (
    <div className="flex w-full flex-col items-stretch justify-center gap-2 rounded-xl bg-secondary p-3">
      {attachedEventId && (
        <div className="flex items-center">
          <ChatAttachmentChip
            eventId={attachedEventId}
            mode="composer"
            onRemove={onClearAttachment}
          />
        </div>
      )}
      {attachedEventId && (
        <ChatQuickReplies
          onSend={(text) => sendMessage(text)}
          disabled={isLoading}
        />
      )}
      <div className="flex w-full flex-row items-center gap-2">
        {/* Only one attachment at a time: disabled once an event is attached. */}
        <ChatPaperclipButton
          recentEventIds={recentEventIds}
          onAttach={onAttach}
          disabled={isLoading || attachedEventId != null}
        />
        <Input
          className="w-full flex-1 border-transparent bg-transparent shadow-none focus-visible:ring-0 dark:bg-transparent"
          placeholder={placeholder}
          value={input}
          onChange={(e) => setInput(e.target.value)}
          onKeyDown={handleKeyDown}
          aria-busy={isLoading}
        />
        {/* While a response is generating, the send button becomes a stop button. */}
        {isLoading ? (
          <Button
            variant="destructive"
            className="size-10 shrink-0 rounded-full"
            onClick={onStop}
          >
            <FaStop className="size-3" />
          </Button>
        ) : (
          <Button
            variant="select"
            className="size-10 shrink-0 rounded-full"
            disabled={!input.trim()}
            onClick={() => sendMessage()}
          >
            <FaArrowUpLong className="size-4" />
          </Button>
        )}
      </div>
    </div>
  );
}

View File

@ -25,3 +25,11 @@ export type ChatStats = {
}; };
export type ShowStatsMode = "while_generating" | "always"; export type ShowStatsMode = "while_generating" | "always";
/** Per-provider entry of the "genai/models" response. */
export type GenAIProviderInfo = {
// Model names listed for this provider.
models: string[];
// Roles assigned to this provider; the chat page checks for "chat".
roles: string[];
// Whether thinking can be toggled per request; the chat page only offers the
// thinking toggle when a "chat" provider reports true here.
supports_toggleable_thinking: boolean;
};
/** Shape of the "genai/models" response: provider info indexed by provider key. */
export type GenAIModelsResponse = Record<string, GenAIProviderInfo>;

View File

@ -34,12 +34,17 @@ type StreamChunk =
* POST to chat/completion with stream: true, parse NDJSON stream, and invoke * POST to chat/completion with stream: true, parse NDJSON stream, and invoke
* callbacks so the caller can update UI (e.g. React state). * callbacks so the caller can update UI (e.g. React state).
*/ */
/** Optional per-request settings for streamChatCompletion. */
export type StreamChatOptions = {
// When defined, sent in the request body as `enable_thinking`; when left
// undefined the field is omitted from the request.
enableThinking?: boolean;
};
export async function streamChatCompletion( export async function streamChatCompletion(
url: string, url: string,
headers: Record<string, string>, headers: Record<string, string>,
apiMessages: { role: string; content: string }[], apiMessages: { role: string; content: string }[],
callbacks: StreamChatCallbacks, callbacks: StreamChatCallbacks,
signal?: AbortSignal, signal?: AbortSignal,
options: StreamChatOptions = {},
): Promise<void> { ): Promise<void> {
const { const {
updateMessages, updateMessages,
@ -50,10 +55,17 @@ export async function streamChatCompletion(
} = callbacks; } = callbacks;
try { try {
const body: Record<string, unknown> = {
messages: apiMessages,
stream: true,
};
if (options.enableThinking !== undefined) {
body.enable_thinking = options.enableThinking;
}
const res = await fetch(url, { const res = await fetch(url, {
method: "POST", method: "POST",
headers, headers,
body: JSON.stringify({ messages: apiMessages, stream: true }), body: JSON.stringify(body),
signal, signal,
}); });