From 66a2417229e3131834946314d323de155ad385c7 Mon Sep 17 00:00:00 2001
From: Nicolas Mowen <nickmowen213@gmail.com>
Date: Thu, 21 May 2026 11:54:23 -0600
Subject: [PATCH] Support Dynamic Thinking Models (#23281)

* Add ability to toggle thinking

* Disable thinking for descriptions automatically

* mypy

* Cleanup
---
 frigate/api/chat.py                           |   2 +
 frigate/api/defs/request/chat_body.py         |   7 +
 frigate/genai/__init__.py                     |  19 ++-
 frigate/genai/manager.py                      |  19 ++-
 frigate/genai/plugins/gemini.py               |   6 +-
 frigate/genai/plugins/llama_cpp.py            |  52 ++++++-
 frigate/genai/plugins/ollama.py               |  42 ++++-
 frigate/genai/plugins/openai.py               |   6 +-
 web/public/locales/en/views/chat.json         |   3 +
 web/src/components/chat/ChatComposer.tsx      | 147 ++++++++++++++++++
 web/src/components/chat/ChatStartingState.tsx |  47 +++---
 web/src/components/chat/ReasoningBubble.tsx   |  70 +++++----
 .../theme/widgets/GenAIModelWidget.tsx        |  16 +-
 web/src/pages/Chat.tsx                        | 127 ++++-----------
 web/src/types/chat.ts                         |   8 +
 web/src/utils/chatUtil.ts                     |  14 +-
 16 files changed, 410 insertions(+), 175 deletions(-)
 create mode 100644 web/src/components/chat/ChatComposer.tsx

diff --git a/frigate/api/chat.py b/frigate/api/chat.py
index 291503dbba..c7d197bf91 100644
--- a/frigate/api/chat.py
+++ b/frigate/api/chat.py
@@ -1173,6 +1173,7 @@ async def chat_completion(
                     messages=conversation,
                     tools=tools if tools else None,
                     tool_choice="auto",
+                    enable_thinking=body.enable_thinking,
                 ):
                     if await request.is_disconnected():
                         logger.debug("Client disconnected, stopping chat stream")
@@ -1267,6 +1268,7 @@ async def chat_completion(
                 messages=conversation,
                 tools=tools if tools else None,
                 tool_choice="auto",
+                enable_thinking=body.enable_thinking,
             )
 
             if response.get("finish_reason") == "error":
diff --git a/frigate/api/defs/request/chat_body.py b/frigate/api/defs/request/chat_body.py
index 79ca3a6fef..228781c80b 100644
--- a/frigate/api/defs/request/chat_body.py
+++ b/frigate/api/defs/request/chat_body.py
@@ -36,3 +36,10 @@ class ChatCompletionRequest(BaseModel):
         default=False,
         description="If true, stream the final assistant response in the body as newline-delimited JSON.",
     )
+    enable_thinking: Optional[bool] = Field(
+        default=None,
+        description=(
+            "Per-request thinking toggle. None means use the provider default. "
+            "Ignored by providers that do not expose a per-request thinking switch."
+        ),
+    )
diff --git a/frigate/genai/__init__.py b/frigate/genai/__init__.py
index 28a6844d95..cf51550b47 100644
--- a/frigate/genai/__init__.py
+++ b/frigate/genai/__init__.py
@@ -222,8 +222,15 @@ class GenAIClient:
         prompt: str,
         images: list[bytes],
         response_format: Optional[dict] = None,
+        enable_thinking: bool = False,
     ) -> Optional[str]:
-        """Submit a request to the provider."""
+        """Submit a request to the provider.
+
+        ``enable_thinking`` is honored only by providers that report
+        ``supports_toggleable_thinking``. Description-style callers leave it
+        at the default (off) since synthesis tasks don't benefit from
+        reasoning traces.
+        """
         return None
 
     @property
@@ -235,6 +242,11 @@ class GenAIClient:
         """
         return True
 
+    @property
+    def supports_toggleable_thinking(self) -> bool:
+        """Whether the configured model exposes a per-request thinking toggle."""
+        return False
+
     def list_models(self) -> list[str]:
         """Return the list of model names available from this provider.
 
@@ -278,6 +290,7 @@ class GenAIClient:
         messages: list[dict[str, Any]],
         tools: Optional[list[dict[str, Any]]] = None,
         tool_choice: Optional[str] = "auto",
+        enable_thinking: Optional[bool] = None,
     ) -> dict[str, Any]:
         """
         Send chat messages to LLM with optional tool definitions.
@@ -301,7 +314,9 @@ class GenAIClient:
                 - 'none': Model must not call tools
                 - 'required': Model must call at least one tool
                 - Or a dict specifying a specific tool to call
-            **kwargs: Additional provider-specific parameters.
+            enable_thinking: Per-request thinking toggle. None means use the
+                provider default. Ignored by providers without a per-request
+                toggle (see `supports_toggleable_thinking`).
 
         Returns:
             Dictionary with:
diff --git a/frigate/genai/manager.py b/frigate/genai/manager.py
index 94719f4291..a1325d3279 100644
--- a/frigate/genai/manager.py
+++ b/frigate/genai/manager.py
@@ -6,7 +6,7 @@ no chat feature is active) are never initialized.
 """
 
 import logging
-from typing import TYPE_CHECKING, Optional
+from typing import TYPE_CHECKING, Any, Optional
 
 from frigate.config import FrigateConfig
 from frigate.config.camera.genai import GenAIConfig, GenAIRoleEnum
@@ -108,11 +108,16 @@ class GenAIClientManager:
         name = self._role_map.get(GenAIRoleEnum.embeddings)
         return self._get_client(name) if name else None
 
-    def list_models(self) -> dict[str, list[str]]:
-        """Return available models keyed by config entry name."""
-        result: dict[str, list[str]] = {}
-        for name in self._configs:
+    def list_models(self) -> dict[str, dict[str, Any]]:
+        """Return per-entry model lists and capabilities, keyed by config entry name."""
+        result: dict[str, dict[str, Any]] = {}
+        for name, genai_cfg in self._configs.items():
             client = self._get_client(name)
-            if client:
-                result[name] = client.list_models()
+            if not client:
+                continue
+            result[name] = {
+                "models": client.list_models(),
+                "roles": [r.value for r in genai_cfg.roles],
+                "supports_toggleable_thinking": client.supports_toggleable_thinking,
+            }
         return result
diff --git a/frigate/genai/plugins/gemini.py b/frigate/genai/plugins/gemini.py
index bcac09d0e3..6e4b9283fb 100644
--- a/frigate/genai/plugins/gemini.py
+++ b/frigate/genai/plugins/gemini.py
@@ -62,6 +62,7 @@ class GeminiClient(GenAIClient):
         prompt: str,
         images: list[bytes],
         response_format: Optional[dict] = None,
+        enable_thinking: bool = False,
     ) -> Optional[str]:
         """Submit a request to Gemini."""
         contents = [prompt] + [
@@ -119,11 +120,14 @@ class GeminiClient(GenAIClient):
         messages: list[dict[str, Any]],
         tools: Optional[list[dict[str, Any]]] = None,
         tool_choice: Optional[str] = "auto",
+        enable_thinking: Optional[bool] = None,
     ) -> dict[str, Any]:
         """
         Send chat messages to Gemini with optional tool definitions.
 
-        Implements function calling/tool usage for Gemini models.
+        Implements function calling/tool usage for Gemini models. Thinking is
+        configured at the model level for Gemini, so ``enable_thinking`` is
+        accepted for interface parity and ignored.
         """
         try:
             # Convert messages to Gemini format
diff --git a/frigate/genai/plugins/llama_cpp.py b/frigate/genai/plugins/llama_cpp.py
index 2dddf5244e..d5458cf8f9 100644
--- a/frigate/genai/plugins/llama_cpp.py
+++ b/frigate/genai/plugins/llama_cpp.py
@@ -122,6 +122,7 @@ class LlamaCppClient(GenAIClient):
     _supports_vision: bool
     _supports_audio: bool
     _supports_tools: bool
+    _supports_reasoning: bool
     _image_token_cache: dict[tuple[int, int], int]
     _text_baseline_tokens: int | None
     _media_marker: str
@@ -135,6 +136,7 @@ class LlamaCppClient(GenAIClient):
         self._supports_vision = False
         self._supports_audio = False
         self._supports_tools = False
+        self._supports_reasoning = False
         self._image_token_cache = {}
         self._text_baseline_tokens = None
         self._media_marker = "<__media__>"
@@ -164,15 +166,17 @@ class LlamaCppClient(GenAIClient):
         self._supports_vision = info["supports_vision"]
         self._supports_audio = info["supports_audio"]
         self._supports_tools = info["supports_tools"]
+        self._supports_reasoning = info["supports_reasoning"]
         self._media_marker = info["media_marker"]
 
         logger.info(
-            "llama.cpp model '%s' initialized — context: %s, vision: %s, audio: %s, tools: %s",
+            "llama.cpp model '%s' initialized — context: %s, vision: %s, audio: %s, tools: %s, reasoning: %s",
             configured_model,
             self._context_size or "unknown",
             self._supports_vision,
             self._supports_audio,
             self._supports_tools,
+            self._supports_reasoning,
         )
 
         return base_url
@@ -200,6 +204,7 @@ class LlamaCppClient(GenAIClient):
             "supports_vision": False,
             "supports_audio": False,
             "supports_tools": False,
+            "supports_reasoning": False,
             "media_marker": "<__media__>",
         }
 
@@ -279,10 +284,17 @@ class LlamaCppClient(GenAIClient):
                 info["supports_vision"] = bool(modalities.get("vision", False))
                 info["supports_audio"] = bool(modalities.get("audio", False))
 
+            chat_caps = props.get("chat_template_caps") or {}
+
             if not info["supports_tools"]:
-                chat_caps = props.get("chat_template_caps", {})
                 info["supports_tools"] = bool(chat_caps.get("supports_tools", False))
 
+            # llama.cpp does not advertise per-template reasoning support, so
+            # detect it by looking for the `enable_thinking` toggle variable
+            # in the Jinja chat template itself.
+            chat_template = props.get("chat_template") or ""
+            info["supports_reasoning"] = "enable_thinking" in chat_template
+
             media_marker = props.get("media_marker")
             if isinstance(media_marker, str) and media_marker:
                 info["media_marker"] = media_marker
@@ -300,6 +312,7 @@ class LlamaCppClient(GenAIClient):
         prompt: str,
         images: list[bytes],
         response_format: Optional[dict] = None,
+        enable_thinking: bool = False,
     ) -> Optional[str]:
         """Submit a request to llama.cpp server."""
         if self.provider is None:
@@ -327,7 +340,7 @@ class LlamaCppClient(GenAIClient):
                 )
 
             # Build request payload with llama.cpp native options
-            payload = {
+            payload: dict[str, Any] = {
                 "model": self.genai_config.model,
                 "messages": [
                     {
@@ -341,6 +354,9 @@ class LlamaCppClient(GenAIClient):
             if response_format:
                 payload["response_format"] = response_format
 
+            if self.supports_toggleable_thinking:
+                payload["chat_template_kwargs"] = {"enable_thinking": enable_thinking}
+
             response = requests.post(
                 f"{self.provider}/v1/chat/completions",
                 json=payload,
@@ -377,6 +393,10 @@ class LlamaCppClient(GenAIClient):
         """Whether the loaded model supports tool/function calling."""
         return self._supports_tools
 
+    @property
+    def supports_toggleable_thinking(self) -> bool:
+        return self._supports_reasoning  # set from the chat-template probe
+
     def list_models(self) -> list[str]:
         """Return available model IDs from the llama.cpp server."""
         base_url = self.provider or (
@@ -504,6 +524,7 @@ class LlamaCppClient(GenAIClient):
         tools: Optional[list[dict[str, Any]]],
         tool_choice: Optional[str],
         stream: bool = False,
+        enable_thinking: Optional[bool] = None,
     ) -> dict[str, Any]:
         """Build request payload for chat completions (sync or stream)."""
         openai_tool_choice = None
@@ -519,14 +540,21 @@ class LlamaCppClient(GenAIClient):
             "messages": messages,
             "model": self.genai_config.model,
         }
+
         if stream:
             payload["stream"] = True
             payload["stream_options"] = {"include_usage": True}
             payload["timings_per_token"] = True
+
         if tools:
             payload["tools"] = tools
+
             if openai_tool_choice is not None:
                 payload["tool_choice"] = openai_tool_choice
+
+        if enable_thinking is not None and self._supports_reasoning:
+            payload["chat_template_kwargs"] = {"enable_thinking": enable_thinking}
+
         provider_opts = {
             k: v for k, v in self.provider_options.items() if k != "context_size"
         }
@@ -732,6 +760,7 @@ class LlamaCppClient(GenAIClient):
         messages: list[dict[str, Any]],
         tools: Optional[list[dict[str, Any]]] = None,
         tool_choice: Optional[str] = "auto",
+        enable_thinking: Optional[bool] = None,
     ) -> dict[str, Any]:
         """
         Send chat messages to llama.cpp server with optional tool definitions.
@@ -749,7 +778,13 @@ class LlamaCppClient(GenAIClient):
                 "finish_reason": "error",
             }
         try:
-            payload = self._build_payload(messages, tools, tool_choice, stream=False)
+            payload = self._build_payload(
+                messages,
+                tools,
+                tool_choice,
+                stream=False,
+                enable_thinking=enable_thinking,
+            )
             response = requests.post(
                 f"{self.provider}/v1/chat/completions",
                 json=payload,
@@ -797,6 +832,7 @@ class LlamaCppClient(GenAIClient):
         messages: list[dict[str, Any]],
         tools: Optional[list[dict[str, Any]]] = None,
         tool_choice: Optional[str] = "auto",
+        enable_thinking: Optional[bool] = None,
     ) -> AsyncGenerator[tuple[str, Any], None]:
         """Stream chat with tools via OpenAI-compatible streaming API."""
         if self.provider is None:
@@ -813,7 +849,13 @@ class LlamaCppClient(GenAIClient):
             )
             return
         try:
-            payload = self._build_payload(messages, tools, tool_choice, stream=True)
+            payload = self._build_payload(
+                messages,
+                tools,
+                tool_choice,
+                stream=True,
+                enable_thinking=enable_thinking,
+            )
             content_parts: list[str] = []
             reasoning_parts: list[str] = []
             tool_calls_by_index: dict[int, dict[str, Any]] = {}
diff --git a/frigate/genai/plugins/ollama.py b/frigate/genai/plugins/ollama.py
index 0f95dd3f9d..08176f524b 100644
--- a/frigate/genai/plugins/ollama.py
+++ b/frigate/genai/plugins/ollama.py
@@ -98,6 +98,22 @@ class OllamaClient(GenAIClient):
 
     provider: ApiClient | None
     provider_options: dict[str, Any]
+    _supports_thinking_cache: Optional[bool] = None
+
+    @property
+    def supports_toggleable_thinking(self) -> bool:
+        if self._supports_thinking_cache is not None:
+            return self._supports_thinking_cache  # probed successfully earlier
+        if self.provider is None:
+            return False
+        try:
+            response = self.provider.show(self.genai_config.model)
+            capabilities = response.get("capabilities") or []
+            self._supports_thinking_cache = "thinking" in capabilities
+        except Exception as e:
+            logger.debug("Failed to query Ollama model capabilities: %s", e)
+            return False  # not cached, so a later call retries the query
+        return self._supports_thinking_cache
 
     def _auth_headers(self) -> dict | None:
         if self.genai_config.api_key:
@@ -178,6 +194,7 @@ class OllamaClient(GenAIClient):
         prompt: str,
         images: list[bytes],
         response_format: Optional[dict] = None,
+        enable_thinking: bool = False,
     ) -> Optional[str]:
         """Submit a request to Ollama"""
         if self.provider is None:
@@ -194,6 +211,8 @@ class OllamaClient(GenAIClient):
                 schema = response_format.get("json_schema", {}).get("schema")
                 if schema:
                     ollama_options["format"] = self._clean_schema_for_ollama(schema)
+            if self.supports_toggleable_thinking:
+                ollama_options["think"] = enable_thinking
             logger.debug(
                 "Ollama generate request: model=%s, prompt_len=%s, image_count=%s, "
                 "has_format=%s, options=%s",
@@ -274,6 +293,7 @@ class OllamaClient(GenAIClient):
         tools: Optional[list[dict[str, Any]]],
         tool_choice: Optional[str],
         stream: bool = False,
+        enable_thinking: Optional[bool] = None,
     ) -> dict[str, Any]:
         """Build request_messages and params for chat (sync or stream)."""
         request_messages = []
@@ -318,6 +338,8 @@ class OllamaClient(GenAIClient):
             request_params["stream"] = True
         if tools:
             request_params["tools"] = tools
+        if enable_thinking is not None and self.supports_toggleable_thinking:
+            request_params["think"] = enable_thinking
         return request_params
 
     def _message_from_response(self, response: dict[str, Any]) -> dict[str, Any]:
@@ -365,6 +387,7 @@ class OllamaClient(GenAIClient):
         messages: list[dict[str, Any]],
         tools: Optional[list[dict[str, Any]]] = None,
         tool_choice: Optional[str] = "auto",
+        enable_thinking: Optional[bool] = None,
     ) -> dict[str, Any]:
         if self.provider is None:
             logger.warning(
@@ -377,7 +400,11 @@ class OllamaClient(GenAIClient):
             }
         try:
             request_params = self._build_request_params(
-                messages, tools, tool_choice, stream=False
+                messages,
+                tools,
+                tool_choice,
+                stream=False,
+                enable_thinking=enable_thinking,
             )
             response = self.provider.chat(**request_params)
             return self._message_from_response(response)
@@ -401,6 +428,7 @@ class OllamaClient(GenAIClient):
         messages: list[dict[str, Any]],
         tools: Optional[list[dict[str, Any]]] = None,
         tool_choice: Optional[str] = "auto",
+        enable_thinking: Optional[bool] = None,
     ) -> AsyncGenerator[tuple[str, Any], None]:
         """Stream chat with tools; yields content deltas then final message.
 
@@ -430,7 +458,11 @@ class OllamaClient(GenAIClient):
                     "Ollama: tools provided, using non-streaming call for tool support"
                 )
                 request_params = self._build_request_params(
-                    messages, tools, tool_choice, stream=False
+                    messages,
+                    tools,
+                    tool_choice,
+                    stream=False,
+                    enable_thinking=enable_thinking,
                 )
                 async_client = OllamaAsyncClient(
                     host=self.genai_config.base_url,
@@ -452,7 +484,11 @@ class OllamaClient(GenAIClient):
                 return
 
             request_params = self._build_request_params(
-                messages, tools, tool_choice, stream=True
+                messages,
+                tools,
+                tool_choice,
+                stream=True,
+                enable_thinking=enable_thinking,
             )
             async_client = OllamaAsyncClient(
                 host=self.genai_config.base_url,
diff --git a/frigate/genai/plugins/openai.py b/frigate/genai/plugins/openai.py
index f07e83b5dc..8d422bfb31 100644
--- a/frigate/genai/plugins/openai.py
+++ b/frigate/genai/plugins/openai.py
@@ -61,6 +61,7 @@ class OpenAIClient(GenAIClient):
         prompt: str,
         images: list[bytes],
         response_format: Optional[dict] = None,
+        enable_thinking: bool = False,
     ) -> Optional[str]:
         """Submit a request to OpenAI."""
         encoded_images = [base64.b64encode(image).decode("utf-8") for image in images]
@@ -187,11 +188,14 @@ class OpenAIClient(GenAIClient):
         messages: list[dict[str, Any]],
         tools: Optional[list[dict[str, Any]]] = None,
         tool_choice: Optional[str] = "auto",
+        enable_thinking: Optional[bool] = None,
     ) -> dict[str, Any]:
         """
         Send chat messages to OpenAI with optional tool definitions.
 
-        Implements function calling/tool usage for OpenAI models.
+        Implements function calling/tool usage for OpenAI models. The OpenAI
+        chat completions API does not expose a per-request thinking toggle,
+        so ``enable_thinking`` is accepted for interface parity and ignored.
         """
         try:
             openai_tool_choice = None
diff --git a/web/public/locales/en/views/chat.json b/web/public/locales/en/views/chat.json
index 9e68551f03..363b0e68e4 100644
--- a/web/public/locales/en/views/chat.json
+++ b/web/public/locales/en/views/chat.json
@@ -65,5 +65,8 @@
     "active": "Reasoning…",
     "show": "Show reasoning",
     "hide": "Hide reasoning"
+  },
+  "thinking": {
+    "toggle": "Toggle thinking"
   }
 }
diff --git a/web/src/components/chat/ChatComposer.tsx b/web/src/components/chat/ChatComposer.tsx
new file mode 100644
index 0000000000..5ccfe93786
--- /dev/null
+++ b/web/src/components/chat/ChatComposer.tsx
@@ -0,0 +1,147 @@
+import { Button } from "@/components/ui/button";
+import { Input } from "@/components/ui/input";
+import { FaArrowUpLong, FaStop } from "react-icons/fa6";
+import { LuBrain } from "react-icons/lu";
+import { useTranslation } from "react-i18next";
+import { cn } from "@/lib/utils";
+import {
+  Tooltip,
+  TooltipContent,
+  TooltipProvider,
+  TooltipTrigger,
+} from "@/components/ui/tooltip";
+import { ChatAttachmentChip } from "@/components/chat/ChatAttachmentChip";
+import { ChatQuickReplies } from "@/components/chat/ChatQuickReplies";
+import { ChatPaperclipButton } from "@/components/chat/ChatPaperclipButton";
+
+type ChatComposerProps = {
+  input: string;
+  setInput: (value: string) => void;
+  sendMessage: (textOverride?: string) => void; // no argument on Enter / send click
+  placeholder: string;
+
+  supportsThinking: boolean; // when true, the thinking toggle button is rendered
+  thinkingEnabled: boolean;
+  setThinkingEnabled: (value: boolean | undefined) => void;
+
+  isLoading?: boolean;
+  onStop?: () => void; // with isLoading, a stop button replaces the send button
+
+  attachedEventId?: string | null;
+  onClearAttachment?: () => void;
+  onAttach?: (eventId: string) => void; // providing this shows the paperclip button
+  recentEventIds?: string[];
+
+  large?: boolean; // adds "h-12 text-base" to the input
+};
+
+export function ChatComposer({
+  input,
+  setInput,
+  sendMessage,
+  placeholder,
+  supportsThinking,
+  thinkingEnabled,
+  setThinkingEnabled,
+  isLoading = false,
+  onStop,
+  attachedEventId,
+  onClearAttachment,
+  onAttach,
+  recentEventIds,
+  large = false,
+}: ChatComposerProps) {
+  const { t } = useTranslation(["views/chat"]);
+
+  const handleKeyDown = (e: React.KeyboardEvent<HTMLInputElement>) => {
+    // Enter sends; Shift+Enter and Enter confirming an IME composition do not.
+    if (e.key !== "Enter" || e.shiftKey || e.nativeEvent.isComposing) return;
+    e.preventDefault();
+    sendMessage();
+  };
+
+  const showPaperclip = !!onAttach;
+  const showStop = isLoading && !!onStop;
+
+  return (
+    <div className="flex w-full flex-col items-stretch justify-center gap-2 rounded-xl bg-secondary p-3">
+      {attachedEventId && onClearAttachment && (
+        <div className="flex items-center">
+          <ChatAttachmentChip
+            eventId={attachedEventId}
+            mode="composer"
+            onRemove={onClearAttachment}
+          />
+        </div>
+      )}
+      {attachedEventId && (
+        <ChatQuickReplies
+          onSend={(text) => sendMessage(text)}
+          disabled={isLoading}
+        />
+      )}
+      <div className="flex w-full flex-row items-center gap-2">
+        {showPaperclip && (
+          <ChatPaperclipButton
+            recentEventIds={recentEventIds ?? []}
+            onAttach={onAttach!}
+            disabled={isLoading || attachedEventId != null}
+          />
+        )}
+        {supportsThinking && (
+          <TooltipProvider>
+            <Tooltip>
+              <TooltipTrigger asChild>
+                <Button
+                  type="button"
+                  size="sm"
+                  variant={thinkingEnabled ? "select" : "ghost"}
+                  aria-pressed={thinkingEnabled}
+                  aria-label={t("thinking.toggle")}
+                  className={cn(
+                    "flex size-9 shrink-0 items-center justify-center rounded-full p-0",
+                    !thinkingEnabled && "text-secondary-foreground",
+                  )}
+                  onClick={() => setThinkingEnabled(!thinkingEnabled)}
+                  disabled={isLoading}
+                >
+                  <LuBrain className="size-4" />
+                </Button>
+              </TooltipTrigger>
+              <TooltipContent>{t("thinking.toggle")}</TooltipContent>
+            </Tooltip>
+          </TooltipProvider>
+        )}
+        <Input
+          className={cn(
+            "w-full flex-1 border-transparent bg-transparent shadow-none focus-visible:ring-0 dark:bg-transparent",
+            large && "h-12 text-base",
+          )}
+          placeholder={placeholder}
+          value={input}
+          onChange={(e) => setInput(e.target.value)}
+          onKeyDown={handleKeyDown}
+          aria-busy={isLoading}
+        />
+        {showStop ? (
+          <Button
+            variant="destructive"
+            className="size-10 shrink-0 rounded-full"
+            onClick={onStop}
+          >
+            <FaStop className="size-3" />
+          </Button>
+        ) : (
+          <Button
+            variant="select"
+            className="size-10 shrink-0 rounded-full"
+            disabled={!input.trim() || isLoading}
+            onClick={() => sendMessage()}
+          >
+            <FaArrowUpLong className="size-4" />
+          </Button>
+        )}
+      </div>
+    </div>
+  );
+}
diff --git a/web/src/components/chat/ChatStartingState.tsx b/web/src/components/chat/ChatStartingState.tsx
index a0a3a044c8..3e77677379 100644
--- a/web/src/components/chat/ChatStartingState.tsx
+++ b/web/src/components/chat/ChatStartingState.tsx
@@ -1,15 +1,22 @@
 import { Button } from "@/components/ui/button";
-import { Input } from "@/components/ui/input";
-import { FaArrowUpLong } from "react-icons/fa6";
 import { useTranslation } from "react-i18next";
 import { useState } from "react";
 import type { StartingRequest } from "@/types/chat";
+import { ChatComposer } from "@/components/chat/ChatComposer";
 
 type ChatStartingStateProps = {
   onSendMessage: (message: string) => void;
+  supportsThinking: boolean;
+  thinkingEnabled: boolean;
+  setThinkingEnabled: (value: boolean | undefined) => void;
 };
 
-export function ChatStartingState({ onSendMessage }: ChatStartingStateProps) {
+export function ChatStartingState({
+  onSendMessage,
+  supportsThinking,
+  thinkingEnabled,
+  setThinkingEnabled,
+}: ChatStartingStateProps) {
   const { t } = useTranslation(["views/chat"]);
   const [input, setInput] = useState("");
 
@@ -36,20 +43,13 @@ export function ChatStartingState({ onSendMessage }: ChatStartingStateProps) {
     onSendMessage(prompt);
   };
 
-  const handleSubmit = () => {
-    const text = input.trim();
+  const handleSend = (textOverride?: string) => {
+    const text = (textOverride ?? input).trim();
     if (!text) return;
     onSendMessage(text);
     setInput("");
   };
 
-  const handleKeyDown = (e: React.KeyboardEvent<HTMLInputElement>) => {
-    if (e.key === "Enter" && !e.shiftKey) {
-      e.preventDefault();
-      handleSubmit();
-    }
-  };
-
   return (
     <div className="flex size-full flex-col items-center justify-center gap-6 p-8">
       <div className="flex flex-col items-center gap-2">
@@ -77,22 +77,17 @@ export function ChatStartingState({ onSendMessage }: ChatStartingStateProps) {
         </div>
       </div>
 
-      <div className="flex w-full max-w-2xl flex-row items-center gap-2 rounded-xl bg-secondary p-3">
-        <Input
-          className="h-12 w-full flex-1 border-transparent bg-transparent text-base shadow-none focus-visible:ring-0 dark:bg-transparent"
+      <div className="w-full max-w-2xl">
+        <ChatComposer
+          input={input}
+          setInput={setInput}
+          sendMessage={handleSend}
           placeholder={t("placeholder")}
-          value={input}
-          onChange={(e) => setInput(e.target.value)}
-          onKeyDown={handleKeyDown}
+          supportsThinking={supportsThinking}
+          thinkingEnabled={thinkingEnabled}
+          setThinkingEnabled={setThinkingEnabled}
+          large
         />
-        <Button
-          variant="select"
-          className="size-10 shrink-0 rounded-full"
-          disabled={!input.trim()}
-          onClick={handleSubmit}
-        >
-          <FaArrowUpLong size="18" />
-        </Button>
       </div>
     </div>
   );
diff --git a/web/src/components/chat/ReasoningBubble.tsx b/web/src/components/chat/ReasoningBubble.tsx
index dd7c8fe819..07dc7f5bec 100644
--- a/web/src/components/chat/ReasoningBubble.tsx
+++ b/web/src/components/chat/ReasoningBubble.tsx
@@ -8,6 +8,12 @@ import {
 } from "@/components/ui/collapsible";
 import { Button } from "@/components/ui/button";
 import { cn } from "@/lib/utils";
+import {
+  Tooltip,
+  TooltipContent,
+  TooltipProvider,
+  TooltipTrigger,
+} from "@/components/ui/tooltip";
 
 type ReasoningBubbleProps = {
   /** The accumulated reasoning text from the model. */
@@ -54,34 +60,42 @@ export function ReasoningBubble({
 
   return (
     <div className="self-start rounded-2xl bg-muted/60 px-3 py-2 text-muted-foreground">
-      <Collapsible open={open} onOpenChange={handleOpenChange}>
-        <CollapsibleTrigger asChild>
-          <Button
-            variant="ghost"
-            size="sm"
-            className="h-auto w-full min-w-0 justify-start gap-2 whitespace-normal p-0 text-left text-xs hover:bg-transparent"
-          >
-            <LuBrain
-              className={cn(
-                "size-3 shrink-0",
-                !answerStarted && "animate-pulse",
-              )}
-            />
-            <span className="break-words font-medium">{label}</span>
-            {answerStarted &&
-              (open ? (
-                <LuChevronDown className="ml-auto size-3 shrink-0" />
-              ) : (
-                <LuChevronRight className="ml-auto size-3 shrink-0" />
-              ))}
-          </Button>
-        </CollapsibleTrigger>
-        <CollapsibleContent>
-          <pre className="scrollbar-container mt-2 max-h-64 overflow-auto whitespace-pre-wrap break-words rounded bg-muted/50 p-2 font-sans text-xs leading-relaxed">
-            {reasoning}
-          </pre>
-        </CollapsibleContent>
-      </Collapsible>
+      <TooltipProvider>
+        <Collapsible open={open} onOpenChange={handleOpenChange}>
+          <CollapsibleTrigger asChild>
+            <Button
+              variant="ghost"
+              size="sm"
+              className="h-auto w-full min-w-0 justify-start gap-2 whitespace-normal p-0 text-left text-xs hover:bg-transparent"
+            >
+              <Tooltip>
+                <TooltipTrigger asChild>
+                  <div className="flex items-center gap-2">
+                    <LuBrain
+                      className={cn(
+                        "size-3 shrink-0",
+                        !answerStarted && "animate-pulse",
+                      )}
+                    />
+                  </div>
+                </TooltipTrigger>
+                <TooltipContent>{label}</TooltipContent>
+              </Tooltip>
+              {answerStarted &&
+                (open ? (
+                  <LuChevronDown className="ml-auto size-3 shrink-0" />
+                ) : (
+                  <LuChevronRight className="ml-auto size-3 shrink-0" />
+                ))}
+            </Button>
+          </CollapsibleTrigger>
+          <CollapsibleContent>
+            <pre className="scrollbar-container mt-2 max-h-64 overflow-auto whitespace-pre-wrap break-words rounded bg-muted/50 p-2 font-sans text-xs leading-relaxed">
+              {reasoning}
+            </pre>
+          </CollapsibleContent>
+        </Collapsible>
+      </TooltipProvider>
     </div>
   );
 }
diff --git a/web/src/components/config-form/theme/widgets/GenAIModelWidget.tsx b/web/src/components/config-form/theme/widgets/GenAIModelWidget.tsx
index 294d061166..ca5b30d29f 100644
--- a/web/src/components/config-form/theme/widgets/GenAIModelWidget.tsx
+++ b/web/src/components/config-form/theme/widgets/GenAIModelWidget.tsx
@@ -23,6 +23,7 @@ import {
   PopoverTrigger,
 } from "@/components/ui/popover";
 import type { ConfigFormContext, JsonObject } from "@/types/configForm";
+import type { GenAIModelsResponse } from "@/types/chat";
 import { getSizedFieldClassName } from "../utils";
 
 type ProbeResponse =
@@ -73,11 +74,12 @@ export function GenAIModelWidget(props: WidgetProps) {
     return `${e.provider ?? ""}|${e.base_url ?? ""}`;
   }, [providerKey, formContext?.fullConfig]);
 
-  const { data: allModels, mutate: mutateModels } = useSWR<
-    Record<string, string[]>
-  >("genai/models", {
-    revalidateOnFocus: false,
-  });
+  const { data: allModels, mutate: mutateModels } = useSWR<GenAIModelsResponse>(
+    "genai/models",
+    {
+      revalidateOnFocus: false,
+    },
+  );
 
   // Revalidate models when the saved config fingerprint changes (e.g. after
   // switching provider or base_url and saving).
@@ -89,9 +91,9 @@ export function GenAIModelWidget(props: WidgetProps) {
     }
   }, [configFingerprint, mutateModels]);
 
-  const fetchedModels = useMemo(() => {
+  const fetchedModels = useMemo<string[]>(() => {
     if (!allModels || !providerKey) return [];
-    return allModels[providerKey] ?? [];
+    return allModels[providerKey]?.models ?? [];
   }, [allModels, providerKey]);
 
   const [probeStatus, setProbeStatus] = useState<ProbeStatus>("idle");
diff --git a/web/src/pages/Chat.tsx b/web/src/pages/Chat.tsx
index 4621c97540..7103a189d1 100644
--- a/web/src/pages/Chat.tsx
+++ b/web/src/pages/Chat.tsx
@@ -1,20 +1,21 @@
 import { Button } from "@/components/ui/button";
-import { Input } from "@/components/ui/input";
-import { FaArrowUpLong, FaStop } from "react-icons/fa6";
 import { LuCircleAlert, LuMessageSquarePlus } from "react-icons/lu";
 import { useTranslation } from "react-i18next";
 import { useState, useCallback, useRef, useEffect, useMemo } from "react";
 import axios from "axios";
+import useSWR from "swr";
 import { ChatEventThumbnailsRow } from "@/components/chat/ChatEventThumbnailsRow";
 import { MessageBubble } from "@/components/chat/ChatMessage";
 import { ReasoningBubble } from "@/components/chat/ReasoningBubble";
 import { ToolCallsGroup } from "@/components/chat/ToolCallsGroup";
 import { ChatStartingState } from "@/components/chat/ChatStartingState";
-import { ChatAttachmentChip } from "@/components/chat/ChatAttachmentChip";
-import { ChatQuickReplies } from "@/components/chat/ChatQuickReplies";
-import { ChatPaperclipButton } from "@/components/chat/ChatPaperclipButton";
+import { ChatComposer } from "@/components/chat/ChatComposer";
 import ChatSettings from "@/components/chat/ChatSettings";
-import type { ChatMessage, ShowStatsMode } from "@/types/chat";
+import type {
+  ChatMessage,
+  GenAIModelsResponse,
+  ShowStatsMode,
+} from "@/types/chat";
 import { usePersistence } from "@/hooks/use-persistence";
 import {
   getEventIdsFromSearchObjectsToolCalls,
@@ -38,9 +39,26 @@ export default function ChatPage() {
     "chat-auto-scroll",
     true,
   );
+  const [thinkingEnabled, setThinkingEnabled] = usePersistence<boolean>(
+    "chat-thinking-enabled",
+    false,
+  );
   const scrollRef = useRef<HTMLDivElement>(null);
   const abortRef = useRef<AbortController | null>(null);
 
+  const { data: genaiInfo } = useSWR<GenAIModelsResponse>("genai/models", {
+    revalidateOnFocus: false,
+  });
+  const supportsThinking = useMemo(() => {
+    if (!genaiInfo) return false;
+    // True as soon as any chat-role provider reports toggleable thinking.
+    return Object.values(genaiInfo).some(
+      (provider) =>
+        provider.roles?.includes("chat") &&
+        provider.supports_toggleable_thinking,
+    );
+  }, [genaiInfo]);
+
   useEffect(() => {
     document.title = t("documentTitle");
   }, [t]);
@@ -100,9 +118,10 @@ export default function ChatPage() {
           defaultErrorMessage: t("error"),
         },
         controller.signal,
+        supportsThinking ? { enableThinking: !!thinkingEnabled } : {},
       );
     },
-    [isLoading, t],
+    [isLoading, supportsThinking, t, thinkingEnabled],
   );
 
   const recentEventIds = useMemo(() => {
@@ -305,6 +324,9 @@ export default function ChatPage() {
                   setInput("");
                   submitConversation([{ role: "user", content: message }]);
                 }}
+                supportsThinking={supportsThinking}
+                thinkingEnabled={!!thinkingEnabled}
+                setThinkingEnabled={setThinkingEnabled}
               />
             )}
           </div>
@@ -313,7 +335,7 @@ export default function ChatPage() {
       {hasStarted && (
         <div className="flex shrink-0 justify-center p-2 md:px-4 md:pb-4">
           <div className="flex w-full xl:w-[50%] 3xl:w-[35%]">
-            <ChatEntry
+            <ChatComposer
               input={input}
               setInput={setInput}
               sendMessage={sendMessage}
@@ -324,6 +346,9 @@ export default function ChatPage() {
               onAttach={setAttachedEventId}
               onStop={stopGeneration}
               recentEventIds={recentEventIds}
+              supportsThinking={supportsThinking}
+              thinkingEnabled={!!thinkingEnabled}
+              setThinkingEnabled={setThinkingEnabled}
             />
           </div>
         </div>
@@ -331,89 +356,3 @@ export default function ChatPage() {
     </div>
   );
 }
-
-type ChatEntryProps = {
-  input: string;
-  setInput: (value: string) => void;
-  sendMessage: (textOverride?: string) => void;
-  isLoading: boolean;
-  placeholder: string;
-  attachedEventId: string | null;
-  onClearAttachment: () => void;
-  onAttach: (eventId: string) => void;
-  onStop: () => void;
-  recentEventIds: string[];
-};
-
-function ChatEntry({
-  input,
-  setInput,
-  sendMessage,
-  isLoading,
-  placeholder,
-  attachedEventId,
-  onClearAttachment,
-  onAttach,
-  onStop,
-  recentEventIds,
-}: ChatEntryProps) {
-  const handleKeyDown = (e: React.KeyboardEvent<HTMLInputElement>) => {
-    if (e.key === "Enter" && !e.shiftKey) {
-      e.preventDefault();
-      sendMessage();
-    }
-  };
-
-  return (
-    <div className="flex w-full flex-col items-stretch justify-center gap-2 rounded-xl bg-secondary p-3">
-      {attachedEventId && (
-        <div className="flex items-center">
-          <ChatAttachmentChip
-            eventId={attachedEventId}
-            mode="composer"
-            onRemove={onClearAttachment}
-          />
-        </div>
-      )}
-      {attachedEventId && (
-        <ChatQuickReplies
-          onSend={(text) => sendMessage(text)}
-          disabled={isLoading}
-        />
-      )}
-      <div className="flex w-full flex-row items-center gap-2">
-        <ChatPaperclipButton
-          recentEventIds={recentEventIds}
-          onAttach={onAttach}
-          disabled={isLoading || attachedEventId != null}
-        />
-        <Input
-          className="w-full flex-1 border-transparent bg-transparent shadow-none focus-visible:ring-0 dark:bg-transparent"
-          placeholder={placeholder}
-          value={input}
-          onChange={(e) => setInput(e.target.value)}
-          onKeyDown={handleKeyDown}
-          aria-busy={isLoading}
-        />
-        {isLoading ? (
-          <Button
-            variant="destructive"
-            className="size-10 shrink-0 rounded-full"
-            onClick={onStop}
-          >
-            <FaStop className="size-3" />
-          </Button>
-        ) : (
-          <Button
-            variant="select"
-            className="size-10 shrink-0 rounded-full"
-            disabled={!input.trim()}
-            onClick={() => sendMessage()}
-          >
-            <FaArrowUpLong className="size-4" />
-          </Button>
-        )}
-      </div>
-    </div>
-  );
-}
diff --git a/web/src/types/chat.ts b/web/src/types/chat.ts
index db6d84bf58..81c16820ff 100644
--- a/web/src/types/chat.ts
+++ b/web/src/types/chat.ts
@@ -25,3 +25,11 @@ export type ChatStats = {
 };
 
 export type ShowStatsMode = "while_generating" | "always";
+
+export type GenAIProviderInfo = { // one provider's entry in the "genai/models" response
+  models: string[]; // model names listed for this provider
+  roles: string[]; // roles of this provider; the chat page looks for "chat"
+  supports_toggleable_thinking: boolean; // lets the chat UI offer a thinking toggle
+};
+
+export type GenAIModelsResponse = Record<string, GenAIProviderInfo>; // keyed by provider key
diff --git a/web/src/utils/chatUtil.ts b/web/src/utils/chatUtil.ts
index 5389f7aff8..73e5c213b6 100644
--- a/web/src/utils/chatUtil.ts
+++ b/web/src/utils/chatUtil.ts
@@ -34,12 +34,21 @@ type StreamChunk =
- * POST to chat/completion with stream: true, parse NDJSON stream, and invoke
- * callbacks so the caller can update UI (e.g. React state).
+ * Optional per-request settings for streamChatCompletion.
  */
+export type StreamChatOptions = {
+  /** Sent as `enable_thinking` in the request body; omitted when undefined. */
+  enableThinking?: boolean;
+};
+
+/**
+ * POST to chat/completion with stream: true, parse NDJSON stream, and invoke
+ * callbacks so the caller can update UI (e.g. React state).
+ */
 export async function streamChatCompletion(
   url: string,
   headers: Record<string, string>,
   apiMessages: { role: string; content: string }[],
   callbacks: StreamChatCallbacks,
   signal?: AbortSignal,
+  options: StreamChatOptions = {},
 ): Promise<void> {
   const {
     updateMessages,
@@ -50,10 +59,17 @@ export async function streamChatCompletion(
   } = callbacks;
 
   try {
+    const body: Record<string, unknown> = {
+      messages: apiMessages,
+      stream: true,
+    };
+    if (options.enableThinking !== undefined) {
+      body.enable_thinking = options.enableThinking;
+    }
     const res = await fetch(url, {
       method: "POST",
       headers,
-      body: JSON.stringify({ messages: apiMessages, stream: true }),
+      body: JSON.stringify(body),
       signal,
     });