From 66a2417229e3131834946314d323de155ad385c7 Mon Sep 17 00:00:00 2001 From: Nicolas Mowen Date: Thu, 21 May 2026 11:54:23 -0600 Subject: [PATCH] Support Dynamic Thinking Models (#23281) * Add ability to toggle thinking * Disable thinking for descriptions automatically * mypy * Cleanup --- frigate/api/chat.py | 2 + frigate/api/defs/request/chat_body.py | 7 + frigate/genai/__init__.py | 19 ++- frigate/genai/manager.py | 19 ++- frigate/genai/plugins/gemini.py | 6 +- frigate/genai/plugins/llama_cpp.py | 52 ++++++- frigate/genai/plugins/ollama.py | 42 ++++- frigate/genai/plugins/openai.py | 6 +- web/public/locales/en/views/chat.json | 3 + web/src/components/chat/ChatComposer.tsx | 147 ++++++++++++++++++ web/src/components/chat/ChatStartingState.tsx | 47 +++--- web/src/components/chat/ReasoningBubble.tsx | 70 +++++---- .../theme/widgets/GenAIModelWidget.tsx | 16 +- web/src/pages/Chat.tsx | 127 ++++----------- web/src/types/chat.ts | 8 + web/src/utils/chatUtil.ts | 14 +- 16 files changed, 410 insertions(+), 175 deletions(-) create mode 100644 web/src/components/chat/ChatComposer.tsx diff --git a/frigate/api/chat.py b/frigate/api/chat.py index 291503dbba..c7d197bf91 100644 --- a/frigate/api/chat.py +++ b/frigate/api/chat.py @@ -1173,6 +1173,7 @@ async def chat_completion( messages=conversation, tools=tools if tools else None, tool_choice="auto", + enable_thinking=body.enable_thinking, ): if await request.is_disconnected(): logger.debug("Client disconnected, stopping chat stream") @@ -1267,6 +1268,7 @@ async def chat_completion( messages=conversation, tools=tools if tools else None, tool_choice="auto", + enable_thinking=body.enable_thinking, ) if response.get("finish_reason") == "error": diff --git a/frigate/api/defs/request/chat_body.py b/frigate/api/defs/request/chat_body.py index 79ca3a6fef..228781c80b 100644 --- a/frigate/api/defs/request/chat_body.py +++ b/frigate/api/defs/request/chat_body.py @@ -36,3 +36,10 @@ class ChatCompletionRequest(BaseModel): default=False, description="If true, stream the final assistant response in the body as newline-delimited JSON.", ) + enable_thinking: Optional[bool] = Field( + default=None, + description=( + "Per-request thinking toggle. None means use the provider default. " + "Ignored by providers that do not expose a per-request thinking switch." + ), + ) diff --git a/frigate/genai/__init__.py b/frigate/genai/__init__.py index 28a6844d95..cf51550b47 100644 --- a/frigate/genai/__init__.py +++ b/frigate/genai/__init__.py @@ -222,8 +222,15 @@ class GenAIClient: prompt: str, images: list[bytes], response_format: Optional[dict] = None, + enable_thinking: bool = False, ) -> Optional[str]: - """Submit a request to the provider.""" + """Submit a request to the provider. + + ``enable_thinking`` is honored only by providers that report + ``supports_toggleable_thinking``. Description-style callers leave it + at the default (off) since synthesis tasks don't benefit from + reasoning traces. + """ return None @property @@ -235,6 +242,11 @@ class GenAIClient: """ return True + @property + def supports_toggleable_thinking(self) -> bool: + """Whether the configured model exposes a per-request thinking toggle.""" + return False + def list_models(self) -> list[str]: """Return the list of model names available from this provider. @@ -278,6 +290,7 @@ class GenAIClient: messages: list[dict[str, Any]], tools: Optional[list[dict[str, Any]]] = None, tool_choice: Optional[str] = "auto", + enable_thinking: Optional[bool] = None, ) -> dict[str, Any]: """ Send chat messages to LLM with optional tool definitions. @@ -301,7 +314,9 @@ class GenAIClient: - 'none': Model must not call tools - 'required': Model must call at least one tool - Or a dict specifying a specific tool to call - **kwargs: Additional provider-specific parameters. + enable_thinking: Per-request thinking toggle. None means use the + provider default. Ignored by providers without a per-request + toggle (see `supports_toggleable_thinking`). Returns: Dictionary with: diff --git a/frigate/genai/manager.py b/frigate/genai/manager.py index 94719f4291..a1325d3279 100644 --- a/frigate/genai/manager.py +++ b/frigate/genai/manager.py @@ -6,7 +6,7 @@ no chat feature is active) are never initialized. """ import logging -from typing import TYPE_CHECKING, Optional +from typing import TYPE_CHECKING, Any, Optional from frigate.config import FrigateConfig from frigate.config.camera.genai import GenAIConfig, GenAIRoleEnum @@ -108,11 +108,16 @@ class GenAIClientManager: name = self._role_map.get(GenAIRoleEnum.embeddings) return self._get_client(name) if name else None - def list_models(self) -> dict[str, list[str]]: - """Return available models keyed by config entry name.""" - result: dict[str, list[str]] = {} - for name in self._configs: + def list_models(self) -> dict[str, dict[str, Any]]: + """Return per-entry model lists and capabilities, keyed by config entry name.""" + result: dict[str, dict[str, Any]] = {} + for name, genai_cfg in self._configs.items(): client = self._get_client(name) - if client: - result[name] = client.list_models() + if not client: + continue + result[name] = { + "models": client.list_models(), + "roles": [r.value for r in genai_cfg.roles], + "supports_toggleable_thinking": client.supports_toggleable_thinking, + } return result diff --git a/frigate/genai/plugins/gemini.py b/frigate/genai/plugins/gemini.py index bcac09d0e3..6e4b9283fb 100644 --- a/frigate/genai/plugins/gemini.py +++ b/frigate/genai/plugins/gemini.py @@ -62,6 +62,7 @@ class GeminiClient(GenAIClient): prompt: str, images: list[bytes], response_format: Optional[dict] = None, + enable_thinking: bool = False, ) -> Optional[str]: """Submit a request to Gemini.""" contents = [prompt] + [ @@ -119,11 +120,14 @@ class GeminiClient(GenAIClient): messages: list[dict[str, Any]], tools: Optional[list[dict[str, Any]]] = None, tool_choice: Optional[str] = "auto", + enable_thinking: Optional[bool] = None, ) -> dict[str, Any]: """ Send chat messages to Gemini with optional tool definitions. - Implements function calling/tool usage for Gemini models. + Implements function calling/tool usage for Gemini models. Thinking is + configured at the model level for Gemini, so ``enable_thinking`` is + accepted for interface parity and ignored. """ try: # Convert messages to Gemini format diff --git a/frigate/genai/plugins/llama_cpp.py b/frigate/genai/plugins/llama_cpp.py index 2dddf5244e..d5458cf8f9 100644 --- a/frigate/genai/plugins/llama_cpp.py +++ b/frigate/genai/plugins/llama_cpp.py @@ -122,6 +122,7 @@ class LlamaCppClient(GenAIClient): _supports_vision: bool _supports_audio: bool _supports_tools: bool + _supports_reasoning: bool _image_token_cache: dict[tuple[int, int], int] _text_baseline_tokens: int | None _media_marker: str @@ -135,6 +136,7 @@ class LlamaCppClient(GenAIClient): self._supports_vision = False self._supports_audio = False self._supports_tools = False + self._supports_reasoning = False self._image_token_cache = {} self._text_baseline_tokens = None self._media_marker = "<__media__>" @@ -164,15 +166,17 @@ class LlamaCppClient(GenAIClient): self._supports_vision = info["supports_vision"] self._supports_audio = info["supports_audio"] self._supports_tools = info["supports_tools"] + self._supports_reasoning = info["supports_reasoning"] self._media_marker = info["media_marker"] logger.info( - "llama.cpp model '%s' initialized — context: %s, vision: %s, audio: %s, tools: %s", + "llama.cpp model '%s' initialized — context: %s, vision: %s, audio: %s, tools: %s, reasoning: %s", configured_model, self._context_size or "unknown", self._supports_vision, self._supports_audio, self._supports_tools, + self._supports_reasoning, ) return base_url @@ -200,6 +204,7 @@ class LlamaCppClient(GenAIClient): "supports_vision": False, "supports_audio": False, "supports_tools": False, + "supports_reasoning": False, "media_marker": "<__media__>", } @@ -279,10 +284,17 @@ class LlamaCppClient(GenAIClient): info["supports_vision"] = bool(modalities.get("vision", False)) info["supports_audio"] = bool(modalities.get("audio", False)) + chat_caps = props.get("chat_template_caps") or {} + if not info["supports_tools"]: - chat_caps = props.get("chat_template_caps", {}) info["supports_tools"] = bool(chat_caps.get("supports_tools", False)) + # llama.cpp does not advertise per-template reasoning support, so + # detect it by looking for the `enable_thinking` toggle variable + # in the Jinja chat template itself. + chat_template = props.get("chat_template") or "" + info["supports_reasoning"] = "enable_thinking" in chat_template + media_marker = props.get("media_marker") if isinstance(media_marker, str) and media_marker: info["media_marker"] = media_marker @@ -300,6 +312,7 @@ class LlamaCppClient(GenAIClient): prompt: str, images: list[bytes], response_format: Optional[dict] = None, + enable_thinking: bool = False, ) -> Optional[str]: """Submit a request to llama.cpp server.""" if self.provider is None: @@ -327,7 +340,7 @@ class LlamaCppClient(GenAIClient): ) # Build request payload with llama.cpp native options - payload = { + payload: dict[str, Any] = { "model": self.genai_config.model, "messages": [ { @@ -341,6 +354,9 @@ class LlamaCppClient(GenAIClient): if response_format: payload["response_format"] = response_format + if self.supports_toggleable_thinking: + payload["chat_template_kwargs"] = {"enable_thinking": enable_thinking} + response = requests.post( f"{self.provider}/v1/chat/completions", json=payload, @@ -377,6 +393,10 @@ class LlamaCppClient(GenAIClient): """Whether the loaded model supports tool/function calling.""" return self._supports_tools + @property + def supports_toggleable_thinking(self) -> bool: + return self._supports_reasoning + def list_models(self) -> list[str]: """Return available model IDs from the llama.cpp server.""" base_url = self.provider or ( @@ -504,6 +524,7 @@ class LlamaCppClient(GenAIClient): tools: Optional[list[dict[str, Any]]], tool_choice: Optional[str], stream: bool = False, + enable_thinking: Optional[bool] = None, ) -> dict[str, Any]: """Build request payload for chat completions (sync or stream).""" openai_tool_choice = None @@ -519,14 +540,21 @@ class LlamaCppClient(GenAIClient): "messages": messages, "model": self.genai_config.model, } + if stream: payload["stream"] = True payload["stream_options"] = {"include_usage": True} payload["timings_per_token"] = True + if tools: payload["tools"] = tools + if openai_tool_choice is not None: payload["tool_choice"] = openai_tool_choice + + if enable_thinking is not None and self._supports_reasoning: + payload["chat_template_kwargs"] = {"enable_thinking": enable_thinking} + provider_opts = { k: v for k, v in self.provider_options.items() if k != "context_size" } @@ -732,6 +760,7 @@ class LlamaCppClient(GenAIClient): messages: list[dict[str, Any]], tools: Optional[list[dict[str, Any]]] = None, tool_choice: Optional[str] = "auto", + enable_thinking: Optional[bool] = None, ) -> dict[str, Any]: """ Send chat messages to llama.cpp server with optional tool definitions. @@ -749,7 +778,13 @@ class LlamaCppClient(GenAIClient): "finish_reason": "error", } try: - payload = self._build_payload(messages, tools, tool_choice, stream=False) + payload = self._build_payload( + messages, + tools, + tool_choice, + stream=False, + enable_thinking=enable_thinking, + ) response = requests.post( f"{self.provider}/v1/chat/completions", json=payload, @@ -797,6 +832,7 @@ class LlamaCppClient(GenAIClient): messages: list[dict[str, Any]], tools: Optional[list[dict[str, Any]]] = None, tool_choice: Optional[str] = "auto", + enable_thinking: Optional[bool] = None, ) -> AsyncGenerator[tuple[str, Any], None]: """Stream chat with tools via OpenAI-compatible streaming API.""" if self.provider is None: @@ -813,7 +849,13 @@ class LlamaCppClient(GenAIClient): ) return try: - payload = self._build_payload(messages, tools, tool_choice, stream=True) + payload = self._build_payload( + messages, + tools, + tool_choice, + stream=True, + enable_thinking=enable_thinking, + ) content_parts: list[str] = [] reasoning_parts: list[str] = [] tool_calls_by_index: dict[int, dict[str, Any]] = {} diff --git a/frigate/genai/plugins/ollama.py b/frigate/genai/plugins/ollama.py index 0f95dd3f9d..08176f524b 100644 --- a/frigate/genai/plugins/ollama.py +++ b/frigate/genai/plugins/ollama.py @@ -98,6 +98,22 @@ class OllamaClient(GenAIClient): provider: ApiClient | None provider_options: dict[str, Any] + _supports_thinking_cache: Optional[bool] = None + + @property + def supports_toggleable_thinking(self) -> bool: + if self._supports_thinking_cache is not None: + return self._supports_thinking_cache + if self.provider is None: + return False + try: + response = self.provider.show(self.genai_config.model) + capabilities = response.get("capabilities") or [] + self._supports_thinking_cache = "thinking" in capabilities + except Exception as e: + logger.debug("Failed to query Ollama model capabilities: %s", e) + self._supports_thinking_cache = False + return self._supports_thinking_cache def _auth_headers(self) -> dict | None: if self.genai_config.api_key: @@ -178,6 +194,7 @@ class OllamaClient(GenAIClient): prompt: str, images: list[bytes], response_format: Optional[dict] = None, + enable_thinking: bool = False, ) -> Optional[str]: """Submit a request to Ollama""" if self.provider is None: @@ -194,6 +211,8 @@ class OllamaClient(GenAIClient): schema = response_format.get("json_schema", {}).get("schema") if schema: ollama_options["format"] = self._clean_schema_for_ollama(schema) + if self.supports_toggleable_thinking: + ollama_options["think"] = enable_thinking logger.debug( "Ollama generate request: model=%s, prompt_len=%s, image_count=%s, " "has_format=%s, options=%s", @@ -274,6 +293,7 @@ class OllamaClient(GenAIClient): tools: Optional[list[dict[str, Any]]], tool_choice: Optional[str], stream: bool = False, + enable_thinking: Optional[bool] = None, ) -> dict[str, Any]: """Build request_messages and params for chat (sync or stream).""" request_messages = [] @@ -318,6 +338,8 @@ class OllamaClient(GenAIClient): request_params["stream"] = True if tools: request_params["tools"] = tools + if enable_thinking is not None and self.supports_toggleable_thinking: + request_params["think"] = enable_thinking return request_params def _message_from_response(self, response: dict[str, Any]) -> dict[str, Any]: @@ -365,6 +387,7 @@ class OllamaClient(GenAIClient): messages: list[dict[str, Any]], tools: Optional[list[dict[str, Any]]] = None, tool_choice: Optional[str] = "auto", + enable_thinking: Optional[bool] = None, ) -> dict[str, Any]: if self.provider is None: logger.warning( @@ -377,7 +400,11 @@ class OllamaClient(GenAIClient): } try: request_params = self._build_request_params( - messages, tools, tool_choice, stream=False + messages, + tools, + tool_choice, + stream=False, + enable_thinking=enable_thinking, ) response = self.provider.chat(**request_params) return self._message_from_response(response) @@ -401,6 +428,7 @@ class OllamaClient(GenAIClient): messages: list[dict[str, Any]], tools: Optional[list[dict[str, Any]]] = None, tool_choice: Optional[str] = "auto", + enable_thinking: Optional[bool] = None, ) -> AsyncGenerator[tuple[str, Any], None]: """Stream chat with tools; yields content deltas then final message. @@ -430,7 +458,11 @@ class OllamaClient(GenAIClient): "Ollama: tools provided, using non-streaming call for tool support" ) request_params = self._build_request_params( - messages, tools, tool_choice, stream=False + messages, + tools, + tool_choice, + stream=False, + enable_thinking=enable_thinking, ) async_client = OllamaAsyncClient( host=self.genai_config.base_url, @@ -452,7 +484,11 @@ class OllamaClient(GenAIClient): return request_params = self._build_request_params( - messages, tools, tool_choice, stream=True + messages, + tools, + tool_choice, + stream=True, + enable_thinking=enable_thinking, ) async_client = OllamaAsyncClient( host=self.genai_config.base_url, diff --git a/frigate/genai/plugins/openai.py b/frigate/genai/plugins/openai.py index f07e83b5dc..8d422bfb31 100644 --- a/frigate/genai/plugins/openai.py +++ b/frigate/genai/plugins/openai.py @@ -61,6 +61,7 @@ class OpenAIClient(GenAIClient): prompt: str, images: list[bytes], response_format: Optional[dict] = None, + enable_thinking: bool = False, ) -> Optional[str]: """Submit a request to OpenAI.""" encoded_images = [base64.b64encode(image).decode("utf-8") for image in images] @@ -187,11 +188,14 @@ class OpenAIClient(GenAIClient): messages: list[dict[str, Any]], tools: Optional[list[dict[str, Any]]] = None, tool_choice: Optional[str] = "auto", + enable_thinking: Optional[bool] = None, ) -> dict[str, Any]: """ Send chat messages to OpenAI with optional tool definitions. - Implements function calling/tool usage for OpenAI models. + Implements function calling/tool usage for OpenAI models. The OpenAI + chat completions API does not expose a per-request thinking toggle, + so ``enable_thinking`` is accepted for interface parity and ignored. """ try: openai_tool_choice = None diff --git a/web/public/locales/en/views/chat.json b/web/public/locales/en/views/chat.json index 9e68551f03..363b0e68e4 100644 --- a/web/public/locales/en/views/chat.json +++ b/web/public/locales/en/views/chat.json @@ -65,5 +65,8 @@ "active": "Reasoning…", "show": "Show reasoning", "hide": "Hide reasoning" + }, + "thinking": { + "toggle": "Toggle thinking" } } diff --git a/web/src/components/chat/ChatComposer.tsx b/web/src/components/chat/ChatComposer.tsx new file mode 100644 index 0000000000..5ccfe93786 --- /dev/null +++ b/web/src/components/chat/ChatComposer.tsx @@ -0,0 +1,147 @@ +import { Button } from "@/components/ui/button"; +import { Input } from "@/components/ui/input"; +import { FaArrowUpLong, FaStop } from "react-icons/fa6"; +import { LuBrain } from "react-icons/lu"; +import { useTranslation } from "react-i18next"; +import { cn } from "@/lib/utils"; +import { + Tooltip, + TooltipContent, + TooltipProvider, + TooltipTrigger, +} from "@/components/ui/tooltip"; +import { ChatAttachmentChip } from "@/components/chat/ChatAttachmentChip"; +import { ChatQuickReplies } from "@/components/chat/ChatQuickReplies"; +import { ChatPaperclipButton } from "@/components/chat/ChatPaperclipButton"; + +type ChatComposerProps = { + input: string; + setInput: (value: string) => void; + sendMessage: (textOverride?: string) => void; + placeholder: string; + + supportsThinking: boolean; + thinkingEnabled: boolean; + setThinkingEnabled: (value: boolean | undefined) => void; + + isLoading?: boolean; + onStop?: () => void; + + attachedEventId?: string | null; + onClearAttachment?: () => void; + onAttach?: (eventId: string) => void; + recentEventIds?: string[]; + + large?: boolean; +}; + +export function ChatComposer({ + input, + setInput, + sendMessage, + placeholder, + supportsThinking, + thinkingEnabled, + setThinkingEnabled, + isLoading = false, + onStop, + attachedEventId, + onClearAttachment, + onAttach, + recentEventIds, + large = false, +}: ChatComposerProps) { + const { t } = useTranslation(["views/chat"]); + + const handleKeyDown = (e: React.KeyboardEvent) => { + if (e.key === "Enter" && !e.shiftKey) { + e.preventDefault(); + sendMessage(); + } + }; + + const showPaperclip = !!onAttach; + const showStop = isLoading && !!onStop; + + return ( +
+ {attachedEventId && onClearAttachment && ( +
+ +
+ )} + {attachedEventId && ( + sendMessage(text)} + disabled={isLoading} + /> + )} +
+ {showPaperclip && ( + + )} + {supportsThinking && ( + + + + + + {t("thinking.toggle")} + + + )} + setInput(e.target.value)} + onKeyDown={handleKeyDown} + aria-busy={isLoading} + /> + {showStop ? ( + + ) : ( + + )} +
+
+ ); +} diff --git a/web/src/components/chat/ChatStartingState.tsx b/web/src/components/chat/ChatStartingState.tsx index a0a3a044c8..3e77677379 100644 --- a/web/src/components/chat/ChatStartingState.tsx +++ b/web/src/components/chat/ChatStartingState.tsx @@ -1,15 +1,22 @@ import { Button } from "@/components/ui/button"; -import { Input } from "@/components/ui/input"; -import { FaArrowUpLong } from "react-icons/fa6"; import { useTranslation } from "react-i18next"; import { useState } from "react"; import type { StartingRequest } from "@/types/chat"; +import { ChatComposer } from "@/components/chat/ChatComposer"; type ChatStartingStateProps = { onSendMessage: (message: string) => void; + supportsThinking: boolean; + thinkingEnabled: boolean; + setThinkingEnabled: (value: boolean | undefined) => void; }; -export function ChatStartingState({ onSendMessage }: ChatStartingStateProps) { +export function ChatStartingState({ + onSendMessage, + supportsThinking, + thinkingEnabled, + setThinkingEnabled, +}: ChatStartingStateProps) { const { t } = useTranslation(["views/chat"]); const [input, setInput] = useState(""); @@ -36,20 +43,13 @@ export function ChatStartingState({ onSendMessage }: ChatStartingStateProps) { onSendMessage(prompt); }; - const handleSubmit = () => { - const text = input.trim(); + const handleSend = (textOverride?: string) => { + const text = (textOverride ?? input).trim(); if (!text) return; onSendMessage(text); setInput(""); }; - const handleKeyDown = (e: React.KeyboardEvent) => { - if (e.key === "Enter" && !e.shiftKey) { - e.preventDefault(); - handleSubmit(); - } - }; - return (
@@ -77,22 +77,17 @@ export function ChatStartingState({ onSendMessage }: ChatStartingStateProps) {
-
- + setInput(e.target.value)} - onKeyDown={handleKeyDown} + supportsThinking={supportsThinking} + thinkingEnabled={thinkingEnabled} + setThinkingEnabled={setThinkingEnabled} + large /> -
); diff --git a/web/src/components/chat/ReasoningBubble.tsx b/web/src/components/chat/ReasoningBubble.tsx index dd7c8fe819..07dc7f5bec 100644 --- a/web/src/components/chat/ReasoningBubble.tsx +++ b/web/src/components/chat/ReasoningBubble.tsx @@ -8,6 +8,12 @@ import { } from "@/components/ui/collapsible"; import { Button } from "@/components/ui/button"; import { cn } from "@/lib/utils"; +import { + Tooltip, + TooltipContent, + TooltipProvider, + TooltipTrigger, +} from "@/components/ui/tooltip"; type ReasoningBubbleProps = { /** The accumulated reasoning text from the model. */ @@ -54,34 +60,42 @@ export function ReasoningBubble({ return (
- - - - - -
-            {reasoning}
-          
-
-
+ + + + + + +
+              {reasoning}
+            
+
+
+
); } diff --git a/web/src/components/config-form/theme/widgets/GenAIModelWidget.tsx b/web/src/components/config-form/theme/widgets/GenAIModelWidget.tsx index 294d061166..ca5b30d29f 100644 --- a/web/src/components/config-form/theme/widgets/GenAIModelWidget.tsx +++ b/web/src/components/config-form/theme/widgets/GenAIModelWidget.tsx @@ -23,6 +23,7 @@ import { PopoverTrigger, } from "@/components/ui/popover"; import type { ConfigFormContext, JsonObject } from "@/types/configForm"; +import type { GenAIModelsResponse } from "@/types/chat"; import { getSizedFieldClassName } from "../utils"; type ProbeResponse = @@ -73,11 +74,12 @@ export function GenAIModelWidget(props: WidgetProps) { return `${e.provider ?? ""}|${e.base_url ?? ""}`; }, [providerKey, formContext?.fullConfig]); - const { data: allModels, mutate: mutateModels } = useSWR< - Record - >("genai/models", { - revalidateOnFocus: false, - }); + const { data: allModels, mutate: mutateModels } = useSWR( + "genai/models", + { + revalidateOnFocus: false, + }, + ); // Revalidate models when the saved config fingerprint changes (e.g. after // switching provider or base_url and saving). @@ -89,9 +91,9 @@ export function GenAIModelWidget(props: WidgetProps) { } }, [configFingerprint, mutateModels]); - const fetchedModels = useMemo(() => { + const fetchedModels = useMemo(() => { if (!allModels || !providerKey) return []; - return allModels[providerKey] ?? []; + return allModels[providerKey]?.models ?? []; }, [allModels, providerKey]); const [probeStatus, setProbeStatus] = useState("idle"); diff --git a/web/src/pages/Chat.tsx b/web/src/pages/Chat.tsx index 4621c97540..7103a189d1 100644 --- a/web/src/pages/Chat.tsx +++ b/web/src/pages/Chat.tsx @@ -1,20 +1,21 @@ import { Button } from "@/components/ui/button"; -import { Input } from "@/components/ui/input"; -import { FaArrowUpLong, FaStop } from "react-icons/fa6"; import { LuCircleAlert, LuMessageSquarePlus } from "react-icons/lu"; import { useTranslation } from "react-i18next"; import { useState, useCallback, useRef, useEffect, useMemo } from "react"; import axios from "axios"; +import useSWR from "swr"; import { ChatEventThumbnailsRow } from "@/components/chat/ChatEventThumbnailsRow"; import { MessageBubble } from "@/components/chat/ChatMessage"; import { ReasoningBubble } from "@/components/chat/ReasoningBubble"; import { ToolCallsGroup } from "@/components/chat/ToolCallsGroup"; import { ChatStartingState } from "@/components/chat/ChatStartingState"; -import { ChatAttachmentChip } from "@/components/chat/ChatAttachmentChip"; -import { ChatQuickReplies } from "@/components/chat/ChatQuickReplies"; -import { ChatPaperclipButton } from "@/components/chat/ChatPaperclipButton"; +import { ChatComposer } from "@/components/chat/ChatComposer"; import ChatSettings from "@/components/chat/ChatSettings"; -import type { ChatMessage, ShowStatsMode } from "@/types/chat"; +import type { + ChatMessage, + GenAIModelsResponse, + ShowStatsMode, +} from "@/types/chat"; import { usePersistence } from "@/hooks/use-persistence"; import { getEventIdsFromSearchObjectsToolCalls, @@ -38,9 +39,26 @@ export default function ChatPage() { "chat-auto-scroll", true, ); + const [thinkingEnabled, setThinkingEnabled] = usePersistence( + "chat-thinking-enabled", + false, + ); const scrollRef = useRef(null); const abortRef = useRef(null); + const { data: genaiInfo } = useSWR("genai/models", { + revalidateOnFocus: false, + }); + const supportsThinking = useMemo(() => { + if (!genaiInfo) return false; + for (const entry of Object.values(genaiInfo)) { + if (entry.roles?.includes("chat") && entry.supports_toggleable_thinking) { + return true; + } + } + return false; + }, [genaiInfo]); + useEffect(() => { document.title = t("documentTitle"); }, [t]); @@ -100,9 +118,10 @@ export default function ChatPage() { defaultErrorMessage: t("error"), }, controller.signal, + supportsThinking ? { enableThinking: !!thinkingEnabled } : {}, ); }, - [isLoading, t], + [isLoading, supportsThinking, t, thinkingEnabled], ); const recentEventIds = useMemo(() => { @@ -305,6 +324,9 @@ export default function ChatPage() { setInput(""); submitConversation([{ role: "user", content: message }]); }} + supportsThinking={supportsThinking} + thinkingEnabled={!!thinkingEnabled} + setThinkingEnabled={setThinkingEnabled} /> )} @@ -313,7 +335,7 @@ export default function ChatPage() { {hasStarted && (
-
@@ -331,89 +356,3 @@ export default function ChatPage() { ); } - -type ChatEntryProps = { - input: string; - setInput: (value: string) => void; - sendMessage: (textOverride?: string) => void; - isLoading: boolean; - placeholder: string; - attachedEventId: string | null; - onClearAttachment: () => void; - onAttach: (eventId: string) => void; - onStop: () => void; - recentEventIds: string[]; -}; - -function ChatEntry({ - input, - setInput, - sendMessage, - isLoading, - placeholder, - attachedEventId, - onClearAttachment, - onAttach, - onStop, - recentEventIds, -}: ChatEntryProps) { - const handleKeyDown = (e: React.KeyboardEvent) => { - if (e.key === "Enter" && !e.shiftKey) { - e.preventDefault(); - sendMessage(); - } - }; - - return ( -
- {attachedEventId && ( -
- -
- )} - {attachedEventId && ( - sendMessage(text)} - disabled={isLoading} - /> - )} -
- - setInput(e.target.value)} - onKeyDown={handleKeyDown} - aria-busy={isLoading} - /> - {isLoading ? ( - - ) : ( - - )} -
-
- ); -} diff --git a/web/src/types/chat.ts b/web/src/types/chat.ts index db6d84bf58..81c16820ff 100644 --- a/web/src/types/chat.ts +++ b/web/src/types/chat.ts @@ -25,3 +25,11 @@ export type ChatStats = { }; export type ShowStatsMode = "while_generating" | "always"; + +export type GenAIProviderInfo = { + models: string[]; + roles: string[]; + supports_toggleable_thinking: boolean; +}; + +export type GenAIModelsResponse = Record; diff --git a/web/src/utils/chatUtil.ts b/web/src/utils/chatUtil.ts index 5389f7aff8..73e5c213b6 100644 --- a/web/src/utils/chatUtil.ts +++ b/web/src/utils/chatUtil.ts @@ -34,12 +34,17 @@ type StreamChunk = * POST to chat/completion with stream: true, parse NDJSON stream, and invoke * callbacks so the caller can update UI (e.g. React state). */ +export type StreamChatOptions = { + enableThinking?: boolean; +}; + export async function streamChatCompletion( url: string, headers: Record, apiMessages: { role: string; content: string }[], callbacks: StreamChatCallbacks, signal?: AbortSignal, + options: StreamChatOptions = {}, ): Promise { const { updateMessages, @@ -50,10 +55,17 @@ export async function streamChatCompletion( } = callbacks; try { + const body: Record = { + messages: apiMessages, + stream: true, + }; + if (options.enableThinking !== undefined) { + body.enable_thinking = options.enableThinking; + } const res = await fetch(url, { method: "POST", headers, - body: JSON.stringify({ messages: apiMessages, stream: true }), + body: JSON.stringify(body), signal, });