mirror of
https://github.com/blakeblackshear/frigate.git
synced 2026-06-21 03:41:55 +03:00
Support Dynamic Thinking Models (#23281)
Some checks are pending
CI / AMD64 Build (push) Waiting to run
CI / ARM Build (push) Waiting to run
CI / Jetson Jetpack 6 (push) Waiting to run
CI / AMD64 Extra Build (push) Blocked by required conditions
CI / ARM Extra Build (push) Blocked by required conditions
CI / Synaptics Build (push) Blocked by required conditions
CI / Assemble and push default build (push) Blocked by required conditions
Some checks are pending
CI / AMD64 Build (push) Waiting to run
CI / ARM Build (push) Waiting to run
CI / Jetson Jetpack 6 (push) Waiting to run
CI / AMD64 Extra Build (push) Blocked by required conditions
CI / ARM Extra Build (push) Blocked by required conditions
CI / Synaptics Build (push) Blocked by required conditions
CI / Assemble and push default build (push) Blocked by required conditions
* Add ability to toggle thinking
* Disable thinking for descriptions automatically
* mypy
* Cleanup
This commit is contained in:
parent
555ef89800
commit
66a2417229
@ -1173,6 +1173,7 @@ async def chat_completion(
|
|||||||
messages=conversation,
|
messages=conversation,
|
||||||
tools=tools if tools else None,
|
tools=tools if tools else None,
|
||||||
tool_choice="auto",
|
tool_choice="auto",
|
||||||
|
enable_thinking=body.enable_thinking,
|
||||||
):
|
):
|
||||||
if await request.is_disconnected():
|
if await request.is_disconnected():
|
||||||
logger.debug("Client disconnected, stopping chat stream")
|
logger.debug("Client disconnected, stopping chat stream")
|
||||||
@ -1267,6 +1268,7 @@ async def chat_completion(
|
|||||||
messages=conversation,
|
messages=conversation,
|
||||||
tools=tools if tools else None,
|
tools=tools if tools else None,
|
||||||
tool_choice="auto",
|
tool_choice="auto",
|
||||||
|
enable_thinking=body.enable_thinking,
|
||||||
)
|
)
|
||||||
|
|
||||||
if response.get("finish_reason") == "error":
|
if response.get("finish_reason") == "error":
|
||||||
|
|||||||
@ -36,3 +36,10 @@ class ChatCompletionRequest(BaseModel):
|
|||||||
default=False,
|
default=False,
|
||||||
description="If true, stream the final assistant response in the body as newline-delimited JSON.",
|
description="If true, stream the final assistant response in the body as newline-delimited JSON.",
|
||||||
)
|
)
|
||||||
|
enable_thinking: Optional[bool] = Field(
|
||||||
|
default=None,
|
||||||
|
description=(
|
||||||
|
"Per-request thinking toggle. None means use the provider default. "
|
||||||
|
"Ignored by providers that do not expose a per-request thinking switch."
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|||||||
@ -222,8 +222,15 @@ class GenAIClient:
|
|||||||
prompt: str,
|
prompt: str,
|
||||||
images: list[bytes],
|
images: list[bytes],
|
||||||
response_format: Optional[dict] = None,
|
response_format: Optional[dict] = None,
|
||||||
|
enable_thinking: bool = False,
|
||||||
) -> Optional[str]:
|
) -> Optional[str]:
|
||||||
"""Submit a request to the provider."""
|
"""Submit a request to the provider.
|
||||||
|
|
||||||
|
``enable_thinking`` is honored only by providers that report
|
||||||
|
``supports_toggleable_thinking``. Description-style callers leave it
|
||||||
|
at the default (off) since synthesis tasks don't benefit from
|
||||||
|
reasoning traces.
|
||||||
|
"""
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@property
|
@property
|
||||||
@ -235,6 +242,11 @@ class GenAIClient:
|
|||||||
"""
|
"""
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
@property
def supports_toggleable_thinking(self) -> bool:
    """Report whether thinking can be switched on or off for a single request.

    The base client has no such switch, so the answer here is always
    ``False``. Provider clients whose backend does offer a per-request
    toggle override this property.
    """
    toggleable = False
    return toggleable
|
||||||
|
|
||||||
def list_models(self) -> list[str]:
|
def list_models(self) -> list[str]:
|
||||||
"""Return the list of model names available from this provider.
|
"""Return the list of model names available from this provider.
|
||||||
|
|
||||||
@ -278,6 +290,7 @@ class GenAIClient:
|
|||||||
messages: list[dict[str, Any]],
|
messages: list[dict[str, Any]],
|
||||||
tools: Optional[list[dict[str, Any]]] = None,
|
tools: Optional[list[dict[str, Any]]] = None,
|
||||||
tool_choice: Optional[str] = "auto",
|
tool_choice: Optional[str] = "auto",
|
||||||
|
enable_thinking: Optional[bool] = None,
|
||||||
) -> dict[str, Any]:
|
) -> dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Send chat messages to LLM with optional tool definitions.
|
Send chat messages to LLM with optional tool definitions.
|
||||||
@ -301,7 +314,9 @@ class GenAIClient:
|
|||||||
- 'none': Model must not call tools
|
- 'none': Model must not call tools
|
||||||
- 'required': Model must call at least one tool
|
- 'required': Model must call at least one tool
|
||||||
- Or a dict specifying a specific tool to call
|
- Or a dict specifying a specific tool to call
|
||||||
**kwargs: Additional provider-specific parameters.
|
enable_thinking: Per-request thinking toggle. None means use the
|
||||||
|
provider default. Ignored by providers without a per-request
|
||||||
|
toggle (see `supports_toggleable_thinking`).
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Dictionary with:
|
Dictionary with:
|
||||||
|
|||||||
@ -6,7 +6,7 @@ no chat feature is active) are never initialized.
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from typing import TYPE_CHECKING, Optional
|
from typing import TYPE_CHECKING, Any, Optional
|
||||||
|
|
||||||
from frigate.config import FrigateConfig
|
from frigate.config import FrigateConfig
|
||||||
from frigate.config.camera.genai import GenAIConfig, GenAIRoleEnum
|
from frigate.config.camera.genai import GenAIConfig, GenAIRoleEnum
|
||||||
@ -108,11 +108,16 @@ class GenAIClientManager:
|
|||||||
name = self._role_map.get(GenAIRoleEnum.embeddings)
|
name = self._role_map.get(GenAIRoleEnum.embeddings)
|
||||||
return self._get_client(name) if name else None
|
return self._get_client(name) if name else None
|
||||||
|
|
||||||
def list_models(self) -> dict[str, list[str]]:
|
def list_models(self) -> dict[str, dict[str, Any]]:
|
||||||
"""Return available models keyed by config entry name."""
|
"""Return per-entry model lists and capabilities, keyed by config entry name."""
|
||||||
result: dict[str, list[str]] = {}
|
result: dict[str, dict[str, Any]] = {}
|
||||||
for name in self._configs:
|
for name, genai_cfg in self._configs.items():
|
||||||
client = self._get_client(name)
|
client = self._get_client(name)
|
||||||
if client:
|
if not client:
|
||||||
result[name] = client.list_models()
|
continue
|
||||||
|
result[name] = {
|
||||||
|
"models": client.list_models(),
|
||||||
|
"roles": [r.value for r in genai_cfg.roles],
|
||||||
|
"supports_toggleable_thinking": client.supports_toggleable_thinking,
|
||||||
|
}
|
||||||
return result
|
return result
|
||||||
|
|||||||
@ -62,6 +62,7 @@ class GeminiClient(GenAIClient):
|
|||||||
prompt: str,
|
prompt: str,
|
||||||
images: list[bytes],
|
images: list[bytes],
|
||||||
response_format: Optional[dict] = None,
|
response_format: Optional[dict] = None,
|
||||||
|
enable_thinking: bool = False,
|
||||||
) -> Optional[str]:
|
) -> Optional[str]:
|
||||||
"""Submit a request to Gemini."""
|
"""Submit a request to Gemini."""
|
||||||
contents = [prompt] + [
|
contents = [prompt] + [
|
||||||
@ -119,11 +120,14 @@ class GeminiClient(GenAIClient):
|
|||||||
messages: list[dict[str, Any]],
|
messages: list[dict[str, Any]],
|
||||||
tools: Optional[list[dict[str, Any]]] = None,
|
tools: Optional[list[dict[str, Any]]] = None,
|
||||||
tool_choice: Optional[str] = "auto",
|
tool_choice: Optional[str] = "auto",
|
||||||
|
enable_thinking: Optional[bool] = None,
|
||||||
) -> dict[str, Any]:
|
) -> dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Send chat messages to Gemini with optional tool definitions.
|
Send chat messages to Gemini with optional tool definitions.
|
||||||
|
|
||||||
Implements function calling/tool usage for Gemini models.
|
Implements function calling/tool usage for Gemini models. Thinking is
|
||||||
|
configured at the model level for Gemini, so ``enable_thinking`` is
|
||||||
|
accepted for interface parity and ignored.
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
# Convert messages to Gemini format
|
# Convert messages to Gemini format
|
||||||
|
|||||||
@ -122,6 +122,7 @@ class LlamaCppClient(GenAIClient):
|
|||||||
_supports_vision: bool
|
_supports_vision: bool
|
||||||
_supports_audio: bool
|
_supports_audio: bool
|
||||||
_supports_tools: bool
|
_supports_tools: bool
|
||||||
|
_supports_reasoning: bool
|
||||||
_image_token_cache: dict[tuple[int, int], int]
|
_image_token_cache: dict[tuple[int, int], int]
|
||||||
_text_baseline_tokens: int | None
|
_text_baseline_tokens: int | None
|
||||||
_media_marker: str
|
_media_marker: str
|
||||||
@ -135,6 +136,7 @@ class LlamaCppClient(GenAIClient):
|
|||||||
self._supports_vision = False
|
self._supports_vision = False
|
||||||
self._supports_audio = False
|
self._supports_audio = False
|
||||||
self._supports_tools = False
|
self._supports_tools = False
|
||||||
|
self._supports_reasoning = False
|
||||||
self._image_token_cache = {}
|
self._image_token_cache = {}
|
||||||
self._text_baseline_tokens = None
|
self._text_baseline_tokens = None
|
||||||
self._media_marker = "<__media__>"
|
self._media_marker = "<__media__>"
|
||||||
@ -164,15 +166,17 @@ class LlamaCppClient(GenAIClient):
|
|||||||
self._supports_vision = info["supports_vision"]
|
self._supports_vision = info["supports_vision"]
|
||||||
self._supports_audio = info["supports_audio"]
|
self._supports_audio = info["supports_audio"]
|
||||||
self._supports_tools = info["supports_tools"]
|
self._supports_tools = info["supports_tools"]
|
||||||
|
self._supports_reasoning = info["supports_reasoning"]
|
||||||
self._media_marker = info["media_marker"]
|
self._media_marker = info["media_marker"]
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
"llama.cpp model '%s' initialized — context: %s, vision: %s, audio: %s, tools: %s",
|
"llama.cpp model '%s' initialized — context: %s, vision: %s, audio: %s, tools: %s, reasoning: %s",
|
||||||
configured_model,
|
configured_model,
|
||||||
self._context_size or "unknown",
|
self._context_size or "unknown",
|
||||||
self._supports_vision,
|
self._supports_vision,
|
||||||
self._supports_audio,
|
self._supports_audio,
|
||||||
self._supports_tools,
|
self._supports_tools,
|
||||||
|
self._supports_reasoning,
|
||||||
)
|
)
|
||||||
|
|
||||||
return base_url
|
return base_url
|
||||||
@ -200,6 +204,7 @@ class LlamaCppClient(GenAIClient):
|
|||||||
"supports_vision": False,
|
"supports_vision": False,
|
||||||
"supports_audio": False,
|
"supports_audio": False,
|
||||||
"supports_tools": False,
|
"supports_tools": False,
|
||||||
|
"supports_reasoning": False,
|
||||||
"media_marker": "<__media__>",
|
"media_marker": "<__media__>",
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -279,10 +284,17 @@ class LlamaCppClient(GenAIClient):
|
|||||||
info["supports_vision"] = bool(modalities.get("vision", False))
|
info["supports_vision"] = bool(modalities.get("vision", False))
|
||||||
info["supports_audio"] = bool(modalities.get("audio", False))
|
info["supports_audio"] = bool(modalities.get("audio", False))
|
||||||
|
|
||||||
|
chat_caps = props.get("chat_template_caps") or {}
|
||||||
|
|
||||||
if not info["supports_tools"]:
|
if not info["supports_tools"]:
|
||||||
chat_caps = props.get("chat_template_caps", {})
|
|
||||||
info["supports_tools"] = bool(chat_caps.get("supports_tools", False))
|
info["supports_tools"] = bool(chat_caps.get("supports_tools", False))
|
||||||
|
|
||||||
|
# llama.cpp does not advertise per-template reasoning support, so
|
||||||
|
# detect it by looking for the `enable_thinking` toggle variable
|
||||||
|
# in the Jinja chat template itself.
|
||||||
|
chat_template = props.get("chat_template") or ""
|
||||||
|
info["supports_reasoning"] = "enable_thinking" in chat_template
|
||||||
|
|
||||||
media_marker = props.get("media_marker")
|
media_marker = props.get("media_marker")
|
||||||
if isinstance(media_marker, str) and media_marker:
|
if isinstance(media_marker, str) and media_marker:
|
||||||
info["media_marker"] = media_marker
|
info["media_marker"] = media_marker
|
||||||
@ -300,6 +312,7 @@ class LlamaCppClient(GenAIClient):
|
|||||||
prompt: str,
|
prompt: str,
|
||||||
images: list[bytes],
|
images: list[bytes],
|
||||||
response_format: Optional[dict] = None,
|
response_format: Optional[dict] = None,
|
||||||
|
enable_thinking: bool = False,
|
||||||
) -> Optional[str]:
|
) -> Optional[str]:
|
||||||
"""Submit a request to llama.cpp server."""
|
"""Submit a request to llama.cpp server."""
|
||||||
if self.provider is None:
|
if self.provider is None:
|
||||||
@ -327,7 +340,7 @@ class LlamaCppClient(GenAIClient):
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Build request payload with llama.cpp native options
|
# Build request payload with llama.cpp native options
|
||||||
payload = {
|
payload: dict[str, Any] = {
|
||||||
"model": self.genai_config.model,
|
"model": self.genai_config.model,
|
||||||
"messages": [
|
"messages": [
|
||||||
{
|
{
|
||||||
@ -341,6 +354,9 @@ class LlamaCppClient(GenAIClient):
|
|||||||
if response_format:
|
if response_format:
|
||||||
payload["response_format"] = response_format
|
payload["response_format"] = response_format
|
||||||
|
|
||||||
|
if self.supports_toggleable_thinking:
|
||||||
|
payload["chat_template_kwargs"] = {"enable_thinking": enable_thinking}
|
||||||
|
|
||||||
response = requests.post(
|
response = requests.post(
|
||||||
f"{self.provider}/v1/chat/completions",
|
f"{self.provider}/v1/chat/completions",
|
||||||
json=payload,
|
json=payload,
|
||||||
@ -377,6 +393,10 @@ class LlamaCppClient(GenAIClient):
|
|||||||
"""Whether the loaded model supports tool/function calling."""
|
"""Whether the loaded model supports tool/function calling."""
|
||||||
return self._supports_tools
|
return self._supports_tools
|
||||||
|
|
||||||
|
@property
def supports_toggleable_thinking(self) -> bool:
    """Whether the loaded model exposes a per-request thinking toggle.

    Simply reports the ``_supports_reasoning`` flag stored on the client.
    """
    reasoning_toggle = self._supports_reasoning
    return reasoning_toggle
|
||||||
|
|
||||||
def list_models(self) -> list[str]:
|
def list_models(self) -> list[str]:
|
||||||
"""Return available model IDs from the llama.cpp server."""
|
"""Return available model IDs from the llama.cpp server."""
|
||||||
base_url = self.provider or (
|
base_url = self.provider or (
|
||||||
@ -504,6 +524,7 @@ class LlamaCppClient(GenAIClient):
|
|||||||
tools: Optional[list[dict[str, Any]]],
|
tools: Optional[list[dict[str, Any]]],
|
||||||
tool_choice: Optional[str],
|
tool_choice: Optional[str],
|
||||||
stream: bool = False,
|
stream: bool = False,
|
||||||
|
enable_thinking: Optional[bool] = None,
|
||||||
) -> dict[str, Any]:
|
) -> dict[str, Any]:
|
||||||
"""Build request payload for chat completions (sync or stream)."""
|
"""Build request payload for chat completions (sync or stream)."""
|
||||||
openai_tool_choice = None
|
openai_tool_choice = None
|
||||||
@ -519,14 +540,21 @@ class LlamaCppClient(GenAIClient):
|
|||||||
"messages": messages,
|
"messages": messages,
|
||||||
"model": self.genai_config.model,
|
"model": self.genai_config.model,
|
||||||
}
|
}
|
||||||
|
|
||||||
if stream:
|
if stream:
|
||||||
payload["stream"] = True
|
payload["stream"] = True
|
||||||
payload["stream_options"] = {"include_usage": True}
|
payload["stream_options"] = {"include_usage": True}
|
||||||
payload["timings_per_token"] = True
|
payload["timings_per_token"] = True
|
||||||
|
|
||||||
if tools:
|
if tools:
|
||||||
payload["tools"] = tools
|
payload["tools"] = tools
|
||||||
|
|
||||||
if openai_tool_choice is not None:
|
if openai_tool_choice is not None:
|
||||||
payload["tool_choice"] = openai_tool_choice
|
payload["tool_choice"] = openai_tool_choice
|
||||||
|
|
||||||
|
if enable_thinking is not None and self._supports_reasoning:
|
||||||
|
payload["chat_template_kwargs"] = {"enable_thinking": enable_thinking}
|
||||||
|
|
||||||
provider_opts = {
|
provider_opts = {
|
||||||
k: v for k, v in self.provider_options.items() if k != "context_size"
|
k: v for k, v in self.provider_options.items() if k != "context_size"
|
||||||
}
|
}
|
||||||
@ -732,6 +760,7 @@ class LlamaCppClient(GenAIClient):
|
|||||||
messages: list[dict[str, Any]],
|
messages: list[dict[str, Any]],
|
||||||
tools: Optional[list[dict[str, Any]]] = None,
|
tools: Optional[list[dict[str, Any]]] = None,
|
||||||
tool_choice: Optional[str] = "auto",
|
tool_choice: Optional[str] = "auto",
|
||||||
|
enable_thinking: Optional[bool] = None,
|
||||||
) -> dict[str, Any]:
|
) -> dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Send chat messages to llama.cpp server with optional tool definitions.
|
Send chat messages to llama.cpp server with optional tool definitions.
|
||||||
@ -749,7 +778,13 @@ class LlamaCppClient(GenAIClient):
|
|||||||
"finish_reason": "error",
|
"finish_reason": "error",
|
||||||
}
|
}
|
||||||
try:
|
try:
|
||||||
payload = self._build_payload(messages, tools, tool_choice, stream=False)
|
payload = self._build_payload(
|
||||||
|
messages,
|
||||||
|
tools,
|
||||||
|
tool_choice,
|
||||||
|
stream=False,
|
||||||
|
enable_thinking=enable_thinking,
|
||||||
|
)
|
||||||
response = requests.post(
|
response = requests.post(
|
||||||
f"{self.provider}/v1/chat/completions",
|
f"{self.provider}/v1/chat/completions",
|
||||||
json=payload,
|
json=payload,
|
||||||
@ -797,6 +832,7 @@ class LlamaCppClient(GenAIClient):
|
|||||||
messages: list[dict[str, Any]],
|
messages: list[dict[str, Any]],
|
||||||
tools: Optional[list[dict[str, Any]]] = None,
|
tools: Optional[list[dict[str, Any]]] = None,
|
||||||
tool_choice: Optional[str] = "auto",
|
tool_choice: Optional[str] = "auto",
|
||||||
|
enable_thinking: Optional[bool] = None,
|
||||||
) -> AsyncGenerator[tuple[str, Any], None]:
|
) -> AsyncGenerator[tuple[str, Any], None]:
|
||||||
"""Stream chat with tools via OpenAI-compatible streaming API."""
|
"""Stream chat with tools via OpenAI-compatible streaming API."""
|
||||||
if self.provider is None:
|
if self.provider is None:
|
||||||
@ -813,7 +849,13 @@ class LlamaCppClient(GenAIClient):
|
|||||||
)
|
)
|
||||||
return
|
return
|
||||||
try:
|
try:
|
||||||
payload = self._build_payload(messages, tools, tool_choice, stream=True)
|
payload = self._build_payload(
|
||||||
|
messages,
|
||||||
|
tools,
|
||||||
|
tool_choice,
|
||||||
|
stream=True,
|
||||||
|
enable_thinking=enable_thinking,
|
||||||
|
)
|
||||||
content_parts: list[str] = []
|
content_parts: list[str] = []
|
||||||
reasoning_parts: list[str] = []
|
reasoning_parts: list[str] = []
|
||||||
tool_calls_by_index: dict[int, dict[str, Any]] = {}
|
tool_calls_by_index: dict[int, dict[str, Any]] = {}
|
||||||
|
|||||||
@ -98,6 +98,22 @@ class OllamaClient(GenAIClient):
|
|||||||
|
|
||||||
provider: ApiClient | None
|
provider: ApiClient | None
|
||||||
provider_options: dict[str, Any]
|
provider_options: dict[str, Any]
|
||||||
|
_supports_thinking_cache: Optional[bool] = None
|
||||||
|
|
||||||
|
@property
def supports_toggleable_thinking(self) -> bool:
    """Whether the configured Ollama model exposes a per-request thinking toggle.

    Asks the provider to ``show`` the configured model and looks for
    ``"thinking"`` in the returned ``capabilities`` list. A successful
    answer (positive or negative) is stored in ``_supports_thinking_cache``
    so later reads do not query the provider again.

    Returns:
        ``True`` when the model lists the ``"thinking"`` capability.
        ``False`` when it does not, when no provider is available, or when
        the capability query fails.
    """
    if self._supports_thinking_cache is not None:
        return self._supports_thinking_cache
    if self.provider is None:
        return False
    try:
        response = self.provider.show(self.genai_config.model)
        capabilities = response.get("capabilities") or []
    except Exception as e:
        # Report "unsupported" for this call only. Caching the failure would
        # let a single transient error hide the toggle until the process is
        # restarted, unlike the provider-is-None path above which also does
        # not cache.
        logger.debug("Failed to query Ollama model capabilities: %s", e)
        return False
    self._supports_thinking_cache = "thinking" in capabilities
    return self._supports_thinking_cache
|
||||||
|
|
||||||
def _auth_headers(self) -> dict | None:
|
def _auth_headers(self) -> dict | None:
|
||||||
if self.genai_config.api_key:
|
if self.genai_config.api_key:
|
||||||
@ -178,6 +194,7 @@ class OllamaClient(GenAIClient):
|
|||||||
prompt: str,
|
prompt: str,
|
||||||
images: list[bytes],
|
images: list[bytes],
|
||||||
response_format: Optional[dict] = None,
|
response_format: Optional[dict] = None,
|
||||||
|
enable_thinking: bool = False,
|
||||||
) -> Optional[str]:
|
) -> Optional[str]:
|
||||||
"""Submit a request to Ollama"""
|
"""Submit a request to Ollama"""
|
||||||
if self.provider is None:
|
if self.provider is None:
|
||||||
@ -194,6 +211,8 @@ class OllamaClient(GenAIClient):
|
|||||||
schema = response_format.get("json_schema", {}).get("schema")
|
schema = response_format.get("json_schema", {}).get("schema")
|
||||||
if schema:
|
if schema:
|
||||||
ollama_options["format"] = self._clean_schema_for_ollama(schema)
|
ollama_options["format"] = self._clean_schema_for_ollama(schema)
|
||||||
|
if self.supports_toggleable_thinking:
|
||||||
|
ollama_options["think"] = enable_thinking
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"Ollama generate request: model=%s, prompt_len=%s, image_count=%s, "
|
"Ollama generate request: model=%s, prompt_len=%s, image_count=%s, "
|
||||||
"has_format=%s, options=%s",
|
"has_format=%s, options=%s",
|
||||||
@ -274,6 +293,7 @@ class OllamaClient(GenAIClient):
|
|||||||
tools: Optional[list[dict[str, Any]]],
|
tools: Optional[list[dict[str, Any]]],
|
||||||
tool_choice: Optional[str],
|
tool_choice: Optional[str],
|
||||||
stream: bool = False,
|
stream: bool = False,
|
||||||
|
enable_thinking: Optional[bool] = None,
|
||||||
) -> dict[str, Any]:
|
) -> dict[str, Any]:
|
||||||
"""Build request_messages and params for chat (sync or stream)."""
|
"""Build request_messages and params for chat (sync or stream)."""
|
||||||
request_messages = []
|
request_messages = []
|
||||||
@ -318,6 +338,8 @@ class OllamaClient(GenAIClient):
|
|||||||
request_params["stream"] = True
|
request_params["stream"] = True
|
||||||
if tools:
|
if tools:
|
||||||
request_params["tools"] = tools
|
request_params["tools"] = tools
|
||||||
|
if enable_thinking is not None and self.supports_toggleable_thinking:
|
||||||
|
request_params["think"] = enable_thinking
|
||||||
return request_params
|
return request_params
|
||||||
|
|
||||||
def _message_from_response(self, response: dict[str, Any]) -> dict[str, Any]:
|
def _message_from_response(self, response: dict[str, Any]) -> dict[str, Any]:
|
||||||
@ -365,6 +387,7 @@ class OllamaClient(GenAIClient):
|
|||||||
messages: list[dict[str, Any]],
|
messages: list[dict[str, Any]],
|
||||||
tools: Optional[list[dict[str, Any]]] = None,
|
tools: Optional[list[dict[str, Any]]] = None,
|
||||||
tool_choice: Optional[str] = "auto",
|
tool_choice: Optional[str] = "auto",
|
||||||
|
enable_thinking: Optional[bool] = None,
|
||||||
) -> dict[str, Any]:
|
) -> dict[str, Any]:
|
||||||
if self.provider is None:
|
if self.provider is None:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
@ -377,7 +400,11 @@ class OllamaClient(GenAIClient):
|
|||||||
}
|
}
|
||||||
try:
|
try:
|
||||||
request_params = self._build_request_params(
|
request_params = self._build_request_params(
|
||||||
messages, tools, tool_choice, stream=False
|
messages,
|
||||||
|
tools,
|
||||||
|
tool_choice,
|
||||||
|
stream=False,
|
||||||
|
enable_thinking=enable_thinking,
|
||||||
)
|
)
|
||||||
response = self.provider.chat(**request_params)
|
response = self.provider.chat(**request_params)
|
||||||
return self._message_from_response(response)
|
return self._message_from_response(response)
|
||||||
@ -401,6 +428,7 @@ class OllamaClient(GenAIClient):
|
|||||||
messages: list[dict[str, Any]],
|
messages: list[dict[str, Any]],
|
||||||
tools: Optional[list[dict[str, Any]]] = None,
|
tools: Optional[list[dict[str, Any]]] = None,
|
||||||
tool_choice: Optional[str] = "auto",
|
tool_choice: Optional[str] = "auto",
|
||||||
|
enable_thinking: Optional[bool] = None,
|
||||||
) -> AsyncGenerator[tuple[str, Any], None]:
|
) -> AsyncGenerator[tuple[str, Any], None]:
|
||||||
"""Stream chat with tools; yields content deltas then final message.
|
"""Stream chat with tools; yields content deltas then final message.
|
||||||
|
|
||||||
@ -430,7 +458,11 @@ class OllamaClient(GenAIClient):
|
|||||||
"Ollama: tools provided, using non-streaming call for tool support"
|
"Ollama: tools provided, using non-streaming call for tool support"
|
||||||
)
|
)
|
||||||
request_params = self._build_request_params(
|
request_params = self._build_request_params(
|
||||||
messages, tools, tool_choice, stream=False
|
messages,
|
||||||
|
tools,
|
||||||
|
tool_choice,
|
||||||
|
stream=False,
|
||||||
|
enable_thinking=enable_thinking,
|
||||||
)
|
)
|
||||||
async_client = OllamaAsyncClient(
|
async_client = OllamaAsyncClient(
|
||||||
host=self.genai_config.base_url,
|
host=self.genai_config.base_url,
|
||||||
@ -452,7 +484,11 @@ class OllamaClient(GenAIClient):
|
|||||||
return
|
return
|
||||||
|
|
||||||
request_params = self._build_request_params(
|
request_params = self._build_request_params(
|
||||||
messages, tools, tool_choice, stream=True
|
messages,
|
||||||
|
tools,
|
||||||
|
tool_choice,
|
||||||
|
stream=True,
|
||||||
|
enable_thinking=enable_thinking,
|
||||||
)
|
)
|
||||||
async_client = OllamaAsyncClient(
|
async_client = OllamaAsyncClient(
|
||||||
host=self.genai_config.base_url,
|
host=self.genai_config.base_url,
|
||||||
|
|||||||
@ -61,6 +61,7 @@ class OpenAIClient(GenAIClient):
|
|||||||
prompt: str,
|
prompt: str,
|
||||||
images: list[bytes],
|
images: list[bytes],
|
||||||
response_format: Optional[dict] = None,
|
response_format: Optional[dict] = None,
|
||||||
|
enable_thinking: bool = False,
|
||||||
) -> Optional[str]:
|
) -> Optional[str]:
|
||||||
"""Submit a request to OpenAI."""
|
"""Submit a request to OpenAI."""
|
||||||
encoded_images = [base64.b64encode(image).decode("utf-8") for image in images]
|
encoded_images = [base64.b64encode(image).decode("utf-8") for image in images]
|
||||||
@ -187,11 +188,14 @@ class OpenAIClient(GenAIClient):
|
|||||||
messages: list[dict[str, Any]],
|
messages: list[dict[str, Any]],
|
||||||
tools: Optional[list[dict[str, Any]]] = None,
|
tools: Optional[list[dict[str, Any]]] = None,
|
||||||
tool_choice: Optional[str] = "auto",
|
tool_choice: Optional[str] = "auto",
|
||||||
|
enable_thinking: Optional[bool] = None,
|
||||||
) -> dict[str, Any]:
|
) -> dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Send chat messages to OpenAI with optional tool definitions.
|
Send chat messages to OpenAI with optional tool definitions.
|
||||||
|
|
||||||
Implements function calling/tool usage for OpenAI models.
|
Implements function calling/tool usage for OpenAI models. The OpenAI
|
||||||
|
chat completions API does not expose a per-request thinking toggle,
|
||||||
|
so ``enable_thinking`` is accepted for interface parity and ignored.
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
openai_tool_choice = None
|
openai_tool_choice = None
|
||||||
|
|||||||
@ -65,5 +65,8 @@
|
|||||||
"active": "Reasoning…",
|
"active": "Reasoning…",
|
||||||
"show": "Show reasoning",
|
"show": "Show reasoning",
|
||||||
"hide": "Hide reasoning"
|
"hide": "Hide reasoning"
|
||||||
|
},
|
||||||
|
"thinking": {
|
||||||
|
"toggle": "Toggle thinking"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
147
web/src/components/chat/ChatComposer.tsx
Normal file
147
web/src/components/chat/ChatComposer.tsx
Normal file
@ -0,0 +1,147 @@
|
|||||||
|
import { Button } from "@/components/ui/button";
|
||||||
|
import { Input } from "@/components/ui/input";
|
||||||
|
import { FaArrowUpLong, FaStop } from "react-icons/fa6";
|
||||||
|
import { LuBrain } from "react-icons/lu";
|
||||||
|
import { useTranslation } from "react-i18next";
|
||||||
|
import { cn } from "@/lib/utils";
|
||||||
|
import {
|
||||||
|
Tooltip,
|
||||||
|
TooltipContent,
|
||||||
|
TooltipProvider,
|
||||||
|
TooltipTrigger,
|
||||||
|
} from "@/components/ui/tooltip";
|
||||||
|
import { ChatAttachmentChip } from "@/components/chat/ChatAttachmentChip";
|
||||||
|
import { ChatQuickReplies } from "@/components/chat/ChatQuickReplies";
|
||||||
|
import { ChatPaperclipButton } from "@/components/chat/ChatPaperclipButton";
|
||||||
|
|
||||||
|
/** Props accepted by the chat composer. */
type ChatComposerProps = {
  /** Current text shown in the input field. */
  input: string;
  /** Called with the new text whenever the input field changes. */
  setInput: (value: string) => void;
  /**
   * Sends a message. Called without an argument for the typed input and with
   * explicit text when a quick reply is chosen.
   */
  sendMessage: (textOverride?: string) => void;
  /** Placeholder text for the input field. */
  placeholder: string;

  /** When true, the thinking toggle button is rendered. */
  supportsThinking: boolean;
  /** Current state of the thinking toggle. */
  thinkingEnabled: boolean;
  /**
   * Updates the thinking toggle. The composer only ever passes a boolean (the
   * negated current state); `undefined` is accepted by the signature.
   */
  setThinkingEnabled: (value: boolean | undefined) => void;

  /** Disables the attach, quick-reply, thinking and send controls while true. */
  isLoading?: boolean;
  /** When provided and `isLoading` is true, a stop button replaces send. */
  onStop?: () => void;

  /** Id of the event currently attached to the draft, if any. */
  attachedEventId?: string | null;
  /** Removes the attachment; the attachment chip is shown only when this is set. */
  onClearAttachment?: () => void;
  /** Attaches an event; the attach button is shown only when this is set. */
  onAttach?: (eventId: string) => void;
  /** Event ids handed to the attach button; treated as empty when omitted. */
  recentEventIds?: string[];

  /** Renders a taller input with larger text. */
  large?: boolean;
};
|
||||||
|
|
||||||
|
/**
 * Chat input area.
 *
 * Renders, top to bottom: the attachment chip (when an event is attached and
 * it can be cleared), quick replies for the attached event, and a row with the
 * optional attach button, the optional thinking toggle, the text input and a
 * send or stop button.
 *
 * The component is controlled: the text, the thinking state and the loading
 * state are all owned by the parent and passed in through props.
 */
export function ChatComposer({
  input,
  setInput,
  sendMessage,
  placeholder,
  supportsThinking,
  thinkingEnabled,
  setThinkingEnabled,
  isLoading = false,
  onStop,
  attachedEventId,
  onClearAttachment,
  onAttach,
  recentEventIds,
  large = false,
}: ChatComposerProps) {
  const { t } = useTranslation(["views/chat"]);

  const handleKeyDown = (e: React.KeyboardEvent<HTMLInputElement>) => {
    if (e.key !== "Enter" || e.shiftKey) {
      return;
    }
    // While an IME composition is active, Enter confirms the candidate text
    // and must not submit the half-composed message.
    if (e.nativeEvent.isComposing) {
      return;
    }
    e.preventDefault();
    // Mirror the send button, which is disabled while a response is in flight.
    if (isLoading) {
      return;
    }
    sendMessage();
  };

  const showStop = isLoading && !!onStop;

  return (
    <div className="flex w-full flex-col items-stretch justify-center gap-2 rounded-xl bg-secondary p-3">
      {attachedEventId && onClearAttachment && (
        <div className="flex items-center">
          <ChatAttachmentChip
            eventId={attachedEventId}
            mode="composer"
            onRemove={onClearAttachment}
          />
        </div>
      )}
      {attachedEventId && (
        <ChatQuickReplies
          onSend={(text) => sendMessage(text)}
          disabled={isLoading}
        />
      )}
      <div className="flex w-full flex-row items-center gap-2">
        {/* Narrowing on onAttach itself avoids a non-null assertion. */}
        {onAttach && (
          <ChatPaperclipButton
            recentEventIds={recentEventIds ?? []}
            onAttach={onAttach}
            disabled={isLoading || attachedEventId != null}
          />
        )}
        {supportsThinking && (
          <TooltipProvider>
            <Tooltip>
              <TooltipTrigger asChild>
                <Button
                  type="button"
                  size="sm"
                  variant={thinkingEnabled ? "select" : "ghost"}
                  aria-pressed={thinkingEnabled}
                  aria-label={t("thinking.toggle")}
                  className={cn(
                    "flex size-9 shrink-0 items-center justify-center rounded-full p-0",
                    !thinkingEnabled && "text-secondary-foreground",
                  )}
                  onClick={() => setThinkingEnabled(!thinkingEnabled)}
                  disabled={isLoading}
                >
                  <LuBrain className="size-4" />
                </Button>
              </TooltipTrigger>
              <TooltipContent>{t("thinking.toggle")}</TooltipContent>
            </Tooltip>
          </TooltipProvider>
        )}
        <Input
          className={cn(
            "w-full flex-1 border-transparent bg-transparent shadow-none focus-visible:ring-0 dark:bg-transparent",
            large && "h-12 text-base",
          )}
          placeholder={placeholder}
          value={input}
          onChange={(e) => setInput(e.target.value)}
          onKeyDown={handleKeyDown}
          aria-busy={isLoading}
        />
        {showStop ? (
          <Button
            type="button"
            variant="destructive"
            className="size-10 shrink-0 rounded-full"
            onClick={onStop}
          >
            <FaStop className="size-3" />
          </Button>
        ) : (
          <Button
            type="button"
            variant="select"
            className="size-10 shrink-0 rounded-full"
            disabled={!input.trim() || isLoading}
            onClick={() => sendMessage()}
          >
            <FaArrowUpLong className="size-4" />
          </Button>
        )}
      </div>
    </div>
  );
}
|
||||||
@ -1,15 +1,22 @@
|
|||||||
import { Button } from "@/components/ui/button";
|
import { Button } from "@/components/ui/button";
|
||||||
import { Input } from "@/components/ui/input";
|
|
||||||
import { FaArrowUpLong } from "react-icons/fa6";
|
|
||||||
import { useTranslation } from "react-i18next";
|
import { useTranslation } from "react-i18next";
|
||||||
import { useState } from "react";
|
import { useState } from "react";
|
||||||
import type { StartingRequest } from "@/types/chat";
|
import type { StartingRequest } from "@/types/chat";
|
||||||
|
import { ChatComposer } from "@/components/chat/ChatComposer";
|
||||||
|
|
||||||
type ChatStartingStateProps = {
|
type ChatStartingStateProps = {
|
||||||
onSendMessage: (message: string) => void;
|
onSendMessage: (message: string) => void;
|
||||||
|
supportsThinking: boolean;
|
||||||
|
thinkingEnabled: boolean;
|
||||||
|
setThinkingEnabled: (value: boolean | undefined) => void;
|
||||||
};
|
};
|
||||||
|
|
||||||
export function ChatStartingState({ onSendMessage }: ChatStartingStateProps) {
|
export function ChatStartingState({
|
||||||
|
onSendMessage,
|
||||||
|
supportsThinking,
|
||||||
|
thinkingEnabled,
|
||||||
|
setThinkingEnabled,
|
||||||
|
}: ChatStartingStateProps) {
|
||||||
const { t } = useTranslation(["views/chat"]);
|
const { t } = useTranslation(["views/chat"]);
|
||||||
const [input, setInput] = useState("");
|
const [input, setInput] = useState("");
|
||||||
|
|
||||||
@ -36,20 +43,13 @@ export function ChatStartingState({ onSendMessage }: ChatStartingStateProps) {
|
|||||||
onSendMessage(prompt);
|
onSendMessage(prompt);
|
||||||
};
|
};
|
||||||
|
|
||||||
const handleSubmit = () => {
|
const handleSend = (textOverride?: string) => {
|
||||||
const text = input.trim();
|
const text = (textOverride ?? input).trim();
|
||||||
if (!text) return;
|
if (!text) return;
|
||||||
onSendMessage(text);
|
onSendMessage(text);
|
||||||
setInput("");
|
setInput("");
|
||||||
};
|
};
|
||||||
|
|
||||||
const handleKeyDown = (e: React.KeyboardEvent<HTMLInputElement>) => {
|
|
||||||
if (e.key === "Enter" && !e.shiftKey) {
|
|
||||||
e.preventDefault();
|
|
||||||
handleSubmit();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div className="flex size-full flex-col items-center justify-center gap-6 p-8">
|
<div className="flex size-full flex-col items-center justify-center gap-6 p-8">
|
||||||
<div className="flex flex-col items-center gap-2">
|
<div className="flex flex-col items-center gap-2">
|
||||||
@ -77,22 +77,17 @@ export function ChatStartingState({ onSendMessage }: ChatStartingStateProps) {
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div className="flex w-full max-w-2xl flex-row items-center gap-2 rounded-xl bg-secondary p-3">
|
<div className="w-full max-w-2xl">
|
||||||
<Input
|
<ChatComposer
|
||||||
className="h-12 w-full flex-1 border-transparent bg-transparent text-base shadow-none focus-visible:ring-0 dark:bg-transparent"
|
input={input}
|
||||||
|
setInput={setInput}
|
||||||
|
sendMessage={handleSend}
|
||||||
placeholder={t("placeholder")}
|
placeholder={t("placeholder")}
|
||||||
value={input}
|
supportsThinking={supportsThinking}
|
||||||
onChange={(e) => setInput(e.target.value)}
|
thinkingEnabled={thinkingEnabled}
|
||||||
onKeyDown={handleKeyDown}
|
setThinkingEnabled={setThinkingEnabled}
|
||||||
|
large
|
||||||
/>
|
/>
|
||||||
<Button
|
|
||||||
variant="select"
|
|
||||||
className="size-10 shrink-0 rounded-full"
|
|
||||||
disabled={!input.trim()}
|
|
||||||
onClick={handleSubmit}
|
|
||||||
>
|
|
||||||
<FaArrowUpLong size="18" />
|
|
||||||
</Button>
|
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
);
|
);
|
||||||
|
|||||||
@ -8,6 +8,12 @@ import {
|
|||||||
} from "@/components/ui/collapsible";
|
} from "@/components/ui/collapsible";
|
||||||
import { Button } from "@/components/ui/button";
|
import { Button } from "@/components/ui/button";
|
||||||
import { cn } from "@/lib/utils";
|
import { cn } from "@/lib/utils";
|
||||||
|
import {
|
||||||
|
Tooltip,
|
||||||
|
TooltipContent,
|
||||||
|
TooltipProvider,
|
||||||
|
TooltipTrigger,
|
||||||
|
} from "@/components/ui/tooltip";
|
||||||
|
|
||||||
type ReasoningBubbleProps = {
|
type ReasoningBubbleProps = {
|
||||||
/** The accumulated reasoning text from the model. */
|
/** The accumulated reasoning text from the model. */
|
||||||
@ -54,34 +60,42 @@ export function ReasoningBubble({
|
|||||||
|
|
||||||
return (
|
return (
|
||||||
<div className="self-start rounded-2xl bg-muted/60 px-3 py-2 text-muted-foreground">
|
<div className="self-start rounded-2xl bg-muted/60 px-3 py-2 text-muted-foreground">
|
||||||
<Collapsible open={open} onOpenChange={handleOpenChange}>
|
<TooltipProvider>
|
||||||
<CollapsibleTrigger asChild>
|
<Collapsible open={open} onOpenChange={handleOpenChange}>
|
||||||
<Button
|
<CollapsibleTrigger asChild>
|
||||||
variant="ghost"
|
<Button
|
||||||
size="sm"
|
variant="ghost"
|
||||||
className="h-auto w-full min-w-0 justify-start gap-2 whitespace-normal p-0 text-left text-xs hover:bg-transparent"
|
size="sm"
|
||||||
>
|
className="h-auto w-full min-w-0 justify-start gap-2 whitespace-normal p-0 text-left text-xs hover:bg-transparent"
|
||||||
<LuBrain
|
>
|
||||||
className={cn(
|
<Tooltip>
|
||||||
"size-3 shrink-0",
|
<TooltipTrigger asChild>
|
||||||
!answerStarted && "animate-pulse",
|
<div className="flex items-center gap-2">
|
||||||
)}
|
<LuBrain
|
||||||
/>
|
className={cn(
|
||||||
<span className="break-words font-medium">{label}</span>
|
"size-3 shrink-0",
|
||||||
{answerStarted &&
|
!answerStarted && "animate-pulse",
|
||||||
(open ? (
|
)}
|
||||||
<LuChevronDown className="ml-auto size-3 shrink-0" />
|
/>
|
||||||
) : (
|
</div>
|
||||||
<LuChevronRight className="ml-auto size-3 shrink-0" />
|
</TooltipTrigger>
|
||||||
))}
|
<TooltipContent>{label}</TooltipContent>
|
||||||
</Button>
|
</Tooltip>
|
||||||
</CollapsibleTrigger>
|
{answerStarted &&
|
||||||
<CollapsibleContent>
|
(open ? (
|
||||||
<pre className="scrollbar-container mt-2 max-h-64 overflow-auto whitespace-pre-wrap break-words rounded bg-muted/50 p-2 font-sans text-xs leading-relaxed">
|
<LuChevronDown className="ml-auto size-3 shrink-0" />
|
||||||
{reasoning}
|
) : (
|
||||||
</pre>
|
<LuChevronRight className="ml-auto size-3 shrink-0" />
|
||||||
</CollapsibleContent>
|
))}
|
||||||
</Collapsible>
|
</Button>
|
||||||
|
</CollapsibleTrigger>
|
||||||
|
<CollapsibleContent>
|
||||||
|
<pre className="scrollbar-container mt-2 max-h-64 overflow-auto whitespace-pre-wrap break-words rounded bg-muted/50 p-2 font-sans text-xs leading-relaxed">
|
||||||
|
{reasoning}
|
||||||
|
</pre>
|
||||||
|
</CollapsibleContent>
|
||||||
|
</Collapsible>
|
||||||
|
</TooltipProvider>
|
||||||
</div>
|
</div>
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -23,6 +23,7 @@ import {
|
|||||||
PopoverTrigger,
|
PopoverTrigger,
|
||||||
} from "@/components/ui/popover";
|
} from "@/components/ui/popover";
|
||||||
import type { ConfigFormContext, JsonObject } from "@/types/configForm";
|
import type { ConfigFormContext, JsonObject } from "@/types/configForm";
|
||||||
|
import type { GenAIModelsResponse } from "@/types/chat";
|
||||||
import { getSizedFieldClassName } from "../utils";
|
import { getSizedFieldClassName } from "../utils";
|
||||||
|
|
||||||
type ProbeResponse =
|
type ProbeResponse =
|
||||||
@ -73,11 +74,12 @@ export function GenAIModelWidget(props: WidgetProps) {
|
|||||||
return `${e.provider ?? ""}|${e.base_url ?? ""}`;
|
return `${e.provider ?? ""}|${e.base_url ?? ""}`;
|
||||||
}, [providerKey, formContext?.fullConfig]);
|
}, [providerKey, formContext?.fullConfig]);
|
||||||
|
|
||||||
const { data: allModels, mutate: mutateModels } = useSWR<
|
const { data: allModels, mutate: mutateModels } = useSWR<GenAIModelsResponse>(
|
||||||
Record<string, string[]>
|
"genai/models",
|
||||||
>("genai/models", {
|
{
|
||||||
revalidateOnFocus: false,
|
revalidateOnFocus: false,
|
||||||
});
|
},
|
||||||
|
);
|
||||||
|
|
||||||
// Revalidate models when the saved config fingerprint changes (e.g. after
|
// Revalidate models when the saved config fingerprint changes (e.g. after
|
||||||
// switching provider or base_url and saving).
|
// switching provider or base_url and saving).
|
||||||
@ -89,9 +91,9 @@ export function GenAIModelWidget(props: WidgetProps) {
|
|||||||
}
|
}
|
||||||
}, [configFingerprint, mutateModels]);
|
}, [configFingerprint, mutateModels]);
|
||||||
|
|
||||||
const fetchedModels = useMemo(() => {
|
const fetchedModels = useMemo<string[]>(() => {
|
||||||
if (!allModels || !providerKey) return [];
|
if (!allModels || !providerKey) return [];
|
||||||
return allModels[providerKey] ?? [];
|
return allModels[providerKey]?.models ?? [];
|
||||||
}, [allModels, providerKey]);
|
}, [allModels, providerKey]);
|
||||||
|
|
||||||
const [probeStatus, setProbeStatus] = useState<ProbeStatus>("idle");
|
const [probeStatus, setProbeStatus] = useState<ProbeStatus>("idle");
|
||||||
|
|||||||
@ -1,20 +1,21 @@
|
|||||||
import { Button } from "@/components/ui/button";
|
import { Button } from "@/components/ui/button";
|
||||||
import { Input } from "@/components/ui/input";
|
|
||||||
import { FaArrowUpLong, FaStop } from "react-icons/fa6";
|
|
||||||
import { LuCircleAlert, LuMessageSquarePlus } from "react-icons/lu";
|
import { LuCircleAlert, LuMessageSquarePlus } from "react-icons/lu";
|
||||||
import { useTranslation } from "react-i18next";
|
import { useTranslation } from "react-i18next";
|
||||||
import { useState, useCallback, useRef, useEffect, useMemo } from "react";
|
import { useState, useCallback, useRef, useEffect, useMemo } from "react";
|
||||||
import axios from "axios";
|
import axios from "axios";
|
||||||
|
import useSWR from "swr";
|
||||||
import { ChatEventThumbnailsRow } from "@/components/chat/ChatEventThumbnailsRow";
|
import { ChatEventThumbnailsRow } from "@/components/chat/ChatEventThumbnailsRow";
|
||||||
import { MessageBubble } from "@/components/chat/ChatMessage";
|
import { MessageBubble } from "@/components/chat/ChatMessage";
|
||||||
import { ReasoningBubble } from "@/components/chat/ReasoningBubble";
|
import { ReasoningBubble } from "@/components/chat/ReasoningBubble";
|
||||||
import { ToolCallsGroup } from "@/components/chat/ToolCallsGroup";
|
import { ToolCallsGroup } from "@/components/chat/ToolCallsGroup";
|
||||||
import { ChatStartingState } from "@/components/chat/ChatStartingState";
|
import { ChatStartingState } from "@/components/chat/ChatStartingState";
|
||||||
import { ChatAttachmentChip } from "@/components/chat/ChatAttachmentChip";
|
import { ChatComposer } from "@/components/chat/ChatComposer";
|
||||||
import { ChatQuickReplies } from "@/components/chat/ChatQuickReplies";
|
|
||||||
import { ChatPaperclipButton } from "@/components/chat/ChatPaperclipButton";
|
|
||||||
import ChatSettings from "@/components/chat/ChatSettings";
|
import ChatSettings from "@/components/chat/ChatSettings";
|
||||||
import type { ChatMessage, ShowStatsMode } from "@/types/chat";
|
import type {
|
||||||
|
ChatMessage,
|
||||||
|
GenAIModelsResponse,
|
||||||
|
ShowStatsMode,
|
||||||
|
} from "@/types/chat";
|
||||||
import { usePersistence } from "@/hooks/use-persistence";
|
import { usePersistence } from "@/hooks/use-persistence";
|
||||||
import {
|
import {
|
||||||
getEventIdsFromSearchObjectsToolCalls,
|
getEventIdsFromSearchObjectsToolCalls,
|
||||||
@ -38,9 +39,26 @@ export default function ChatPage() {
|
|||||||
"chat-auto-scroll",
|
"chat-auto-scroll",
|
||||||
true,
|
true,
|
||||||
);
|
);
|
||||||
|
const [thinkingEnabled, setThinkingEnabled] = usePersistence<boolean>(
|
||||||
|
"chat-thinking-enabled",
|
||||||
|
false,
|
||||||
|
);
|
||||||
const scrollRef = useRef<HTMLDivElement>(null);
|
const scrollRef = useRef<HTMLDivElement>(null);
|
||||||
const abortRef = useRef<AbortController | null>(null);
|
const abortRef = useRef<AbortController | null>(null);
|
||||||
|
|
||||||
|
const { data: genaiInfo } = useSWR<GenAIModelsResponse>("genai/models", {
|
||||||
|
revalidateOnFocus: false,
|
||||||
|
});
|
||||||
|
const supportsThinking = useMemo(() => {
|
||||||
|
if (!genaiInfo) return false;
|
||||||
|
for (const entry of Object.values(genaiInfo)) {
|
||||||
|
if (entry.roles?.includes("chat") && entry.supports_toggleable_thinking) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}, [genaiInfo]);
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
document.title = t("documentTitle");
|
document.title = t("documentTitle");
|
||||||
}, [t]);
|
}, [t]);
|
||||||
@ -100,9 +118,10 @@ export default function ChatPage() {
|
|||||||
defaultErrorMessage: t("error"),
|
defaultErrorMessage: t("error"),
|
||||||
},
|
},
|
||||||
controller.signal,
|
controller.signal,
|
||||||
|
supportsThinking ? { enableThinking: !!thinkingEnabled } : {},
|
||||||
);
|
);
|
||||||
},
|
},
|
||||||
[isLoading, t],
|
[isLoading, supportsThinking, t, thinkingEnabled],
|
||||||
);
|
);
|
||||||
|
|
||||||
const recentEventIds = useMemo(() => {
|
const recentEventIds = useMemo(() => {
|
||||||
@ -305,6 +324,9 @@ export default function ChatPage() {
|
|||||||
setInput("");
|
setInput("");
|
||||||
submitConversation([{ role: "user", content: message }]);
|
submitConversation([{ role: "user", content: message }]);
|
||||||
}}
|
}}
|
||||||
|
supportsThinking={supportsThinking}
|
||||||
|
thinkingEnabled={!!thinkingEnabled}
|
||||||
|
setThinkingEnabled={setThinkingEnabled}
|
||||||
/>
|
/>
|
||||||
)}
|
)}
|
||||||
</div>
|
</div>
|
||||||
@ -313,7 +335,7 @@ export default function ChatPage() {
|
|||||||
{hasStarted && (
|
{hasStarted && (
|
||||||
<div className="flex shrink-0 justify-center p-2 md:px-4 md:pb-4">
|
<div className="flex shrink-0 justify-center p-2 md:px-4 md:pb-4">
|
||||||
<div className="flex w-full xl:w-[50%] 3xl:w-[35%]">
|
<div className="flex w-full xl:w-[50%] 3xl:w-[35%]">
|
||||||
<ChatEntry
|
<ChatComposer
|
||||||
input={input}
|
input={input}
|
||||||
setInput={setInput}
|
setInput={setInput}
|
||||||
sendMessage={sendMessage}
|
sendMessage={sendMessage}
|
||||||
@ -324,6 +346,9 @@ export default function ChatPage() {
|
|||||||
onAttach={setAttachedEventId}
|
onAttach={setAttachedEventId}
|
||||||
onStop={stopGeneration}
|
onStop={stopGeneration}
|
||||||
recentEventIds={recentEventIds}
|
recentEventIds={recentEventIds}
|
||||||
|
supportsThinking={supportsThinking}
|
||||||
|
thinkingEnabled={!!thinkingEnabled}
|
||||||
|
setThinkingEnabled={setThinkingEnabled}
|
||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@ -331,89 +356,3 @@ export default function ChatPage() {
|
|||||||
</div>
|
</div>
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
type ChatEntryProps = {
|
|
||||||
input: string;
|
|
||||||
setInput: (value: string) => void;
|
|
||||||
sendMessage: (textOverride?: string) => void;
|
|
||||||
isLoading: boolean;
|
|
||||||
placeholder: string;
|
|
||||||
attachedEventId: string | null;
|
|
||||||
onClearAttachment: () => void;
|
|
||||||
onAttach: (eventId: string) => void;
|
|
||||||
onStop: () => void;
|
|
||||||
recentEventIds: string[];
|
|
||||||
};
|
|
||||||
|
|
||||||
function ChatEntry({
|
|
||||||
input,
|
|
||||||
setInput,
|
|
||||||
sendMessage,
|
|
||||||
isLoading,
|
|
||||||
placeholder,
|
|
||||||
attachedEventId,
|
|
||||||
onClearAttachment,
|
|
||||||
onAttach,
|
|
||||||
onStop,
|
|
||||||
recentEventIds,
|
|
||||||
}: ChatEntryProps) {
|
|
||||||
const handleKeyDown = (e: React.KeyboardEvent<HTMLInputElement>) => {
|
|
||||||
if (e.key === "Enter" && !e.shiftKey) {
|
|
||||||
e.preventDefault();
|
|
||||||
sendMessage();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
return (
|
|
||||||
<div className="flex w-full flex-col items-stretch justify-center gap-2 rounded-xl bg-secondary p-3">
|
|
||||||
{attachedEventId && (
|
|
||||||
<div className="flex items-center">
|
|
||||||
<ChatAttachmentChip
|
|
||||||
eventId={attachedEventId}
|
|
||||||
mode="composer"
|
|
||||||
onRemove={onClearAttachment}
|
|
||||||
/>
|
|
||||||
</div>
|
|
||||||
)}
|
|
||||||
{attachedEventId && (
|
|
||||||
<ChatQuickReplies
|
|
||||||
onSend={(text) => sendMessage(text)}
|
|
||||||
disabled={isLoading}
|
|
||||||
/>
|
|
||||||
)}
|
|
||||||
<div className="flex w-full flex-row items-center gap-2">
|
|
||||||
<ChatPaperclipButton
|
|
||||||
recentEventIds={recentEventIds}
|
|
||||||
onAttach={onAttach}
|
|
||||||
disabled={isLoading || attachedEventId != null}
|
|
||||||
/>
|
|
||||||
<Input
|
|
||||||
className="w-full flex-1 border-transparent bg-transparent shadow-none focus-visible:ring-0 dark:bg-transparent"
|
|
||||||
placeholder={placeholder}
|
|
||||||
value={input}
|
|
||||||
onChange={(e) => setInput(e.target.value)}
|
|
||||||
onKeyDown={handleKeyDown}
|
|
||||||
aria-busy={isLoading}
|
|
||||||
/>
|
|
||||||
{isLoading ? (
|
|
||||||
<Button
|
|
||||||
variant="destructive"
|
|
||||||
className="size-10 shrink-0 rounded-full"
|
|
||||||
onClick={onStop}
|
|
||||||
>
|
|
||||||
<FaStop className="size-3" />
|
|
||||||
</Button>
|
|
||||||
) : (
|
|
||||||
<Button
|
|
||||||
variant="select"
|
|
||||||
className="size-10 shrink-0 rounded-full"
|
|
||||||
disabled={!input.trim()}
|
|
||||||
onClick={() => sendMessage()}
|
|
||||||
>
|
|
||||||
<FaArrowUpLong className="size-4" />
|
|
||||||
</Button>
|
|
||||||
)}
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|||||||
@ -25,3 +25,11 @@ export type ChatStats = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
export type ShowStatsMode = "while_generating" | "always";
|
export type ShowStatsMode = "while_generating" | "always";
|
||||||
|
|
||||||
|
export type GenAIProviderInfo = {
|
||||||
|
models: string[];
|
||||||
|
roles: string[];
|
||||||
|
supports_toggleable_thinking: boolean;
|
||||||
|
};
|
||||||
|
|
||||||
|
export type GenAIModelsResponse = Record<string, GenAIProviderInfo>;
|
||||||
|
|||||||
@ -34,12 +34,17 @@ type StreamChunk =
|
|||||||
* POST to chat/completion with stream: true, parse NDJSON stream, and invoke
|
* POST to chat/completion with stream: true, parse NDJSON stream, and invoke
|
||||||
* callbacks so the caller can update UI (e.g. React state).
|
* callbacks so the caller can update UI (e.g. React state).
|
||||||
*/
|
*/
|
||||||
|
export type StreamChatOptions = {
|
||||||
|
enableThinking?: boolean;
|
||||||
|
};
|
||||||
|
|
||||||
export async function streamChatCompletion(
|
export async function streamChatCompletion(
|
||||||
url: string,
|
url: string,
|
||||||
headers: Record<string, string>,
|
headers: Record<string, string>,
|
||||||
apiMessages: { role: string; content: string }[],
|
apiMessages: { role: string; content: string }[],
|
||||||
callbacks: StreamChatCallbacks,
|
callbacks: StreamChatCallbacks,
|
||||||
signal?: AbortSignal,
|
signal?: AbortSignal,
|
||||||
|
options: StreamChatOptions = {},
|
||||||
): Promise<void> {
|
): Promise<void> {
|
||||||
const {
|
const {
|
||||||
updateMessages,
|
updateMessages,
|
||||||
@ -50,10 +55,17 @@ export async function streamChatCompletion(
|
|||||||
} = callbacks;
|
} = callbacks;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
const body: Record<string, unknown> = {
|
||||||
|
messages: apiMessages,
|
||||||
|
stream: true,
|
||||||
|
};
|
||||||
|
if (options.enableThinking !== undefined) {
|
||||||
|
body.enable_thinking = options.enableThinking;
|
||||||
|
}
|
||||||
const res = await fetch(url, {
|
const res = await fetch(url, {
|
||||||
method: "POST",
|
method: "POST",
|
||||||
headers,
|
headers,
|
||||||
body: JSON.stringify({ messages: apiMessages, stream: true }),
|
body: JSON.stringify(body),
|
||||||
signal,
|
signal,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user