mirror of
https://github.com/blakeblackshear/frigate.git
synced 2026-07-02 18:11:13 +03:00
Implement reasoning traces in the UI
This commit is contained in:
parent
31f9611d34
commit
c0d784a45b
@ -1185,6 +1185,13 @@ async def chat_completion(
|
|||||||
)
|
)
|
||||||
+ b"\n"
|
+ b"\n"
|
||||||
)
|
)
|
||||||
|
elif kind == "reasoning_delta":
|
||||||
|
yield (
|
||||||
|
json.dumps({"type": "reasoning", "delta": value}).encode(
|
||||||
|
"utf-8"
|
||||||
|
)
|
||||||
|
+ b"\n"
|
||||||
|
)
|
||||||
elif kind == "stats":
|
elif kind == "stats":
|
||||||
yield (
|
yield (
|
||||||
json.dumps({"type": "stats", **value}).encode("utf-8")
|
json.dumps({"type": "stats", **value}).encode("utf-8")
|
||||||
@ -1285,6 +1292,7 @@ async def chat_completion(
|
|||||||
final_content = response.get("content") or ""
|
final_content = response.get("content") or ""
|
||||||
|
|
||||||
if body.stream:
|
if body.stream:
|
||||||
|
final_reasoning = response.get("reasoning")
|
||||||
|
|
||||||
async def stream_body() -> Any:
|
async def stream_body() -> Any:
|
||||||
if tool_calls:
|
if tool_calls:
|
||||||
@ -1299,6 +1307,15 @@ async def chat_completion(
|
|||||||
).encode("utf-8")
|
).encode("utf-8")
|
||||||
+ b"\n"
|
+ b"\n"
|
||||||
)
|
)
|
||||||
|
# Emit the full reasoning trace up front when the
|
||||||
|
# underlying client did not stream it
|
||||||
|
if final_reasoning:
|
||||||
|
yield (
|
||||||
|
json.dumps(
|
||||||
|
{"type": "reasoning", "delta": final_reasoning}
|
||||||
|
).encode("utf-8")
|
||||||
|
+ b"\n"
|
||||||
|
)
|
||||||
# Stream content in word-sized chunks for smooth UX
|
# Stream content in word-sized chunks for smooth UX
|
||||||
for part in chunk_content(final_content):
|
for part in chunk_content(final_content):
|
||||||
yield (
|
yield (
|
||||||
@ -1319,6 +1336,7 @@ async def chat_completion(
|
|||||||
message=ChatMessageResponse(
|
message=ChatMessageResponse(
|
||||||
role="assistant",
|
role="assistant",
|
||||||
content=final_content,
|
content=final_content,
|
||||||
|
reasoning=response.get("reasoning"),
|
||||||
tool_calls=None,
|
tool_calls=None,
|
||||||
),
|
),
|
||||||
finish_reason=response.get("finish_reason", "stop"),
|
finish_reason=response.get("finish_reason", "stop"),
|
||||||
|
|||||||
@ -20,6 +20,10 @@ class ChatMessageResponse(BaseModel):
|
|||||||
content: Optional[str] = Field(
|
content: Optional[str] = Field(
|
||||||
default=None, description="Message content (None if tool calls present)"
|
default=None, description="Message content (None if tool calls present)"
|
||||||
)
|
)
|
||||||
|
reasoning: Optional[str] = Field(
|
||||||
|
default=None,
|
||||||
|
description="Separated reasoning/thinking trace if the model emitted one",
|
||||||
|
)
|
||||||
tool_calls: Optional[list[ToolCallInvocation]] = Field(
|
tool_calls: Optional[list[ToolCallInvocation]] = Field(
|
||||||
default=None, description="Tool calls if LLM wants to call tools"
|
default=None, description="Tool calls if LLM wants to call tools"
|
||||||
)
|
)
|
||||||
|
|||||||
@ -300,6 +300,10 @@ class GenAIClient:
|
|||||||
Returns:
|
Returns:
|
||||||
Dictionary with:
|
Dictionary with:
|
||||||
- 'content': Optional[str] - The text response from the LLM, None if tool calls
|
- 'content': Optional[str] - The text response from the LLM, None if tool calls
|
||||||
|
- 'reasoning': Optional[str] - The separated reasoning/thinking trace
|
||||||
|
if the model emitted one (e.g. via OpenAI-compatible
|
||||||
|
`reasoning_content`). None when the model does not surface a
|
||||||
|
trace or the provider does not parse it.
|
||||||
- 'tool_calls': Optional[List[Dict]] - List of tool calls if LLM wants to call tools.
|
- 'tool_calls': Optional[List[Dict]] - List of tool calls if LLM wants to call tools.
|
||||||
Each tool call dict has:
|
Each tool call dict has:
|
||||||
- 'id': str - Unique identifier for this tool call
|
- 'id': str - Unique identifier for this tool call
|
||||||
@ -311,6 +315,14 @@ class GenAIClient:
|
|||||||
- 'length': Hit token limit
|
- 'length': Hit token limit
|
||||||
- 'error': An error occurred
|
- 'error': An error occurred
|
||||||
|
|
||||||
|
Streaming counterpart `chat_with_tools_stream` yields
|
||||||
|
``(kind, value)`` tuples where ``kind`` is one of:
|
||||||
|
- 'content_delta': value is a string fragment of the answer
|
||||||
|
- 'reasoning_delta': value is a string fragment of the reasoning
|
||||||
|
trace (emitted before content for thinking models)
|
||||||
|
- 'stats': value is a usage stats dict
|
||||||
|
- 'message': value is the final dict shape described above
|
||||||
|
|
||||||
Raises:
|
Raises:
|
||||||
NotImplementedError: If the provider doesn't implement this method.
|
NotImplementedError: If the provider doesn't implement this method.
|
||||||
"""
|
"""
|
||||||
@ -321,6 +333,7 @@ class GenAIClient:
|
|||||||
)
|
)
|
||||||
return {
|
return {
|
||||||
"content": None,
|
"content": None,
|
||||||
|
"reasoning": None,
|
||||||
"tool_calls": None,
|
"tool_calls": None,
|
||||||
"finish_reason": "error",
|
"finish_reason": "error",
|
||||||
}
|
}
|
||||||
|
|||||||
@ -531,16 +531,24 @@ class LlamaCppClient(GenAIClient):
|
|||||||
return payload
|
return payload
|
||||||
|
|
||||||
def _message_from_choice(self, choice: dict[str, Any]) -> dict[str, Any]:
    """Parse OpenAI-style choice into {content, reasoning, tool_calls, finish_reason}.

    llama.cpp's `--reasoning-format` puts the trace in
    `message.reasoning_content` (preferred) or `message.thinking`; both
    keys are accepted so different builds work without configuration.

    Args:
        choice: One entry of an OpenAI-style ``choices`` list.

    Returns:
        Dictionary with:
        - 'content': the stripped answer text, or None when absent/empty.
        - 'reasoning': the stripped reasoning trace, or None when
          absent/empty.
        - 'tool_calls': the result of ``parse_tool_calls_from_message``.
        - 'finish_reason': the value reported in ``choice`` when present;
          otherwise 'tool_calls' if tool calls were parsed, 'stop' if
          there is content, and 'error' if there is neither.
    """
    # `.get("message", {})` only covers a *missing* key; an explicit
    # `"message": null` would hand back None and crash on the lookups
    # below, so coalesce every falsy value to an empty dict.
    message = choice.get("message") or {}

    content = message.get("content")
    content = content.strip() if content else None

    # Prefer `reasoning_content`, fall back to `thinking`.
    reasoning = message.get("reasoning_content") or message.get("thinking")
    reasoning = reasoning.strip() if reasoning else None

    tool_calls = parse_tool_calls_from_message(message)

    # Trust the reported finish reason; infer one only when it is missing.
    finish_reason = choice.get("finish_reason") or (
        "tool_calls" if tool_calls else "stop" if content else "error"
    )

    return {
        "content": content,
        "reasoning": reasoning,
        "tool_calls": tool_calls,
        "finish_reason": finish_reason,
    }
|
||||||
@ -803,6 +811,7 @@ class LlamaCppClient(GenAIClient):
|
|||||||
try:
|
try:
|
||||||
payload = self._build_payload(messages, tools, tool_choice, stream=True)
|
payload = self._build_payload(messages, tools, tool_choice, stream=True)
|
||||||
content_parts: list[str] = []
|
content_parts: list[str] = []
|
||||||
|
reasoning_parts: list[str] = []
|
||||||
tool_calls_by_index: dict[int, dict[str, Any]] = {}
|
tool_calls_by_index: dict[int, dict[str, Any]] = {}
|
||||||
finish_reason = "stop"
|
finish_reason = "stop"
|
||||||
|
|
||||||
@ -832,6 +841,15 @@ class LlamaCppClient(GenAIClient):
|
|||||||
delta = choices[0].get("delta", {})
|
delta = choices[0].get("delta", {})
|
||||||
if choices[0].get("finish_reason"):
|
if choices[0].get("finish_reason"):
|
||||||
finish_reason = choices[0]["finish_reason"]
|
finish_reason = choices[0]["finish_reason"]
|
||||||
|
# llama.cpp emits separated thinking under
|
||||||
|
# reasoning_content (preferred) or thinking before any
|
||||||
|
# content tokens arrive
|
||||||
|
reasoning_delta = delta.get("reasoning_content") or delta.get(
|
||||||
|
"thinking"
|
||||||
|
)
|
||||||
|
if reasoning_delta:
|
||||||
|
reasoning_parts.append(reasoning_delta)
|
||||||
|
yield ("reasoning_delta", reasoning_delta)
|
||||||
if delta.get("content"):
|
if delta.get("content"):
|
||||||
content_parts.append(delta["content"])
|
content_parts.append(delta["content"])
|
||||||
yield ("content_delta", delta["content"])
|
yield ("content_delta", delta["content"])
|
||||||
@ -857,6 +875,7 @@ class LlamaCppClient(GenAIClient):
|
|||||||
)
|
)
|
||||||
|
|
||||||
full_content = "".join(content_parts).strip() or None
|
full_content = "".join(content_parts).strip() or None
|
||||||
|
full_reasoning = "".join(reasoning_parts).strip() or None
|
||||||
tool_calls_list = self._streamed_tool_calls_to_list(tool_calls_by_index)
|
tool_calls_list = self._streamed_tool_calls_to_list(tool_calls_by_index)
|
||||||
if tool_calls_list:
|
if tool_calls_list:
|
||||||
finish_reason = "tool_calls"
|
finish_reason = "tool_calls"
|
||||||
@ -864,6 +883,7 @@ class LlamaCppClient(GenAIClient):
|
|||||||
"message",
|
"message",
|
||||||
{
|
{
|
||||||
"content": full_content,
|
"content": full_content,
|
||||||
|
"reasoning": full_reasoning,
|
||||||
"tool_calls": tool_calls_list,
|
"tool_calls": tool_calls_list,
|
||||||
"finish_reason": finish_reason,
|
"finish_reason": finish_reason,
|
||||||
},
|
},
|
||||||
|
|||||||
@ -60,5 +60,10 @@
|
|||||||
"stats": {
|
"stats": {
|
||||||
"context": "{{tokens}} tokens",
|
"context": "{{tokens}} tokens",
|
||||||
"tokens_per_second": "{{rate}} t/s"
|
"tokens_per_second": "{{rate}} t/s"
|
||||||
|
},
|
||||||
|
"reasoning": {
|
||||||
|
"thinking": "Thinking…",
|
||||||
|
"show": "Show reasoning",
|
||||||
|
"hide": "Hide reasoning"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
87
web/src/components/chat/ReasoningBubble.tsx
Normal file
87
web/src/components/chat/ReasoningBubble.tsx
Normal file
@ -0,0 +1,87 @@
|
|||||||
|
import { useState, useEffect, useRef } from "react";
|
||||||
|
import { useTranslation } from "react-i18next";
|
||||||
|
import { LuBrain, LuChevronDown, LuChevronRight } from "react-icons/lu";
|
||||||
|
import {
|
||||||
|
Collapsible,
|
||||||
|
CollapsibleContent,
|
||||||
|
CollapsibleTrigger,
|
||||||
|
} from "@/components/ui/collapsible";
|
||||||
|
import { Button } from "@/components/ui/button";
|
||||||
|
import { cn } from "@/lib/utils";
|
||||||
|
|
||||||
|
type ReasoningBubbleProps = {
  /** Reasoning text accumulated so far for this assistant message. */
  reasoning: string;
  /**
   * True once the assistant has started producing the user-facing answer.
   *
   * While this is false the trace is treated as still streaming: the panel
   * stays open and is labelled "Thinking…". When it turns true the panel
   * collapses by itself so the answer takes focus, and the user can still
   * expand it again.
   */
  answerStarted: boolean;
};
|
||||||
|
|
||||||
|
/**
 * Collapsible bubble that shows a model's reasoning trace.
 *
 * The panel is open while the model is still thinking (no answer text yet)
 * and collapses automatically once the answer begins. After the user toggles
 * the panel by hand, automatic open/close is disabled for this bubble.
 */
export function ReasoningBubble({
  reasoning,
  answerStarted,
}: ReasoningBubbleProps) {
  const { t } = useTranslation(["views/chat"]);

  // Seed the open state from the current props instead of a hard-coded
  // `true`: a message that mounts with its answer already present starts
  // collapsed, rather than rendering expanded and collapsing in the effect
  // below (extra render plus a visible flash).
  const [open, setOpen] = useState(!answerStarted);
  // Set once the user has toggled the panel; auto-collapse then stops.
  const userInteractedRef = useRef(false);
  // Last open state applied automatically, so the effect only acts when
  // `answerStarted` really flips.
  const lastAutoState = useRef(!answerStarted);

  useEffect(() => {
    if (userInteractedRef.current) return;
    const desired = !answerStarted;
    if (desired !== lastAutoState.current) {
      lastAutoState.current = desired;
      setOpen(desired);
    }
  }, [answerStarted]);

  const handleOpenChange = (next: boolean) => {
    userInteractedRef.current = true;
    setOpen(next);
  };

  // "Thinking…" until the answer starts, then a show/hide hint that follows
  // the current open state.
  let label: string;
  if (!answerStarted) {
    label = t("reasoning.thinking");
  } else if (open) {
    label = t("reasoning.hide");
  } else {
    label = t("reasoning.show");
  }

  return (
    <div className="self-start rounded-2xl bg-muted/60 px-3 py-2 text-muted-foreground">
      <Collapsible open={open} onOpenChange={handleOpenChange}>
        <CollapsibleTrigger asChild>
          <Button
            variant="ghost"
            size="sm"
            className="h-auto w-full min-w-0 justify-start gap-2 whitespace-normal p-0 text-left text-xs hover:bg-transparent"
          >
            <LuBrain
              className={cn(
                "size-3 shrink-0",
                !answerStarted && "animate-pulse",
              )}
            />
            <span className="break-words font-medium">{label}</span>
            {/* The chevron is only shown once the answer has started. */}
            {answerStarted &&
              (open ? (
                <LuChevronDown className="ml-auto size-3 shrink-0" />
              ) : (
                <LuChevronRight className="ml-auto size-3 shrink-0" />
              ))}
          </Button>
        </CollapsibleTrigger>
        <CollapsibleContent>
          <pre className="scrollbar-container mt-2 max-h-64 overflow-auto whitespace-pre-wrap break-words rounded bg-muted/50 p-2 font-sans text-xs leading-relaxed">
            {reasoning}
          </pre>
        </CollapsibleContent>
      </Collapsible>
    </div>
  );
}
|
||||||
@ -7,6 +7,7 @@ import { useState, useCallback, useRef, useEffect, useMemo } from "react";
|
|||||||
import axios from "axios";
|
import axios from "axios";
|
||||||
import { ChatEventThumbnailsRow } from "@/components/chat/ChatEventThumbnailsRow";
|
import { ChatEventThumbnailsRow } from "@/components/chat/ChatEventThumbnailsRow";
|
||||||
import { MessageBubble } from "@/components/chat/ChatMessage";
|
import { MessageBubble } from "@/components/chat/ChatMessage";
|
||||||
|
import { ReasoningBubble } from "@/components/chat/ReasoningBubble";
|
||||||
import { ToolCallsGroup } from "@/components/chat/ToolCallsGroup";
|
import { ToolCallsGroup } from "@/components/chat/ToolCallsGroup";
|
||||||
import { ChatStartingState } from "@/components/chat/ChatStartingState";
|
import { ChatStartingState } from "@/components/chat/ChatStartingState";
|
||||||
import { ChatAttachmentChip } from "@/components/chat/ChatAttachmentChip";
|
import { ChatAttachmentChip } from "@/components/chat/ChatAttachmentChip";
|
||||||
@ -200,15 +201,18 @@ export default function ChatPage() {
|
|||||||
const hasToolCalls =
|
const hasToolCalls =
|
||||||
msg.toolCalls && msg.toolCalls.length > 0;
|
msg.toolCalls && msg.toolCalls.length > 0;
|
||||||
const hasContent = !!msg.content?.trim();
|
const hasContent = !!msg.content?.trim();
|
||||||
|
const hasReasoning = !!msg.reasoning?.trim();
|
||||||
const showProcessing =
|
const showProcessing =
|
||||||
isLastAssistant && isLoading && !hasContent;
|
isLastAssistant && isLoading && !hasContent && !hasReasoning;
|
||||||
|
|
||||||
// Hide empty placeholder only when there are no tool calls yet
|
// Hide empty placeholder only when there are no tool calls
|
||||||
|
// and no reasoning streaming yet
|
||||||
if (
|
if (
|
||||||
isLastAssistant &&
|
isLastAssistant &&
|
||||||
isLoading &&
|
isLoading &&
|
||||||
!hasContent &&
|
!hasContent &&
|
||||||
!hasToolCalls
|
!hasToolCalls &&
|
||||||
|
!hasReasoning
|
||||||
)
|
)
|
||||||
return (
|
return (
|
||||||
<div
|
<div
|
||||||
@ -226,6 +230,12 @@ export default function ChatPage() {
|
|||||||
{msg.role === "assistant" && hasToolCalls && (
|
{msg.role === "assistant" && hasToolCalls && (
|
||||||
<ToolCallsGroup toolCalls={msg.toolCalls!} />
|
<ToolCallsGroup toolCalls={msg.toolCalls!} />
|
||||||
)}
|
)}
|
||||||
|
{msg.role === "assistant" && hasReasoning && (
|
||||||
|
<ReasoningBubble
|
||||||
|
reasoning={msg.reasoning!}
|
||||||
|
answerStarted={hasContent}
|
||||||
|
/>
|
||||||
|
)}
|
||||||
{showProcessing ? (
|
{showProcessing ? (
|
||||||
<div className="flex items-center gap-2 self-start rounded-2xl bg-muted px-5 py-4">
|
<div className="flex items-center gap-2 self-start rounded-2xl bg-muted px-5 py-4">
|
||||||
<span className="size-2 animate-bounce rounded-full bg-muted-foreground/60 [animation-delay:-0.3s]" />
|
<span className="size-2 animate-bounce rounded-full bg-muted-foreground/60 [animation-delay:-0.3s]" />
|
||||||
|
|||||||
@ -7,6 +7,7 @@ export type ToolCall = {
|
|||||||
export type ChatMessage = {
|
export type ChatMessage = {
|
||||||
role: "user" | "assistant";
|
role: "user" | "assistant";
|
||||||
content: string;
|
content: string;
|
||||||
|
reasoning?: string;
|
||||||
toolCalls?: ToolCall[];
|
toolCalls?: ToolCall[];
|
||||||
stats?: ChatStats;
|
stats?: ChatStats;
|
||||||
};
|
};
|
||||||
|
|||||||
@ -27,6 +27,7 @@ type StreamChunk =
|
|||||||
| { type: "error"; error: string }
|
| { type: "error"; error: string }
|
||||||
| { type: "tool_calls"; tool_calls: ToolCall[] }
|
| { type: "tool_calls"; tool_calls: ToolCall[] }
|
||||||
| { type: "content"; delta: string }
|
| { type: "content"; delta: string }
|
||||||
|
| { type: "reasoning"; delta: string }
|
||||||
| StatsChunk;
|
| StatsChunk;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -109,6 +110,19 @@ export async function streamChatCompletion(
|
|||||||
});
|
});
|
||||||
return "continue";
|
return "continue";
|
||||||
}
|
}
|
||||||
|
if (data.type === "reasoning" && data.delta !== undefined) {
|
||||||
|
updateMessages((prev) => {
|
||||||
|
const next = [...prev];
|
||||||
|
const lastMsg = next[next.length - 1];
|
||||||
|
if (lastMsg?.role === "assistant")
|
||||||
|
next[next.length - 1] = {
|
||||||
|
...lastMsg,
|
||||||
|
reasoning: (lastMsg.reasoning ?? "") + data.delta,
|
||||||
|
};
|
||||||
|
return next;
|
||||||
|
});
|
||||||
|
return "continue";
|
||||||
|
}
|
||||||
if (data.type === "stats") {
|
if (data.type === "stats") {
|
||||||
const stats: ChatStats = {
|
const stats: ChatStats = {
|
||||||
promptTokens: data.prompt_tokens,
|
promptTokens: data.prompt_tokens,
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user