diff --git a/frigate/api/chat.py b/frigate/api/chat.py
index a701a04bd5..291503dbba 100644
--- a/frigate/api/chat.py
+++ b/frigate/api/chat.py
@@ -1185,6 +1185,13 @@ async def chat_completion(
                             )
                             + b"\n"
                         )
+                    elif kind == "reasoning_delta":
+                        yield (
+                            json.dumps({"type": "reasoning", "delta": value}).encode(
+                                "utf-8"
+                            )
+                            + b"\n"
+                        )
                     elif kind == "stats":
                         yield (
                             json.dumps({"type": "stats", **value}).encode("utf-8")
@@ -1285,6 +1292,7 @@ async def chat_completion(
                 final_content = response.get("content") or ""
 
                 if body.stream:
+                    final_reasoning = response.get("reasoning")
 
                     async def stream_body() -> Any:
                         if tool_calls:
@@ -1299,6 +1307,15 @@ async def chat_completion(
                                 ).encode("utf-8")
                                 + b"\n"
                             )
+                        # Emit the full reasoning trace up front when the
+                        # underlying client did not stream it
+                        if final_reasoning:
+                            yield (
+                                json.dumps(
+                                    {"type": "reasoning", "delta": final_reasoning}
+                                ).encode("utf-8")
+                                + b"\n"
+                            )
                         # Stream content in word-sized chunks for smooth UX
                         for part in chunk_content(final_content):
                             yield (
@@ -1319,6 +1336,7 @@ async def chat_completion(
                         message=ChatMessageResponse(
                             role="assistant",
                             content=final_content,
+                            reasoning=response.get("reasoning"),
                             tool_calls=None,
                         ),
                         finish_reason=response.get("finish_reason", "stop"),
diff --git a/frigate/api/defs/response/chat_response.py b/frigate/api/defs/response/chat_response.py
index 0bc864ba68..c2b3e6b1f2 100644
--- a/frigate/api/defs/response/chat_response.py
+++ b/frigate/api/defs/response/chat_response.py
@@ -20,6 +20,10 @@ class ChatMessageResponse(BaseModel):
     content: Optional[str] = Field(
         default=None, description="Message content (None if tool calls present)"
     )
+    reasoning: Optional[str] = Field(
+        default=None,
+        description="Separated reasoning/thinking trace if the model emitted one",
+    )
     tool_calls: Optional[list[ToolCallInvocation]] = Field(
         default=None, description="Tool calls if LLM wants to call tools"
     )
diff --git a/frigate/genai/__init__.py b/frigate/genai/__init__.py
index 76ee8b888c..864092df58 100644
--- a/frigate/genai/__init__.py
+++ b/frigate/genai/__init__.py
@@ -300,6 +300,10 @@ class GenAIClient:
         Returns:
             Dictionary with:
             - 'content': Optional[str] - The text response from the LLM, None if tool calls
+            - 'reasoning': Optional[str] - The separated reasoning/thinking trace
+              if the model emitted one (e.g. via OpenAI-compatible
+              `reasoning_content`). None when the model does not surface a
+              trace or the provider does not parse it.
             - 'tool_calls': Optional[List[Dict]] - List of tool calls if LLM wants to call tools.
               Each tool call dict has:
                 - 'id': str - Unique identifier for this tool call
@@ -311,6 +315,14 @@ class GenAIClient:
                 - 'length': Hit token limit
                 - 'error': An error occurred
 
+        Streaming counterpart `chat_with_tools_stream` yields
+        ``(kind, value)`` tuples where ``kind`` is one of:
+            - 'content_delta': value is a string fragment of the answer
+            - 'reasoning_delta': value is a string fragment of the reasoning
+              trace (emitted before content for thinking models)
+            - 'stats': value is a usage stats dict
+            - 'message': value is the final dict shape described above
+
         Raises:
             NotImplementedError: If the provider doesn't implement this method.
         """
@@ -321,6 +333,7 @@ class GenAIClient:
         )
         return {
             "content": None,
+            "reasoning": None,
             "tool_calls": None,
             "finish_reason": "error",
         }
diff --git a/frigate/genai/plugins/llama_cpp.py b/frigate/genai/plugins/llama_cpp.py
index 6e2fc910c4..830dd6817b 100644
--- a/frigate/genai/plugins/llama_cpp.py
+++ b/frigate/genai/plugins/llama_cpp.py
@@ -531,16 +531,24 @@ class LlamaCppClient(GenAIClient):
         return payload
 
     def _message_from_choice(self, choice: dict[str, Any]) -> dict[str, Any]:
-        """Parse OpenAI-style choice into {content, tool_calls, finish_reason}."""
+        """Parse OpenAI-style choice into {content, reasoning, tool_calls, finish_reason}.
+
+        llama.cpp's `--reasoning-format` puts the trace in `message.reasoning_content`
+        (preferred) or `message.thinking`; both keys are accepted so different builds
+        work without configuration. A whitespace-only trace is normalized to None.
+        """
         message = choice.get("message", {})
         content = message.get("content")
         content = content.strip() if content else None
+        reasoning = message.get("reasoning_content") or message.get("thinking")
+        reasoning = (reasoning.strip() or None) if reasoning else None
         tool_calls = parse_tool_calls_from_message(message)
         finish_reason = choice.get("finish_reason") or (
             "tool_calls" if tool_calls else "stop" if content else "error"
         )
         return {
             "content": content,
+            "reasoning": reasoning,
             "tool_calls": tool_calls,
             "finish_reason": finish_reason,
         }
@@ -803,6 +811,7 @@ class LlamaCppClient(GenAIClient):
         try:
             payload = self._build_payload(messages, tools, tool_choice, stream=True)
             content_parts: list[str] = []
+            reasoning_parts: list[str] = []
             tool_calls_by_index: dict[int, dict[str, Any]] = {}
             finish_reason = "stop"
 
@@ -832,6 +841,15 @@ class LlamaCppClient(GenAIClient):
                         delta = choices[0].get("delta", {})
                         if choices[0].get("finish_reason"):
                             finish_reason = choices[0]["finish_reason"]
+                        # llama.cpp emits separated thinking under
+                        # reasoning_content (preferred) or thinking before any
+                        # content tokens arrive
+                        reasoning_delta = delta.get("reasoning_content") or delta.get(
+                            "thinking"
+                        )
+                        if reasoning_delta:
+                            reasoning_parts.append(reasoning_delta)
+                            yield ("reasoning_delta", reasoning_delta)
                         if delta.get("content"):
                             content_parts.append(delta["content"])
                             yield ("content_delta", delta["content"])
@@ -857,6 +875,7 @@ class LlamaCppClient(GenAIClient):
                                 )
 
             full_content = "".join(content_parts).strip() or None
+            full_reasoning = "".join(reasoning_parts).strip() or None
             tool_calls_list = self._streamed_tool_calls_to_list(tool_calls_by_index)
             if tool_calls_list:
                 finish_reason = "tool_calls"
@@ -864,6 +883,7 @@ class LlamaCppClient(GenAIClient):
                 "message",
                 {
                     "content": full_content,
+                    "reasoning": full_reasoning,
                     "tool_calls": tool_calls_list,
                     "finish_reason": finish_reason,
                 },
diff --git a/web/public/locales/en/views/chat.json b/web/public/locales/en/views/chat.json
index bc320c2049..4cd3ad20f6 100644
--- a/web/public/locales/en/views/chat.json
+++ b/web/public/locales/en/views/chat.json
@@ -60,5 +60,10 @@
   "stats": {
     "context": "{{tokens}} tokens",
     "tokens_per_second": "{{rate}} t/s"
+  },
+  "reasoning": {
+    "thinking": "Thinking…",
+    "show": "Show reasoning",
+    "hide": "Hide reasoning"
   }
 }
diff --git a/web/src/components/chat/ReasoningBubble.tsx b/web/src/components/chat/ReasoningBubble.tsx
new file mode 100644
index 0000000000..580a99c45d
--- /dev/null
+++ b/web/src/components/chat/ReasoningBubble.tsx
@@ -0,0 +1,87 @@
+import { useState, useEffect, useRef } from "react";
+import { useTranslation } from "react-i18next";
+import { LuBrain, LuChevronDown, LuChevronRight } from "react-icons/lu";
+import {
+  Collapsible,
+  CollapsibleContent,
+  CollapsibleTrigger,
+} from "@/components/ui/collapsible";
+import { Button } from "@/components/ui/button";
+import { cn } from "@/lib/utils";
+
+type ReasoningBubbleProps = {
+  /** The accumulated reasoning text from the model. */
+  reasoning: string;
+  /**
+   * Whether the assistant has begun producing the user-facing answer.
+   * While false the reasoning is still streaming and we keep the panel
+   * open with a "Thinking…" label. Once true, the panel auto-collapses
+   * so the answer is the primary focus, but stays expandable.
+   */
+  answerStarted: boolean;
+};
+
+/**
+ * Collapsible panel for the reasoning trace of one assistant message.
+ * It auto-follows `answerStarted` (open while thinking, collapsed once the
+ * answer begins) until the user toggles it; after that it is left alone.
+ */
+export function ReasoningBubble({
+  reasoning,
+  answerStarted,
+}: ReasoningBubbleProps) {
+  const { t } = useTranslation(["views/chat"]);
+  // Seed from the prop so a bubble mounted after the answer has already
+  // started renders collapsed immediately instead of flashing open.
+  const [open, setOpen] = useState(!answerStarted);
+  // Once the user toggles the panel we stop auto-managing it.
+  const userInteractedRef = useRef(false);
+
+  useEffect(() => {
+    if (!userInteractedRef.current) setOpen(!answerStarted);
+  }, [answerStarted]);
+
+  const handleOpenChange = (next: boolean) => {
+    userInteractedRef.current = true;
+    setOpen(next);
+  };
+
+  const label = !answerStarted
+    ? t("reasoning.thinking")
+    : open
+      ? t("reasoning.hide")
+      : t("reasoning.show");
+
+  return (
+    <div className="self-start rounded-2xl bg-muted/60 px-3 py-2 text-muted-foreground">
+      <Collapsible open={open} onOpenChange={handleOpenChange}>
+        <CollapsibleTrigger asChild>
+          <Button
+            variant="ghost"
+            size="sm"
+            className="h-auto w-full min-w-0 justify-start gap-2 whitespace-normal p-0 text-left text-xs hover:bg-transparent"
+          >
+            <LuBrain
+              className={cn(
+                "size-3 shrink-0",
+                !answerStarted && "animate-pulse",
+              )}
+            />
+            <span className="break-words font-medium">{label}</span>
+            {answerStarted &&
+              (open ? (
+                <LuChevronDown className="ml-auto size-3 shrink-0" />
+              ) : (
+                <LuChevronRight className="ml-auto size-3 shrink-0" />
+              ))}
+          </Button>
+        </CollapsibleTrigger>
+        <CollapsibleContent>
+          <pre className="scrollbar-container mt-2 max-h-64 overflow-auto whitespace-pre-wrap break-words rounded bg-muted/50 p-2 font-sans text-xs leading-relaxed">
+            {reasoning}
+          </pre>
+        </CollapsibleContent>
+      </Collapsible>
+    </div>
+  );
+}
diff --git a/web/src/pages/Chat.tsx b/web/src/pages/Chat.tsx
index 970fa3d364..16fe615760 100644
--- a/web/src/pages/Chat.tsx
+++ b/web/src/pages/Chat.tsx
@@ -7,6 +7,7 @@ import { useState, useCallback, useRef, useEffect, useMemo } from "react";
 import axios from "axios";
 import { ChatEventThumbnailsRow } from "@/components/chat/ChatEventThumbnailsRow";
 import { MessageBubble } from "@/components/chat/ChatMessage";
+import { ReasoningBubble } from "@/components/chat/ReasoningBubble";
 import { ToolCallsGroup } from "@/components/chat/ToolCallsGroup";
 import { ChatStartingState } from "@/components/chat/ChatStartingState";
 import { ChatAttachmentChip } from "@/components/chat/ChatAttachmentChip";
@@ -200,15 +201,18 @@ export default function ChatPage() {
                   const hasToolCalls =
                     msg.toolCalls && msg.toolCalls.length > 0;
                   const hasContent = !!msg.content?.trim();
+                  const hasReasoning = !!msg.reasoning?.trim();
                   const showProcessing =
-                    isLastAssistant && isLoading && !hasContent;
+                    isLastAssistant && isLoading && !hasContent && !hasReasoning;
 
-                  // Hide empty placeholder only when there are no tool calls yet
+                  // Hide empty placeholder only when there are no tool calls
+                  // and no reasoning streaming yet
                   if (
                     isLastAssistant &&
                     isLoading &&
                     !hasContent &&
-                    !hasToolCalls
+                    !hasToolCalls &&
+                    !hasReasoning
                   )
                     return (
                       <div
@@ -226,6 +230,12 @@ export default function ChatPage() {
                       {msg.role === "assistant" && hasToolCalls && (
                         <ToolCallsGroup toolCalls={msg.toolCalls!} />
                       )}
+                      {msg.role === "assistant" && hasReasoning && (
+                        <ReasoningBubble
+                          reasoning={msg.reasoning!}
+                          answerStarted={hasContent}
+                        />
+                      )}
                       {showProcessing ? (
                         <div className="flex items-center gap-2 self-start rounded-2xl bg-muted px-5 py-4">
                           <span className="size-2 animate-bounce rounded-full bg-muted-foreground/60 [animation-delay:-0.3s]" />
diff --git a/web/src/types/chat.ts b/web/src/types/chat.ts
index d9d3948b53..db6d84bf58 100644
--- a/web/src/types/chat.ts
+++ b/web/src/types/chat.ts
@@ -7,6 +7,7 @@ export type ToolCall = {
 export type ChatMessage = {
   role: "user" | "assistant";
   content: string;
+  reasoning?: string; // trace built up from streamed "reasoning" chunks
   toolCalls?: ToolCall[];
   stats?: ChatStats;
 };
diff --git a/web/src/utils/chatUtil.ts b/web/src/utils/chatUtil.ts
index 1cb5454f67..5389f7aff8 100644
--- a/web/src/utils/chatUtil.ts
+++ b/web/src/utils/chatUtil.ts
@@ -27,6 +27,7 @@ type StreamChunk =
   | { type: "error"; error: string }
   | { type: "tool_calls"; tool_calls: ToolCall[] }
   | { type: "content"; delta: string }
+  | { type: "reasoning"; delta: string }
   | StatsChunk;
 
 /**
@@ -109,6 +110,19 @@ export async function streamChatCompletion(
         });
         return "continue";
       }
+      if (data.type === "reasoning" && data.delta !== undefined) {
+        updateMessages((prev) => {
+          const next = [...prev];
+          const lastMsg = next[next.length - 1];
+          if (lastMsg?.role === "assistant")
+            next[next.length - 1] = {
+              ...lastMsg,
+              reasoning: (lastMsg.reasoning ?? "") + data.delta,
+            };
+          return next;
+        });
+        return "continue";
+      }
       if (data.type === "stats") {
         const stats: ChatStats = {
           promptTokens: data.prompt_tokens,