From ea2e423e114e107b37c42b8436cd9504bc53cd80 Mon Sep 17 00:00:00 2001 From: Nicolas Mowen Date: Tue, 19 May 2026 10:23:35 -0600 Subject: [PATCH] Implement reasoning for other providers --- frigate/genai/plugins/gemini.py | 42 +++++++++++++++++++++++++++++---- frigate/genai/plugins/ollama.py | 15 ++++++++++++ frigate/genai/plugins/openai.py | 21 +++++++++++++++++ 3 files changed, 73 insertions(+), 5 deletions(-) diff --git a/frigate/genai/plugins/gemini.py b/frigate/genai/plugins/gemini.py index c1046428e6..bcac09d0e3 100644 --- a/frigate/genai/plugins/gemini.py +++ b/frigate/genai/plugins/gemini.py @@ -248,6 +248,13 @@ class GeminiClient(GenAIClient): if tool_config: config_params["tool_config"] = tool_config + # Ask thinking-capable models (Gemini 2.5+) to include their + # reasoning trace as separate `thought` parts so we can surface + # it on the reasoning channel. Older models ignore this field. + config_params["thinking_config"] = types.ThinkingConfig( + include_thoughts=True + ) + # Merge runtime_options if isinstance(self.genai_config.runtime_options, dict): config_params.update(self.genai_config.runtime_options) @@ -262,19 +269,24 @@ class GeminiClient(GenAIClient): if not response or not response.candidates: return { "content": None, + "reasoning": None, "tool_calls": None, "finish_reason": "error", } candidate = response.candidates[0] content = None + reasoning_parts: list[str] = [] tool_calls = None - # Extract content and tool calls from response + # Extract content, reasoning, and tool calls from response if candidate.content and candidate.content.parts: for part in candidate.content.parts: if part.text: - content = part.text.strip() + if getattr(part, "thought", False): + reasoning_parts.append(part.text) + else: + content = part.text.strip() elif part.function_call: # Handle function call if tool_calls is None: @@ -297,6 +309,8 @@ class GeminiClient(GenAIClient): } ) + reasoning = "".join(reasoning_parts).strip() or None + # Determine finish reason finish_reason = "error" if hasattr(candidate, "finish_reason") and candidate.finish_reason: @@ -322,6 +336,7 @@ class GeminiClient(GenAIClient): return { "content": content, + "reasoning": reasoning, "tool_calls": tool_calls, "finish_reason": finish_reason, } @@ -330,6 +345,7 @@ class GeminiClient(GenAIClient): logger.warning("Gemini API error during chat_with_tools: %s", str(e)) return { "content": None, + "reasoning": None, "tool_calls": None, "finish_reason": "error", } @@ -339,6 +355,7 @@ class GeminiClient(GenAIClient): ) return { "content": None, + "reasoning": None, "tool_calls": None, "finish_reason": "error", } @@ -477,12 +494,19 @@ class GeminiClient(GenAIClient): if tool_config: config_params["tool_config"] = tool_config + # Ask thinking-capable models to include their reasoning trace + # as separate `thought` parts (Gemini 2.5+; ignored elsewhere). + config_params["thinking_config"] = types.ThinkingConfig( + include_thoughts=True + ) + # Merge runtime_options if isinstance(self.genai_config.runtime_options, dict): config_params.update(self.genai_config.runtime_options) # Use streaming API content_parts: list[str] = [] + reasoning_parts: list[str] = [] tool_calls_by_index: dict[int, dict[str, Any]] = {} finish_reason = "stop" usage_stats: Optional[dict[str, Any]] = None @@ -519,12 +543,16 @@ class GeminiClient(GenAIClient): ]: finish_reason = "error" - # Extract content and tool calls from chunk + # Extract content, reasoning, and tool calls from chunk if candidate.content and candidate.content.parts: for part in candidate.content.parts: if part.text: - content_parts.append(part.text) - yield ("content_delta", part.text) + if getattr(part, "thought", False): + reasoning_parts.append(part.text) + yield ("reasoning_delta", part.text) + else: + content_parts.append(part.text) + yield ("content_delta", part.text) elif part.function_call: # Handle function call try: @@ -565,6 +593,7 @@ class GeminiClient(GenAIClient): # Build final message full_content = "".join(content_parts).strip() or None + full_reasoning = "".join(reasoning_parts).strip() or None # Convert tool calls to list format tool_calls_list = None @@ -593,6 +622,7 @@ class GeminiClient(GenAIClient): "message", { "content": full_content, + "reasoning": full_reasoning, "tool_calls": tool_calls_list, "finish_reason": finish_reason, }, @@ -604,6 +634,7 @@ class GeminiClient(GenAIClient): "message", { "content": None, + "reasoning": None, "tool_calls": None, "finish_reason": "error", }, @@ -616,6 +647,7 @@ class GeminiClient(GenAIClient): "message", { "content": None, + "reasoning": None, "tool_calls": None, "finish_reason": "error", }, diff --git a/frigate/genai/plugins/ollama.py b/frigate/genai/plugins/ollama.py index f9d1c62cbf..a6f6d8ddd5 100644 --- a/frigate/genai/plugins/ollama.py +++ b/frigate/genai/plugins/ollama.py @@ -337,6 +337,9 @@ class OllamaClient(GenAIClient): response.get("done"), ) content = message.get("content", "").strip() if message.get("content") else None + reasoning = ( + message.get("thinking", "").strip() if message.get("thinking") else None + ) tool_calls = parse_tool_calls_from_message(message) finish_reason = "error" if response.get("done"): @@ -349,6 +352,7 @@ class OllamaClient(GenAIClient): finish_reason = "stop" return { "content": content, + "reasoning": reasoning, "tool_calls": tool_calls, "finish_reason": finish_reason, } @@ -432,6 +436,9 @@ class OllamaClient(GenAIClient): ) response = await async_client.chat(**request_params) result = self._message_from_response(response) + reasoning = result.get("reasoning") + if reasoning: + yield ("reasoning_delta", reasoning) content = result.get("content") if content: yield ("content_delta", content) @@ -450,6 +457,7 @@ class OllamaClient(GenAIClient): headers=self._auth_headers(), ) content_parts: list[str] = [] + reasoning_parts: list[str] = [] final_message: dict[str, Any] | None = None final_chunk: Any = None stream = await async_client.chat(**request_params) @@ -457,6 +465,10 @@ class OllamaClient(GenAIClient): if not chunk or "message" not in chunk: continue msg = chunk.get("message", {}) + reasoning_delta = msg.get("thinking") or "" + if reasoning_delta: + reasoning_parts.append(reasoning_delta) + yield ("reasoning_delta", reasoning_delta) delta = msg.get("content") or "" if delta: content_parts.append(delta) @@ -464,8 +476,10 @@ class OllamaClient(GenAIClient): if chunk.get("done"): final_chunk = chunk full_content = "".join(content_parts).strip() or None + full_reasoning = "".join(reasoning_parts).strip() or None final_message = { "content": full_content, + "reasoning": full_reasoning, "tool_calls": None, "finish_reason": "stop", } @@ -482,6 +496,7 @@ class OllamaClient(GenAIClient): "message", { "content": "".join(content_parts).strip() or None, + "reasoning": "".join(reasoning_parts).strip() or None, "tool_calls": None, "finish_reason": "stop", }, diff --git a/frigate/genai/plugins/openai.py b/frigate/genai/plugins/openai.py index f9e818fba3..3e5adabb35 100644 --- a/frigate/genai/plugins/openai.py +++ b/frigate/genai/plugins/openai.py @@ -236,6 +236,10 @@ class OpenAIClient(GenAIClient): choice = result.choices[0] message = choice.message content = message.content.strip() if message.content else None + raw_reasoning = getattr(message, "reasoning_content", None) or getattr( + message, "reasoning", None + ) + reasoning = raw_reasoning.strip() if raw_reasoning else None tool_calls = None if message.tool_calls: @@ -270,6 +274,7 @@ class OpenAIClient(GenAIClient): return { "content": content, + "reasoning": reasoning, "tool_calls": tool_calls, "finish_reason": finish_reason, } @@ -278,6 +283,7 @@ class OpenAIClient(GenAIClient): logger.warning("OpenAI request timed out: %s", str(e)) return { "content": None, + "reasoning": None, "tool_calls": None, "finish_reason": "error", } @@ -285,6 +291,7 @@ class OpenAIClient(GenAIClient): logger.warning("OpenAI returned an error: %s", str(e)) return { "content": None, + "reasoning": None, "tool_calls": None, "finish_reason": "error", } @@ -335,6 +342,7 @@ class OpenAIClient(GenAIClient): # Use streaming API content_parts: list[str] = [] + reasoning_parts: list[str] = [] tool_calls_by_index: dict[int, dict[str, Any]] = {} finish_reason = "stop" usage_stats: Optional[dict[str, Any]] = None @@ -356,6 +364,15 @@ class OpenAIClient(GenAIClient): if choice.finish_reason: finish_reason = choice.finish_reason + # Extract reasoning deltas (reasoning_content or reasoning, + # depending on the server) + reasoning_delta = getattr(delta, "reasoning_content", None) or getattr( + delta, "reasoning", None + ) + if reasoning_delta: + reasoning_parts.append(reasoning_delta) + yield ("reasoning_delta", reasoning_delta) + # Extract content deltas if delta.content: content_parts.append(delta.content) @@ -384,6 +401,7 @@ class OpenAIClient(GenAIClient): # Build final message full_content = "".join(content_parts).strip() or None + full_reasoning = "".join(reasoning_parts).strip() or None # Convert tool calls to list format tool_calls_list = None @@ -412,6 +430,7 @@ class OpenAIClient(GenAIClient): "message", { "content": full_content, + "reasoning": full_reasoning, "tool_calls": tool_calls_list, "finish_reason": finish_reason, }, @@ -423,6 +442,7 @@ class OpenAIClient(GenAIClient): "message", { "content": None, + "reasoning": None, "tool_calls": None, "finish_reason": "error", }, @@ -433,6 +453,7 @@ class OpenAIClient(GenAIClient): "message", { "content": None, + "reasoning": None, "tool_calls": None, "finish_reason": "error", },