frigate/frigate/genai/gemini.py

"""Gemini Provider for Frigate AI."""

import json
import logging
from typing import Any, Optional

from google import genai
from google.genai import errors, types

from frigate.config import GenAIProviderEnum
from frigate.genai import GenAIClient, register_genai_provider

logger = logging.getLogger(__name__)


@register_genai_provider(GenAIProviderEnum.gemini)
class GeminiClient(GenAIClient):
    """Generative AI client for Frigate using Gemini.

    Wraps a ``google.genai.Client`` and exposes two request paths:
    ``_send`` for a single prompt with JPEG images, and ``chat_with_tools``
    for multi-turn chat with optional function calling.
    """

    provider: genai.Client

    def _init_provider(self):
        """Build the google-genai client used for every request.

        Returns:
            A ``genai.Client`` created with the configured API key and HTTP
            options (timeout plus retry policy for transient status codes).
        """
        http_options_dict = {
            "timeout": int(self.timeout * 1000),  # requires milliseconds
            "retry_options": types.HttpRetryOptions(
                attempts=3,
                initial_delay=1.0,
                max_delay=60.0,
                exp_base=2.0,
                jitter=1.0,
                http_status_codes=[429, 500, 502, 503, 504],
            ),
        }

        # provider_options are applied last so they can override the defaults.
        if isinstance(self.genai_config.provider_options, dict):
            http_options_dict.update(self.genai_config.provider_options)

        return genai.Client(
            api_key=self.genai_config.api_key,
            http_options=types.HttpOptions(**http_options_dict),
        )

    def _send(self, prompt: str, images: list[bytes]) -> Optional[str]:
        """Submit a prompt plus images to Gemini and return the text reply.

        Args:
            prompt: Text instruction, sent after the image parts.
            images: Raw image bytes; each is sent as ``image/jpeg``.

        Returns:
            The stripped response text, or ``None`` when the request fails or
            the response contains no text.
        """
        contents = [
            types.Part.from_bytes(data=img, mime_type="image/jpeg") for img in images
        ] + [prompt]
        try:
            generation_config_dict = {"candidate_count": 1}
            # Same guard as chat_with_tools: only merge a real mapping, so a
            # non-dict value cannot abort the request with a TypeError.
            runtime_options = self.genai_config.runtime_options
            if isinstance(runtime_options, dict):
                generation_config_dict.update(runtime_options)

            response = self.provider.models.generate_content(
                model=self.genai_config.model,
                contents=contents,
                config=types.GenerateContentConfig(
                    **generation_config_dict,
                ),
            )
        except errors.APIError as e:
            logger.warning("Gemini returned an error: %s", e)
            return None
        except Exception as e:
            # Best-effort boundary: description generation must never raise.
            logger.warning("An unexpected error occurred with Gemini: %s", e)
            return None

        try:
            return response.text.strip()
        except (ValueError, AttributeError):
            # No description was generated
            return None

    def get_context_size(self) -> int:
        """Return the context window size, in tokens, reported for Gemini.

        This is a fixed value; it is not derived from the configured model.
        """
        return 1_000_000

    @staticmethod
    def _error_result() -> dict[str, Any]:
        """Return the response dict used for every chat failure path."""
        return {
            "content": None,
            "tool_calls": None,
            "finish_reason": "error",
        }

    def _function_call_parts(self, tool_calls: Any) -> list[types.Part]:
        """Convert an assistant message's ``tool_calls`` into function-call parts.

        NOTE(review): the caller's message schema is not visible here. Two
        shapes are accepted: the flat ``{"name", "arguments"}`` dict that
        ``chat_with_tools`` itself returns, and the nested
        ``{"function": {"name", "arguments"}}`` shape that mirrors the
        ``tools`` input format. Anything else is skipped.
        """
        parts: list[types.Part] = []
        for call in tool_calls or []:
            if not isinstance(call, dict):
                continue
            nested = call.get("function")
            spec = nested if isinstance(nested, dict) else call
            name = spec.get("name") or ""
            if not name:
                continue

            args = spec.get("arguments")
            if isinstance(args, str):
                # Arguments may arrive JSON-encoded; decode them back.
                try:
                    args = json.loads(args) if args else {}
                except ValueError:
                    args = {}
            if not isinstance(args, dict):
                args = {}

            parts.append(types.Part.from_function_call(name=name, args=args))
        return parts

    def _build_contents(
        self, messages: list[dict[str, Any]]
    ) -> tuple[Optional[str], list[types.Content]]:
        """Translate chat messages into a system instruction and Gemini contents.

        Returns:
            ``(system_instruction, contents)``. ``system_instruction`` is the
            system messages joined by blank lines, or ``None`` if there are none.
        """
        system_chunks: list[str] = []
        contents: list[types.Content] = []

        for msg in messages:
            role = msg.get("role", "user")
            content = msg.get("content")

            if role == "system":
                # System text is sent through system_instruction rather than
                # being injected into the conversation as a user turn.
                if content:
                    system_chunks.append(str(content))
            elif role == "assistant":
                parts = [types.Part.from_text(text=content)] if content else []
                # Replay earlier function calls so that a following function
                # response has the call it answers in the history.
                parts.extend(self._function_call_parts(msg.get("tool_calls")))
                if not parts:
                    parts = [types.Part.from_text(text="")]
                contents.append(types.Content(role="model", parts=parts))
            elif role == "tool":
                # chat_with_tools uses the function name as the call id, so
                # tool_call_id is a usable fallback when "name" is absent.
                name = msg.get("name") or msg.get("tool_call_id") or ""
                # The function response payload must be a mapping.
                payload = content if isinstance(content, dict) else {"result": content}
                contents.append(
                    types.Content(
                        role="user",
                        parts=[
                            # from_function_response only accepts keyword args.
                            types.Part.from_function_response(
                                name=name, response=payload
                            )
                        ],
                    )
                )
            else:  # user
                contents.append(
                    types.Content(
                        role="user",
                        parts=[types.Part.from_text(text=content or "")],
                    )
                )

        system_instruction = "\n\n".join(system_chunks) or None
        if system_instruction and not contents:
            # A request needs at least one content entry; with only system
            # text available, send it as the user turn instead.
            contents.append(
                types.Content(
                    role="user",
                    parts=[types.Part.from_text(text=system_instruction)],
                )
            )
            system_instruction = None

        return system_instruction, contents

    def _build_tools(
        self, tools: Optional[list[dict[str, Any]]]
    ) -> Optional[list[types.Tool]]:
        """Convert function-type tool definitions into a single Gemini tool.

        Entries whose ``type`` is not ``"function"`` are ignored. Returns
        ``None`` when no function declarations remain.
        """
        declarations = []
        for tool in tools or []:
            if tool.get("type") != "function":
                continue
            func = tool.get("function", {})
            declarations.append(
                types.FunctionDeclaration(
                    name=func.get("name", ""),
                    description=func.get("description", ""),
                    parameters=func.get("parameters", {}),
                )
            )

        if not declarations:
            return None
        # All declarations are grouped under one Tool object.
        return [types.Tool(function_declarations=declarations)]

    def _build_tool_config(
        self, tool_choice: Optional[str]
    ) -> Optional[types.ToolConfig]:
        """Map a ``tool_choice`` value to a Gemini function-calling mode.

        Returns ``None`` for values other than ``none``/``auto``/``required``.
        """
        modes = {"none": "NONE", "auto": "AUTO", "required": "ANY"}
        mode = modes.get(tool_choice) if tool_choice else None
        if mode is None:
            return None
        return types.ToolConfig(
            function_calling_config=types.FunctionCallingConfig(mode=mode)
        )

    def _parse_candidate(self, candidate: Any) -> dict[str, Any]:
        """Extract text, tool calls and a finish reason from a candidate."""
        text_chunks: list[str] = []
        tool_calls: list[dict[str, Any]] = []

        parts = []
        if candidate.content and candidate.content.parts:
            parts = candidate.content.parts

        for part in parts:
            if part.text:
                # Skip thought summaries; collect every answer part instead
                # of keeping only the last one.
                if not getattr(part, "thought", None):
                    text_chunks.append(part.text)
            elif part.function_call:
                try:
                    arguments = (
                        dict(part.function_call.args)
                        if part.function_call.args
                        else {}
                    )
                except (TypeError, ValueError):
                    arguments = {}

                tool_calls.append(
                    {
                        "id": part.function_call.name or "",
                        "name": part.function_call.name or "",
                        "arguments": arguments,
                    }
                )

        content = "".join(text_chunks).strip() if text_chunks else None
        reason = getattr(candidate, "finish_reason", None)

        if reason == types.FinishReason.MAX_TOKENS:
            finish_reason = "length"
        elif reason in (types.FinishReason.SAFETY, types.FinishReason.RECITATION):
            finish_reason = "error"
        elif tool_calls:
            # Checked before STOP: a turn that requests function calls also
            # ends with STOP, and callers need to see "tool_calls".
            finish_reason = "tool_calls"
        elif reason == types.FinishReason.STOP or content:
            finish_reason = "stop"
        else:
            finish_reason = "error"

        return {
            "content": content,
            "tool_calls": tool_calls or None,
            "finish_reason": finish_reason,
        }

    def chat_with_tools(
        self,
        messages: list[dict[str, Any]],
        tools: Optional[list[dict[str, Any]]] = None,
        tool_choice: Optional[str] = "auto",
    ) -> dict[str, Any]:
        """
        Send chat messages to Gemini with optional tool definitions.

        Implements function calling/tool usage for Gemini models.

        Args:
            messages: Chat history as dicts with ``role`` (``system``,
                ``assistant``, ``tool``, anything else is treated as user)
                and ``content``.
            tools: Optional tool definitions; only ``type == "function"``
                entries are used.
            tool_choice: ``"none"``, ``"auto"`` or ``"required"``. Ignored
                when no function tools are supplied.

        Returns:
            A dict with ``content`` (text or ``None``), ``tool_calls`` (list
            of ``{"id", "name", "arguments"}`` or ``None``) and
            ``finish_reason`` (``"stop"``, ``"length"``, ``"tool_calls"`` or
            ``"error"``). Failures are logged and reported as ``"error"``;
            this method does not raise.
        """
        try:
            system_instruction, gemini_messages = self._build_contents(messages)
            gemini_tools = self._build_tools(tools)

            # Build request config
            config_params: dict[str, Any] = {"candidate_count": 1}

            if system_instruction:
                config_params["system_instruction"] = system_instruction

            if gemini_tools:
                config_params["tools"] = gemini_tools
                # A function-calling mode is only meaningful with tools.
                tool_config = self._build_tool_config(tool_choice)
                if tool_config:
                    config_params["tool_config"] = tool_config

            # Merge runtime_options last so they can override the defaults.
            if isinstance(self.genai_config.runtime_options, dict):
                config_params.update(self.genai_config.runtime_options)

            response = self.provider.models.generate_content(
                model=self.genai_config.model,
                contents=gemini_messages,
                config=types.GenerateContentConfig(**config_params),
            )

            # Check if response is valid
            if not response or not response.candidates:
                return self._error_result()

            return self._parse_candidate(response.candidates[0])

        except errors.APIError as e:
            logger.warning("Gemini API error during chat_with_tools: %s", e)
            return self._error_result()
        except Exception as e:
            # Best-effort boundary: callers receive an error result instead
            # of an exception.
            logger.warning("Gemini returned an error during chat_with_tools: %s", e)
            return self._error_result()