"""
Base agent interface for OfficeArena GUI agents.
"""

from abc import ABC, abstractmethod
from typing import Any, Dict, Optional

from ..adapter import ActionAdapter


class BaseAgent(ABC):
    """
    Abstract base for GUI agents that drive Office applications.

    Every agent plugged into the OfficeArena evaluation environment must
    subclass this class and implement ``step`` and ``reset``.

    Expected Output Format:
    ``step`` returns a JSON string with the following structure (the final
    "message" entry is optional and carries a message to the user):

    {
        "status": "completed",
        "output": [
            {
                "type": "reasoning",
                "summary": [{"text": "explanation of what the agent is thinking"}]
            },
            {
                "type": "computer_call",
                "action": {
                    "type": <action_type>,
                    **action_args
                }
            },
            {
                "type": "message",
                "content": [{"type": "output_text", "text": "message to user"}]
            }
        ]
    }

    Action arguments for each action type, following the OpenAI Computer Use
    Agent: https://platform.openai.com/docs/guides/tools-computer-use

    - screenshot: {}  # No arguments needed
    - click: {"x": <x_coord>, "y": <y_coord>, "button": "left"|"right"|"middle"}
    - left_click: {"x": <x_coord>, "y": <y_coord>}
    - right_click: {"x": <x_coord>, "y": <y_coord>}
    - middle_click: {"x": <x_coord>, "y": <y_coord>}
    - double_click: {"x": <x_coord>, "y": <y_coord>}
    - move: {"x": <x_coord>, "y": <y_coord>}
    - scroll: {"direction": "up"|"down", "amount": <scroll_amount>, ["x": <x_coord>, "y": <y_coord>]}
    - drag: {"path": [{"x": x1, "y": y1}, {"x": x2, "y": y2}]}
    - keypress: {"keys": ["key1", "key2"]}
    - type: {"text": "text to type"}
    - wait: {"duration": <seconds>}
    - finish: {"message": "optional message to user"}

    Alternatively, use the action types and arguments defined by ScreenEnv:
    https://github.com/huggingface/screenenv/tree/main

    - screenshot: {}
    - left_click: {"x": <x_coord>, "y": <y_coord>}
    - right_click: {"x": <x_coord>, "y": <y_coord>}
    - middle_click: {"x": <x_coord>, "y": <y_coord>}
    - double_click: {"x": <x_coord>, "y": <y_coord>}
    - scroll: {"direction": "up"|"down", "amount": <scroll_amount>}
    - move_mouse: {"x": <x_coord>, "y": <y_coord>}
    - mouse_press: {"button": "left"|"right"|"middle"}
    - mouse_release: {"button": "left"|"right"|"middle"}
    - write: {"text": "text to write", "delay_in_ms": <delay>}
    - press: {"key": "key_or_key_list"}
    - drag: {"fr": [<x1>, <y1>], "to": [<x2>, <y2>]}
    - wait: {"ms": <milliseconds>}
    - open: {"file_or_url": "path_or_url"}
    - launch: {"application": "app_name", "wait_for_window": <boolean>}
    - close_window: {"window_id": "id"}
    - activate_window: {"window_id": "id"}
    - execute_command: {"command": "shell_command", "background": <boolean>, "timeout": <seconds>}
    - execute_python_command: {"command": "python_code", "import_prefix": ["module1", "module2"]}
    - get_cursor_position: {}
    - get_screen_size: {}
    - get_window_title: {"window_id": "id"}
    - get_application_windows: {"application": "app_name"}
    - get_terminal_output: {}

    """

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """
        Store the agent configuration.

        Args:
            config: Optional configuration dictionary. ``None`` (or any other
                falsy value, such as an empty dict) results in a fresh empty
                dict being stored on ``self.config``.
        """
        # A falsy argument is swapped for a new dict, so ``self.config`` is
        # always a usable mapping.
        self.config = config if config else {}

    @property
    def action_adapter_class(self) -> type[ActionAdapter]:
        """
        Adapter class associated with this agent.

        Returns:
            ``ActionAdapter`` in this base implementation. Being a property,
            subclasses can override it to return a different adapter class.
        """
        return ActionAdapter

    @abstractmethod
    def step(self, screenshot: bytes, instruction: str) -> str:
        """
        Produce the next action from the current screenshot and instruction.

        Args:
            screenshot: Current screenshot as bytes
            instruction: Task instruction or prompt

        Returns:
            JSON string in the expected format (see the class docstring).
            It must include both reasoning and either a computer_call or a
            message.
        """

    @abstractmethod
    def reset(self) -> None:
        """
        Reset the agent state so it is ready for a new task.
        """

    def close(self) -> None:
        """
        Release any resources held by the agent.

        The default implementation is a no-op; subclasses that hold
        resources should override it.
        """
