"""
Action Adapter for OfficeArena

This module provides the ActionAdapter class, which is responsible for
translating high-level actions from an agent into low-level commands
that can be executed by the screenenv.Sandbox environment.
"""

import json
import logging
from typing import Any, Dict

from screenenv import Sandbox


class ActionAdapter:
    """
    Translates agent actions into sandbox commands.
    """

    def __init__(self, sandbox: Sandbox):
        """
        Initializes the ActionAdapter with a sandbox instance.

        Args:
            sandbox: An instance of screenenv.Sandbox.
        """
        self.sandbox = sandbox

    def execute_action(self, action_str: str) -> Dict[str, Any]:
        """
        Parses an action string from the agent and executes it in the sandbox.

        Args:
            action_str: A JSON string representing the agent's action.

        Returns:
            A dictionary containing the result of the action.
        """
        try:
            action_data = json.loads(action_str)
            output = action_data.get("output", [])

            computer_call = None
            for item in output:
                if item.get("type") == "user_interaction":
                    logging.getLogger(__name__).warning("Agent requires user input. Exiting execution.")
                    return {
                        "success": False,
                        "task_completed": True,
                        "result": "Agent requires user input.",
                        "action_type": "user_interaction",
                    }
                if item.get("type") == "computer_call":
                    computer_call = item
                    break
                if item.get("type") == "message" and "DONE" in item.get("content", [{}])[0].get("text", ""):
                    return {
                        "success": True,
                        "task_completed": True,
                        "result": "Task finished by agent.",
                        "action_type": "finish_message",
                    }

            if not computer_call:
                # If there is no computer_call, handle generic function calls
                # First, handle an explicit finish call if present
                for item in output:
                    if item.get("type") == "call" and item.get("action", {}).get("type") == "finish":
                        return {
                            "success": True,
                            "task_completed": True,
                            "result": item["action"].get("message", "Task completed."),
                        }

                # Then, dispatch any other function call (e.g., add_tasks_to_dataset)
                for item in output:
                    if item.get("type") == "call":
                        action = item.get("action", {})
                        action_type = action.pop("type", None)
                        if not action_type:
                            raise ValueError("Call item is missing an action type.")

                        # Dispatch to the adapter for execution
                        result = self._dispatch_action(action_type, action)
                        return {
                            "success": True,
                            "task_completed": False,
                            "result": result,
                            "action_type": action_type,
                        }

                raise ValueError("No 'computer_call' or dispatchable 'call' action found in the output.")

            action = computer_call.get("action", {})
            action_type = action.pop("type")

            # Handle 'finish' action
            if action_type == "finish":
                return {
                    "success": True,
                    "task_completed": True,
                    "result": action.get("message", "DONE. Task completed."),
                    "action_type": "finish",
                }

            # Dispatch to the correct sandbox method
            result = self._dispatch_action(action_type, action)

            return {
                "success": True,
                "task_completed": False,
                "result": result,
                "action_type": action_type,
            }

        except Exception as e:
            logging.getLogger(__name__).error(f"Error in execute_action: {e}")
            return {
                "success": False,
                "task_completed": False,
                "error": str(e),
                "action_type": "error"
            }

    def _dispatch_action(self, action_type: str, args: Dict[str, Any]) -> Any:
        """
        Dispatches the action to the corresponding sandbox method.

        Args:
            action_type: The type of the action to execute.
            args: The arguments for the action.

        Returns:
            The result of the executed sandbox method.
        """
        # CUA to ScreenEnv mapping
        if action_type == "click":
            button = args.pop("button", "left")
            action_type = f"{button}_click"
        elif action_type == "type":
            action_type = "write"
        elif action_type == "keypress":
            action_type = "press"
            args["key"] = args.pop("keys")
        elif action_type == "move":
            action_type = "move_mouse"
        elif action_type == "scroll":
            # Move to the correct region for CUA scroll
            if "x" in args and "y" in args:
                self.sandbox.move_mouse(x=args.pop("x"), y=args.pop("y"))
            if "direction" in args:
                print("Directional scroll detected")
                self.sandbox.scroll(direction=args["direction"], amount=args.get("amount", 1))
                return
            else:
                if "scroll_y" in args:
                    scroll_y = args['scroll_y']
                else:
                    scroll_y = 0
                # For vertical scrolling, use button 4 (scroll up) or button 5 (scroll down)
                if scroll_y != 0:
                    button = 4 if scroll_y < 0 else 5
                    clicks = abs(scroll_y)
                    self.sandbox.execute_command(f"xdotool click --repeat {int(str(clicks)[0])} {button}")
                if "scroll_x" in args:
                    scroll_x = args['scroll_x']
                else:
                    scroll_x = 0
                # For horizontal scrolling, use button 6 (scroll left) or button 7 (scroll right)
                if scroll_x != 0:
                    button = 6 if scroll_x < 0 else 7
                    clicks = abs(scroll_x)
                    self.sandbox.execute_command(f"xdotool click --repeat {int(str(clicks)[0])} {button}")
            return
        elif action_type == "wait":
            if "duration" in args:  # CUA is in seconds
                duration = int(args["duration"] * 1000)
            elif "ms" in args:  # screenenv is in ms
                duration = args["ms"]
            else:
                duration = 5000  # Default to 5 seconds if no duration is specified
            self.sandbox.wait(duration)
            return
        elif action_type == "drag":
            if "path" in args:  # CUA format
                path = args["path"]
                if len(path) >= 2:
                    fr = (path[0]["x"], path[0]["y"])
                    to = (path[-1]["x"], path[-1]["y"])
                    # Note: ScreenEnv drag only supports start and end.
                    return self.sandbox.drag(fr, to)
            else:  # ScreenEnv format
                return self.sandbox.drag(**args)
        # Pass through to sandbox
        if hasattr(self.sandbox, action_type):
            method = getattr(self.sandbox, action_type)
            return method(**args)
        else:
            raise AttributeError(f"Sandbox does not have an action named '{action_type}'")
