import json
import base64
import logging
from string import Template
from typing import Any, Dict, Optional, Union

from azure.identity import DefaultAzureCredential
from openai import AzureOpenAI, OpenAI

from .base import BaseAgent
from .config import CUAConfig

# Prompt template used by `CUA.step` for the first request of a task.
# `${instruction}` is the only placeholder; it is filled in with the task text.
# The fixed rules below tell the model not to ask questions and to prefix its
# final message with "DONE." so the end of the task can be recognised.
CUA_INSTRUCTION = Template("""
Task: ${instruction}

You absolutely must avoid asking any clarification or follow-up questions--just execute the task as best you can with what you're given.
Refrain from asking any "Yes" or "No" questions about whether you should proceed--just assume the answer is always "Yes".
When you are done with the task (or have tried and found you cannot complete it), you must explicitly communicate so by prepending the phrase "DONE." to your message.
""")


class CUA(BaseAgent):
    """
    An agent that uses an OpenAI Computer Use Agent (CUA) model.

    The agent keeps a small amount of conversation state between calls to
    `step`: the id of the last response and the id of the last
    `computer_call` item found in it. `reset` clears that state so the next
    `step` starts a new conversation.
    """

    def __init__(self, config: Optional[Union[Dict[str, Any], CUAConfig]] = None):
        """
        Build the agent configuration and the API client.

        Args:
            config: A `CUAConfig`, a dict of `CUAConfig` keyword arguments
                (a legacy `display_size` entry of the form
                `{"width": ..., "height": ...}` is also accepted), or `None`
                to use `CUAConfig()` defaults.

        Raises:
            ValueError: If `config` is of an unsupported type, or if the
                endpoint is not "azure" and no `api_key` is configured.
            TypeError: If a dict config cannot be turned into a `CUAConfig`
                even after the legacy `display_size` translation.
        """
        # Handle both dict and CUAConfig inputs for backward compatibility
        if isinstance(config, dict):
            # Work on a copy so the caller's dict is never mutated
            config_dict = config.copy()
            try:
                self.agent_config = CUAConfig(**config_dict)
            except TypeError:
                # Handle legacy dict format with display_size
                legacy_size = config_dict.pop("display_size", None)
                if legacy_size is not None:
                    config_dict["display_width"] = legacy_size["width"]
                    config_dict["display_height"] = legacy_size["height"]
                else:
                    # No legacy key present: only fill in dimensions that are
                    # missing, never clobber values the caller set explicitly.
                    config_dict.setdefault("display_width", 1024)
                    config_dict.setdefault("display_height", 768)
                self.agent_config = CUAConfig(**config_dict)
        elif isinstance(config, CUAConfig):
            self.agent_config = config
        elif config is None:
            # Use default configuration
            self.agent_config = CUAConfig()
        else:
            raise ValueError(f"Config must be dict or CUAConfig, got {type(config)}")

        # Validate configuration
        self.agent_config.validate()

        # Initialize base class with dict format for backward compatibility
        super().__init__(self.agent_config.to_dict())

        # Extract configuration values
        self.model = self.agent_config.model_name
        base_url = self.agent_config.base_url
        endpoint = self.agent_config.endpoint
        api_key = self.agent_config.api_key

        # Initialize OpenAI client
        if endpoint == "azure":
            api_version = self.agent_config.api_version

            # Create the credential once and let the closure reuse it, so a
            # new credential is not constructed for every token request.
            credential = DefaultAzureCredential()

            def token_provider() -> str:
                return credential.get_token(
                    "https://cognitiveservices.azure.com/.default"
                ).token

            self.client = AzureOpenAI(
                azure_endpoint=base_url,
                azure_ad_token_provider=token_provider,
                api_version=api_version,
            )
        else:
            if not api_key:
                raise ValueError("api_key must be provided for the OpenAI endpoint.")
            self.client = OpenAI(api_key=api_key, base_url=base_url)

        # Conversation state; both are None until the first response arrives.
        self.previous_response_id: Optional[str] = None
        self.previous_computer_call_id: Optional[str] = None

        # Tool description sent with every request.
        self.display_size: Dict[str, int] = self.agent_config.display_size
        self.tools = [
            {
                "type": "computer-preview",
                "display_width": self.display_size["width"],
                "display_height": self.display_size["height"],
                "environment": self.agent_config.environment,
            }
        ]
        # Log the tool configuration (lazy formatting: only rendered if DEBUG is on)
        logging.getLogger(__name__).debug("CUA tools configured: %s", self.tools)

    def step(self, screenshot: bytes, instruction: str) -> str:
        """
        Takes a step in the environment using the CUA model.

        On the first call after construction or `reset`, only the text prompt
        built from `instruction` is sent; `screenshot` is not used. On later
        calls the screenshot is sent as the output of the previously recorded
        computer call and `instruction` is not used.

        Args:
            screenshot: Raw image bytes; sent as a `data:image/png;base64,...`
                URL on follow-up steps.
            instruction: Task text substituted into `CUA_INSTRUCTION` on the
                first step.

        Returns:
            The model response converted to a dict and serialized as JSON.
        """
        log = logging.getLogger(__name__)

        if self.previous_response_id is None:
            # First step: Start the conversation
            prompt = CUA_INSTRUCTION.substitute(instruction=instruction)
            response = self.client.responses.create(
                model=self.model,
                input=prompt,
                tools=self.tools,
                truncation=self.agent_config.truncation,
                temperature=self.agent_config.temperature,
                top_p=self.agent_config.top_p,
            )
        else:
            # Subsequent steps: Continue the conversation.
            # Encode here rather than up front: the first step never sends it.
            screenshot_b64 = base64.b64encode(screenshot).decode("utf-8")
            # NOTE(review): if the previous response had no computer_call,
            # previous_computer_call_id is None or left over from an earlier
            # response -- confirm callers stop stepping in that case.
            data = [
                {
                    "type": "computer_call_output",
                    "call_id": self.previous_computer_call_id,
                    "output": {
                        "type": "input_image",
                        "image_url": f"data:image/png;base64,{screenshot_b64}",
                    },
                }
            ]
            response = self.client.responses.create(
                model=self.model,
                previous_response_id=self.previous_response_id,
                input=data,
                tools=self.tools,
                truncation=self.agent_config.truncation,
                temperature=self.agent_config.temperature,
                top_p=self.agent_config.top_p,
                extra_headers={},
            )

        # NOTE(review): `.dict()` is deprecated under pydantic v2 in favour of
        # `model_dump()`; kept as-is here -- confirm the supported SDK range
        # before switching.
        response_dict = response.dict()
        self.previous_response_id = response_dict.get("id")

        # Find the computer_call to get the next call_id (first match wins)
        for item in response_dict.get("output", []):
            if item.get("type") == "computer_call":
                self.previous_computer_call_id = item.get("call_id") or item.get("id")
                break
        log.debug("CUA response: %s", response_dict)
        return json.dumps(response_dict)

    def reset(self) -> None:
        """
        Resets the agent's conversation state for a new task.

        After this call the next `step` starts a new conversation from the
        instruction prompt instead of continuing the previous response.
        """
        self.previous_response_id = None
        self.previous_computer_call_id = None
