from typing import Literal, Optional, Union, Any

try:
    from typing_extensions import TypedDict
except ImportError:
    from typing import TypedDict  # Python 3.12+

from openai.types import CompletionUsage
from pydantic import BaseModel
from pydantic.fields import Field


class BoundingBoxDict(TypedDict, total=False):
    """Bounding box dictionary structure."""

    # total=False: any of the four keys may be absent from a given dict.
    # Presumably pixel coordinates (UIElementDict stores this type under
    # the key `bbox_pixels`) -- TODO confirm the origin/axis convention.

    # Lower bounds of the box on each axis.
    x_min: int
    y_min: int

    # Upper bounds of the box on each axis.
    x_max: int
    y_max: int


class UIElementDict(TypedDict, total=False):
    """UI element dictionary structure for ui_content_full_dict.

    All fields are optional to support flexible matching and partial data.
    """

    # Position of the element in its containing list -- TODO confirm which
    # list the index refers to; it is not established in this file.
    index: int

    # --- Textual attributes (a present key may still hold None) ---
    text: Optional[str]
    content_description: Optional[str]
    hint_text: Optional[str]
    tooltip: Optional[str]

    # --- Identity attributes ---
    resource_id: Optional[str]
    class_name: Optional[str]
    package_name: Optional[str]

    # --- Boolean state / capability flags ---
    is_clickable: bool
    is_long_clickable: bool
    is_scrollable: bool
    is_checkable: bool
    is_checked: bool
    is_enabled: bool
    is_focusable: bool
    is_focused: bool
    is_password: bool
    is_selected: bool
    is_editable: bool
    is_visible: bool

    # --- Supported actions and geometry ---
    actions: list[str]
    bbox_pixels: BoundingBoxDict



class ActionTranslatorOutput(BaseModel):
    # Two required string fields. No class docstring is used here on purpose:
    # pydantic would surface it as the schema `description`.
    thought: str

    # The description spells out the expected three-line snippet.
    # NOTE(review): the doubled braces `{{...}}` are literal characters here
    # (this is a plain string, not an f-string) -- confirm whether a later
    # `.format()` call is expected to collapse them.
    soft_action: str = Field(
        description="Contain three lines of code: \nkwargs = {{...}}  # A dictionary describing the target UI element\nindex = env_op.find_element(**kwargs)  # Use env_op to locate the element dynamically\nenv_op.xxx(...)  # Replace xxx with the correct action using the index",
    )


class ConcluderOutput(BaseModel):
    # Required conclusion text.
    episode_conclusion: str

    # May be omitted; defaults to None.
    reflection: Optional[str] = None


class BreakpointAnalyzerOutput(BaseModel):
    # Every field is an optional string defaulting to None, so an instance
    # can be built with any subset of them.
    observation: Optional[str] = None
    completed_tasks: Optional[str] = None

    # Planning-related fields.
    plan_reason: Optional[str] = None
    plan_list: Optional[str] = None

    # Diagnosis and continuation fields (both kept as strings).
    code_diagnosis: Optional[str] = None
    to_continue: Optional[str] = None


class ScreenObs(BaseModel):
    # Screenshot payloads are typed as Any, so pydantic applies no
    # type validation to them. All fields default to None.
    screenshot: Optional[Any] = None
    screenshot_resized: Optional[Any] = None
    screenshot_with_som: Optional[Any] = None
    screenshot_with_som_resized: Optional[Any] = None

    # Paths for the plain screenshot and the "w_som" variant.
    screenshot_path: Optional[str] = None
    screenshot_w_som_path: Optional[str] = None

    # UI structures, also left untyped (Any).
    ui_forest: Optional[Any] = None
    ui_elements: Optional[Any] = None

    # V2 naming: simple string for LLM/logs, full dict for engineering.
    ui_content_simple_str: Optional[str] = None
    ui_content_path: Optional[str] = None
    ui_content_full_dict: Optional[list[UIElementDict]] = None


# Populated from the action execution result in env_op.
class EnvExecStepInfo(BaseModel):
    # V2: store structured UI for offline engineering (e.g., coordinate->element matching).
    # These fields hold lists of UI element dicts (UIElementDict); do NOT
    # store the string form of the UI here.

    # --- State captured before the action ---
    before_ui_content_full_dict: Optional[list[UIElementDict]] = None
    before_screenshot_path: Optional[str] = None
    before_screenshot_w_som_path: Optional[str] = None

    # --- The action itself and its outcome ---
    executed_action: str = ''
    related_elements: Optional[str] = ''
    # Spelled with Union instead of `int | str`: the PEP 604 form is evaluated
    # when the class body runs and raises TypeError on Python 3.9, whereas the
    # rest of this module only needs 3.9 and already uses Optional/Union.
    related_target: Optional[Union[int, str]] = None
    action_feedback: Optional[str] = None
    is_screen_changed: bool = False

    # --- State captured after the action ---
    after_ui_content_full_dict: Optional[list[UIElementDict]] = None
    after_screenshot_path: Optional[str] = None
    after_screenshot_w_som_path: Optional[str] = None


class MllmMatchTarget(BaseModel):
    # All three fields are required (no defaults).
    thought: str

    # Integer index of the chosen target and an integer confidence value.
    # NOTE(review): the valid range of confidence_score is not defined here.
    target_index: int
    confidence_score: int


class ExecResult(BaseModel):
    # The code that was run; empty string when not set.
    executed_code: str = ""

    # Error details; both stay None unless set.
    error_statement: Optional[str] = None
    error_message: Optional[str] = None  # from exec e

    exec_feedback: Optional[str] = ""
    answer_return: Optional[str] = None  # answer return: complete, N/A, None, answer text

    # Completion flags; both default to False and also accept None.
    agent_done: Optional[bool] = False
    done: Optional[bool] = False


# class ReActStepInfo(BaseModel):
#   step_n: int
#   ui_content: str # ui_list
#   obs_description: str # from planner output
#   completed_tasks: Optional[str] = '' # from planner output - tasks completed so far
#   action_reason: str # from planner output
#   action: str # actually executed_code
#   soft_coded_action: str # generated by planner, refined in extract_ui_value
#   related_elements: str # from execution result
#   execution_summary: str # from summarizer output
#   action_step_str: str # f'{self.env_op.action_history[-1]}\nExecution Summary:{execution_summary}'
#   exec_step_info: EnvExecStepInfo

class ReActStepInfo(BaseModel):
    step_n: int

    # --- from planner, summarizer ---
    obs_description: str  # from planner output
    completed_tasks: Optional[str] = ""  # from planner output - tasks completed so far
    action_reason: str  # from planner output
    hard_coded_action: str  # from planner output (what was executed during exploration)
    soft_coded_action: Optional[str] = ""  # from ActionTranslator (filled in building/translation phase)
    execution_summary: str  # from summarizer output

    # --- from env_op: action execution result ---
    exec_step_info: EnvExecStepInfo


class ReActTraj(BaseModel):
    task: str  # This is task_goal
    reflection: Optional[str] = None

    # Required: the per-step records and the action history strings.
    traj: list[ReActStepInfo]
    action_history: list[str]

    # Outcome fields; all default to "not successful".
    env_success_score: float = 0.0
    agent_done_bool: bool = False
    final_success_score: float = 0.0
    final_success_bool: bool = False
    conclusion: Optional[str] = None

    # Metadata for trajectory management
    instance_id: Optional[int] = None
    timestamp: Optional[str] = None
    num_steps: Optional[int] = None
    agent_config: Optional[dict] = None  # GUI Agent configuration used to generate this trajectory
    round: Optional[int] = None  # Round number in multi-round exploration (0-based)

    def to_dict(self) -> dict[str, Any]:
        """Return this trajectory as a plain dict.

        Thin alias that forwards to pydantic's ``model_dump()`` with its
        default arguments.
        """
        return self.model_dump()
  

class RPAExecTraj(BaseModel):
    task: str = ""
    function_call: str = ""  # one line used to call function, from MLLM: `extract params`
    rpa_code: str = ""  # from rpa_bank
    exec_result: Optional[ExecResult] = None

    # Outcome of the run.
    success_score: float = 0.0
    success: bool = False

    # The `[]` literals are kept as-is: pydantic copies unhashable defaults
    # for each instance, so these lists are not shared between instances.
    action_history: Optional[list] = []
    traj: list[EnvExecStepInfo] = []  # Trajectory of page-related actions (renamed from env_op_traj)

    # Fields related to the "fix" phase.
    fix_evaluator_analysis: Optional[str] = None  # from Breakpoint_Analyzer_Agent
    fix_react_traj: Optional[list[ReActTraj]] = None

    conclusion: Optional[str] = None
    reflection: Optional[str] = None

# RPA Function
class RPAInfo(BaseModel):
    # Fixed tag; its counterpart on FetchInfoTool is "fetch_info".
    output_type: Literal["rpa_func"] = "rpa_func"
    task_type: str = ""

    # The next three fields are required (their Field() has no default).
    parameters: str = Field(
        description="Define function parameters from the Task Template. Names must be generic and reusable across tasks. Make all parameters Optional to improve generalization and flexibility.",
    )
    rpa_description: str = Field(
        description="Briefly describe the task the RPA function performs (under 30 words).",
    )
    rpa_code: str = Field(
        description="Provide a single Python code block with well-commented code ready for execution on GUI device. Wrap the code in a reusable function (e.g., def function_name():) with generic parameters. Ensure the code handles all cases.",
    )

    # The remaining fields fall back to an empty string when omitted.
    example_usage: str = Field(
        default="",
        description="A real example demonstrating how to call the code",
    )
    conclusion: str = Field(
        default="",
        description="Summarize how the RPA Code was constructed, considering screen state, input parameters, and task context. Explain how the code adapts to UI changes, influenced by the Task Template or previous RPA Code. Describe robustness strategies and why it's reusable across similar tasks.",
    )


# Tool Calling
class FetchInfoTool(BaseModel):
    # Fixed tag; its counterpart on RPAInfo is "rpa_func".
    output_type: Literal["fetch_info"] = Field(
        default="fetch_info",
        description="Fetch detailed information of a specific step in the trajectory, such as screenshots and a list of UI elements",
    )

    # Both of the following are required.
    traj_id: str = Field(
        description="Which trajectory to fetch from, e.g., 'pre_rpa_exec_traj', 'successful_react_traj', 'failed_react_traj', 'fix_react_traj'",
    )
    step_n: int = Field(
        description="Step number to inspect (1-based index)",
    )

class RPABuilderOutput_optional(BaseModel):
    # Variant whose `output` may be either an RPAInfo or a FetchInfoTool.
    thought: str = Field(
        description="State the failure cause, highlight differences from success, and suggest robustness improvements. Avoid vague details. (under 100 words)",
    )

    # NOTE(review): the default is the *string* "None", not the None object;
    # kept verbatim -- confirm this is intentional.
    info_to_clarify: Optional[str] = Field(
        default="None",
        description="Describe whether you need to gain a better understanding of the page details and ensure the code runs smoothly, and if so, to investigate which step details are necessary to obtain what type of information",
    )

    output: Union[RPAInfo, FetchInfoTool]

class RPABuilderOutput_tool(BaseModel):
    # Variant whose `output` must be a FetchInfoTool.
    thought: str = Field(
        description="State the failure cause, highlight differences from success, and suggest robustness improvements. Avoid vague details. (under 100 words)",
    )

    # NOTE(review): the default is the *string* "None", not the None object;
    # kept verbatim -- confirm this is intentional.
    info_to_clarify: Optional[str] = Field(
        default="None",
        description="You need to gain a better understanding of the page details and ensure the code runs smoothly, so, try to investigate which step details are necessary to obtain what type of information",
    )

    output: FetchInfoTool


class RPABuilderOutput(BaseModel):
    # Variant whose `output` must be an RPAInfo; it has no
    # `info_to_clarify` field, unlike the _optional/_tool variants.
    thought: str = Field(
        description="State the failure cause, highlight differences from success, and suggest robustness improvements. Avoid vague details. (under 100 words)",
    )

    output: RPAInfo


class ParamsExtractionOutput(BaseModel):
    # Single required string field.
    function_call: str = Field(
        description="The code used to call the function for current task",
    )
  

class EpisodeResult(BaseModel):
    """Represents an episode of an agent interacting with the environment.

    Attributes:
      env_success_score: Task success score from AndroidWorld environment (0.0-1.0).
      agent_done_bool: Whether the agent indicated task completion via stop/answer action.
      final_success_score: env_success_score if agent_done_bool=True, otherwise 0.0.
      final_success_bool: True if agent_done_bool=True and final_success_score > 0.5.
    """

    # Identification of the episode.
    task_goal: str = ""
    log_task_path: str = ""

    # Either a list of ReAct steps or a single RPA execution trajectory.
    agent_traj: Optional[Union[list[ReActStepInfo], RPAExecTraj]] = None
    action_history: list[str] = []

    # Scores and flags described in the docstring. This class only stores
    # them; the relationships stated above are not enforced here.
    env_success_score: float = 0.0
    agent_done_bool: bool = False
    final_success_score: float = 0.0
    final_success_bool: bool = False


class ExpResultLine(BaseModel):
    """
    '/' means not executed, '0' means execution failed, and '1' means execution succeeded.
    """

    # Required identification fields.
    task_type: str
    task_num: str
    task_goal: str

    # Per-round status markers (see the docstring for the encoding).
    round_0: str = "/"
    round_1: str = "/"
    round_2: str = "/"

    # Per-task status markers, same encoding.
    task_1: str = "/"
    task_2: str = "/"
    task_3: str = "/"
    task_4: str = "/"
    task_5: str = "/"

    based_on_task: str = "/"

    # Integer indicators defaulting to 0, plus a float-typed counter.
    has_ask_mllm: int = 0
    has_get_ui_info: int = 0
    has_shell: int = 0
    cnt_fetch_info: float = 0


class RecordToken(BaseModel):
    # String fields that default to empty. Note that the count-like fields
    # (task_num, attempt_cnt, step) are typed as str, not int.
    file_path: str = ""
    task_type: str = ""
    task_num: str = ""
    attempt_cnt: str = ""
    stage: str = ""
    step: str = ""
    agent: str = ""

    # Token usage for the step; defaults to an all-zero usage record.
    step_tokens: CompletionUsage = CompletionUsage(
        completion_tokens=0,
        prompt_tokens=0,
        total_tokens=0,
    )

    llm: str = ""


class StringOutput:
    # NOTE(review): unlike every other schema class in this module, this one
    # does not derive from BaseModel, so the annotation below declares no
    # attribute or validated field at runtime -- confirm whether that is
    # intentional.
    # The field is literally named `str`; the annotation still resolves to
    # the builtin because nothing is assigned to that name in the class body.
    str: str


class AskMLLMOutput(BaseModel):
    # Both fields are required strings.
    thought: str
    answer: str