from typing import Literal, Optional, Union, Any

from openai.types import CompletionUsage
from pydantic import BaseModel
from pydantic.fields import Field


class PlannerOutput(BaseModel):
  # Structured record of one planner turn (presumably parsed from the planner
  # model's reply -- confirm against the caller).
  #
  # Only `summarize_and_analyze` is required; every other field defaults to
  # None. Those fields are annotated Optional so the annotation agrees with the
  # default: pydantic v2 does not validate defaults, but it does reject an
  # explicit None for a plain `str` field, which would make a dumped instance
  # that still holds its defaults impossible to validate back into the model.
  summarize_and_analyze: str
  consider_reflection: Optional[str] = None
  completed_tasks: Optional[str] = None  # input to planner
  plan_reason: Optional[str] = None
  plan_list: Optional[str] = None  # input to planner
  code_reason: Optional[str] = None  # input to summarizer
  code: Optional[str] = None  # important


class PlannerOutput_w_soft(BaseModel):
  # Same field set as PlannerOutput plus `soft_action`.
  #
  # Only `summarize_and_analyze` is required; every other field defaults to
  # None. Those fields are annotated Optional so the annotation agrees with the
  # default: pydantic v2 does not validate defaults, but it does reject an
  # explicit None for a plain `str` field, which would make a dumped instance
  # that still holds its defaults impossible to validate back into the model.
  summarize_and_analyze: str
  consider_reflection: Optional[str] = None
  completed_tasks: Optional[str] = None  # input to planner
  plan_reason: Optional[str] = None
  plan_list: Optional[str] = None  # input to planner
  code_reason: Optional[str] = None  # input to summarizer
  code: Optional[str] = None  # important
  soft_action: Optional[str] = None  # could be generated by a new agent


class ActionTranslatorOutput(BaseModel):
  # Two required string fields: a free-form `thought` and a `soft_action` code
  # snippet whose expected three-line shape is spelled out in the field
  # description below.
  # NOTE(review): the description contains literal doubled braces `{{...}}`;
  # they only collapse to `{...}` if the text is later run through str.format
  # -- confirm that is intended.
  thought: str
  soft_action: str = Field(
    description="Contain three lines of code: \nkwargs = {{...}}  # A dictionary describing the target UI element\nindex = env_op.find_element(**kwargs)  # Use env_op to locate the element dynamically\nenv_op.xxx(...)  # Replace xxx with the correct action using the index")


class SummarizerOutput(BaseModel):
  # Two required text fields; neither has a default, so both must be supplied.
  screen_changes: str
  execution_summary: str


class AskMLLMOutput(BaseModel):
  # Required `thought` / `answer` string pair (presumably the reply to a
  # question put to the multimodal model -- confirm against the caller).
  thought: str
  answer: str


class ConcluderOutput(BaseModel):
  # Conclusion text for an episode, with an optional reflection.
  episode_conclusion: str  # required
  reflection: str | None = None  # may be left unset


class ExecEvaluatorOutput(BaseModel):
  # Every field is an optional string that defaults to None, so an instance
  # can be built with no arguments at all.
  observation: str | None = None
  completed_tasks: str | None = None
  plan_reason: str | None = None
  plan_list: str | None = None
  code_diagnosis: str | None = None
  to_continue: str | None = None


class OneStepActorOutput(BaseModel):
  # All fields are optional and default to None. `code` is typed as Any, so
  # no particular shape is enforced on it.
  observation: str | None = None
  completed_tasks: str | None = None
  code_reason: str | None = None
  code: Any | None = None
  soft_action: str | None = None


class ScreenObs(BaseModel):
  # Bundle of screenshot variants, their file paths, and UI-structure data.
  # Every field is optional and defaults to None.

  # Screenshot objects (typed Any; the concrete image type is not fixed here).
  screenshot: Any | None = None
  screenshot_resized: Any | None = None
  screenshot_with_som: Any | None = None
  screenshot_with_som_resized: Any | None = None

  # Paths for the plain and "som" screenshots.
  screenshot_path: str | None = None
  screenshot_w_som_path: str | None = None

  # UI structure / content in several representations.
  ui_forest: Any | None = None
  ui_elements: Any | None = None
  ui_content: Any | None = None
  ui_content_path: str | None = None
  ui_content_dict: dict | None = None


class PlannerStepData(BaseModel):
  # Pairs a screen observation with a planner output (either variant) and the
  # code string that was executed, which defaults to empty.
  obs: ScreenObs
  output: PlannerOutput | PlannerOutput_w_soft
  executed_code: str = ""


class SummarizerStepData(BaseModel):
  # Pairs a screen observation with a summarizer output; both are required.
  obs: ScreenObs
  output: SummarizerOutput


class PlannerInteractionResult(BaseModel):
  # Planner step data plus an optional execution summary (None by default).
  data: PlannerStepData
  execution_summary: str | None = None


class SummarizerInteractionResult(BaseModel):
  # Same two required string fields as SummarizerOutput, declared as a
  # separate model.
  screen_changes: str
  execution_summary: str


# from action execution result in env_op
class EnvExecStepInfo(BaseModel):
  # One executed action together with the UI content and screenshot paths
  # recorded before and after it. Every field has a default.

  # State before the action.
  before_ui_content: str | None = None
  before_screenshot_path: str | None = None
  before_screenshot_w_som_path: str | None = None

  # The action and what it touched.
  executed_action: str = ""
  related_elements: str | None = ""
  related_target: int | str | None = None
  action_feedback: str | None = None
  is_screen_changed: bool = False

  # State after the action.
  after_ui_content: str | None = None
  after_screenshot_path: str | None = None
  after_screenshot_w_som_path: str | None = None


class ExecResult(BaseModel):
  # Outcome of executing a piece of code. Every field has a default.
  executed_code: str = ""
  error_statement: str | None = None
  error_message: str | None = None  # from exec e
  exec_feedback: str | None = ""
  # answer return: complete, N/A, None, answer text
  answer_return: str | None = None
  agent_done: bool | None = False
  done: bool | None = False


class ReActStepInfo(BaseModel):
  # Record of a single step; every field is required. The trailing comments
  # note where each value comes from.
  step_n: int
  ui_content: str  # ui_list
  obs_description: str  # from planner output
  action_reason: str  # from planner output
  action: str  # actually executed_code
  soft_coded_action: str  # generated by planner, refined in extract_ui_value
  related_elements: str  # from execution result
  execution_summary: str  # from summarizer output
  action_step_str: str  # f'{self.env_op.action_history[-1]}\nExecution Summary:{execution_summary}'
  exec_step_info: EnvExecStepInfo
  # env_op_traj: list[EnvExecStepInfo]


class ReActTraj(BaseModel):
  # A full trajectory for one task: the per-step records, the list of action
  # strings, and the list of EnvExecStepInfo entries.
  # `task`, `traj`, `action_history` and `env_op_traj` are required.
  task: str
  pre_reflection: str | None = None
  traj: list[ReActStepInfo]
  action_history: list[str]
  success: bool = False
  conclusion: str | None = None
  env_op_traj: list[EnvExecStepInfo]


class RPAExecTraj(BaseModel):
  # Trajectory of one RPA function execution. Every field has a default, so
  # the model can be created empty and filled in afterwards.
  task: str = ""
  # one line used to call function, from MLLM: `extract params`
  function_call: str = ""
  rpa_code: str = ""  # from rpa_bank
  exec_result: ExecResult | None = None
  success: bool = False
  action_history: list | None = []
  # Trajectory of page-related actions
  env_op_traj: list[EnvExecStepInfo] = []
  # from Exec_Evaluator_Agent
  fix_evaluator_analysis: str | None = None
  fix_react_traj: list[ReActTraj] | None = None
  conclusion: str | None = None


# RPA Function
class RPAInfo(BaseModel):
  # Description of one RPA function. `output_type` is fixed to 'rpa_func'.
  # `parameters`, `rpa_description` and `rpa_code` are required;
  # `task_type`, `example_usage` and `conclusion` default to the empty string.
  # The Field descriptions are runtime strings (they end up in the model's
  # JSON schema), so their wording is part of the program's behavior.
  output_type: Literal['rpa_func'] = 'rpa_func'
  task_type: str = ""
  parameters: str = Field(
    description="Define function parameters from the Task Template. Names must be generic and reusable across tasks. Make all parameters Optional to improve generalization and flexibility."
  )
  rpa_description: str = Field(
    description="Briefly describe the task the RPA function performs (under 30 words)."
  )
  rpa_code: str = Field(
    description="Provide a single Python code block with well-commented code ready for execution on GUI device. Wrap the code in a reusable function (e.g., def function_name():) with generic parameters. Ensure the code handles all cases."
  )
  example_usage: str = Field(
    default="",
    description="A real example demonstrating how to call the code"
  )
  conclusion: str = Field(
    default="",
    description="Summarize how the RPA Code was constructed, considering screen state, input parameters, and task context. Explain how the code adapts to UI changes, influenced by the Task Template or previous RPA Code. Describe robustness strategies and why it's reusable across similar tasks."
  )


# Tool Calling
class FetchInfoTool(BaseModel):
  # Request for the details of one trajectory step. `output_type` is fixed to
  # 'fetch_info'. `traj_id` and `step_n` are required.
  output_type: Literal['fetch_info'] = Field(
    default='fetch_info',
    description="Fetch detailed information of a specific step in the trajectory, such as screenshots and a list of UI elements"
  )
  traj_id: str = Field(
    description="Which trajectory to fetch from, e.g., 'pre_rpa_exec_traj', 'successful_react_traj', 'failed_react_traj', 'fix_react_traj'"
  )
  step_n: int = Field(
    description="Step number to inspect (1-based index)"
  )

class RPABuilderOutput_optional(BaseModel):
  # Builder output whose `output` may be either an RPAInfo or a FetchInfoTool.
  # NOTE(review): `info_to_clarify` defaults to the *string* "None", not the
  # None object -- confirm that is intended.
  thought: str = Field(
    description="State the failure cause, highlight differences from success, and suggest robustness improvements. Avoid vague details. (under 100 words)",
  )
  info_to_clarify: str | None = Field(
    default="None",
    description="Describe whether you need to gain a better understanding of the page details and ensure the code runs smoothly, and if so, to investigate which step details are necessary to obtain what type of information",
  )
  output: RPAInfo | FetchInfoTool


class RPABuilderOutput_tool(BaseModel):
  # Builder output whose `output` must be a FetchInfoTool.
  # NOTE(review): `info_to_clarify` defaults to the *string* "None", not the
  # None object -- confirm that is intended.
  thought: str = Field(
    description="State the failure cause, highlight differences from success, and suggest robustness improvements. Avoid vague details. (under 100 words)",
  )
  info_to_clarify: str | None = Field(
    default="None",
    description="You need to gain a better understanding of the page details and ensure the code runs smoothly, so, try to investigate which step details are necessary to obtain what type of information",
  )
  output: FetchInfoTool


class RPABuilderOutput(BaseModel):
  # Builder output whose `output` must be an RPAInfo; both fields are required.
  thought: str = Field(
    description="State the failure cause, highlight differences from success, and suggest robustness improvements. Avoid vague details. (under 100 words)"
  )
  output: RPAInfo


class ParamsExtractionOutput(BaseModel):
  # Single required field holding the call expression as a string.
  function_call: str = Field(
    description="The code used to call the function for current task"
  )


class EpisodeResult(BaseModel):
  """Represents an episode of an agent interacting with the environment.

  Attributes:
    task_goal: Goal text of the task; empty string by default.
    log_task_path: Log path associated with the task; empty string by default.
    agent_traj: Agent data for the episode, either a list of ReActStepInfo
      steps or a single RPAExecTraj. Defaults to None.
    action_history: Action strings recorded for the episode; empty by default.
    agent_done: Whether the agent indicated the task is complete.
    env_success: Whether the task is successful and the agent is done.
    task_successful: Whether the task is successful according to the
      environment (stored as a float).
    agent_successful: The success score of the agent.
  """
  task_goal: str = ''
  log_task_path: str = ''
  # Optional so the annotation matches the None default; a bare Union would
  # reject an explicit None under pydantic v2 validation.
  agent_traj: Optional[Union[list[ReActStepInfo], RPAExecTraj]] = None
  action_history: list[str] = []
  agent_done: bool = False
  env_success: bool = False
  task_successful: float = 0.0
  agent_successful: float = 0.0


class MllmMatchTarget(BaseModel):
  # Three required fields: a free-form thought, an integer target index and an
  # integer confidence score.
  # NOTE(review): index base and score range are not defined here -- confirm
  # against the prompt/caller.
  thought: str
  target_index: int
  confidence_score: int

class ExpResultLine(BaseModel):
  """
  '/' means not executed, '0' means execution failed, and '1' means execution succeeded.
  """
  # Identification of the task; all three are required.
  # num: int
  task_type: str
  task_num: str
  task_goal: str
  # Status columns; each defaults to '/'.
  round_0: str = '/'
  round_1: str = '/'
  round_2: str = '/'
  task_1: str = '/'
  task_2: str = '/'
  task_3: str = '/'
  task_4: str = '/'
  task_5: str = '/'
  based_on_task: str = '/'
  # Numeric columns; all default to zero.
  # has_one_step_actor: int = 0
  has_ask_mllm: int = 0
  has_get_ui_info: int = 0
  cnt_fetch_info: float = 0


class RecordToken(BaseModel):
  # One token-usage record. All descriptive fields are strings defaulting to
  # empty; `step_tokens` defaults to a CompletionUsage with every count at 0.
  file_path: str = ""
  task_type: str = ""
  task_num: str = ""
  attempt_cnt: str = ""
  stage: str = ""
  step: str = ""
  agent: str = ""
  step_tokens: CompletionUsage = CompletionUsage(
    completion_tokens=0,
    prompt_tokens=0,
    total_tokens=0,
  )
  llm: str = ""