"""Runs an agent on the environment.

This module is used for the RPA verification phase and the Fix React phase.
It wraps ReactStarAgent internally and derives the step budget from task
complexity; callers that need to account for already-executed actions pass an
explicit step override.
"""

from gui_agents.react_star.adapter import ReactStarAgent
from .core.agent_rpa import Agent_RPA
from .env_operation import EnvOperation
from .suite_utils import *
from .utils.models import EpisodeResult
from .utils.agent_utils import print_with_color


def run_episode(
  task: task_eval.TaskEval,
  env_op: EnvOperation,
  agent: Agent_RPA,
  log_task_path: str,
  to_init_task: bool = True,
  max_steps_override: int | None = None,
):
  """Runs an agent on a task, e.g., "turn off wifi".

  The agent and the environment are both reset for `task`. Then, depending on
  `agent.rpa_mode`, either the ReAct loop is driven through a temporary
  ReactStarAgent adapter, or `agent.rpa_testing()` is executed. The ReAct loop
  stops when the step budget is exhausted or when `env_op.done` becomes truthy.

  Args:
    task: The task to run on the environment.
    env_op: The environment wrapper the agent acts on.
    agent: The agent to run on the environment.
    log_task_path: Log location forwarded to `agent.reset`, `env_op.reset` and
      the returned result.
    to_init_task: Forwarded to `agent.reset` and `env_op.reset`.
    max_steps_override: If not None, the number of ReAct steps to run instead
      of the budget computed from `task.complexity`. Values <= 0 skip the
      ReAct loop entirely.

  Returns:
    An EpisodeResult with the collected trajectory, the agent's action
    history, the environment success score, whether the agent returned an
    answer, and the final score (the environment score if the agent returned
    an answer, else 0.0) together with its `> 0.5` boolean form.
  """
  agent.reset(task, log_task_path, to_init_task)  # also resets agent.action_history

  # Step budget derived from the task complexity.
  react_max_steps = calculate_max_steps(
    task.complexity,
    task_name=task.name,
    log_prefix="RPA Verification" if agent.rpa_mode else "Episode Runner"
  )

  # agent.action_history was just reset, so already-executed steps cannot be
  # subtracted here; Fix React callers pass max_steps_override explicitly.
  if max_steps_override is not None:
    remaining_steps = max_steps_override
  else:
    remaining_steps = react_max_steps

  env_op.reset(task, log_task_path, to_init_task, react_max_steps)  # initialize task
  # Assigned after env_op.reset: per the original note, task.goal may only be
  # set once a MobileMiniWoB task has been initialized.
  agent.cur_task = task.goal

  if not agent.rpa_mode:
    # Adapter pattern for the ReAct workflow: keeps the old episode_runner
    # interface while using the decoupled architecture internally.
    # Imported lazily, as in the original code.
    from absl import flags
    from .utils.llm_client import get_llm_wrapper
    FLAGS = flags.FLAGS

    # LLM wrappers for the Planner and the Summarizer.
    planner_llm = get_llm_wrapper(
      model_name=FLAGS.planner_llm,
      enable_logging=FLAGS.enable_llm_logging,
    )
    summarizer_llm = get_llm_wrapper(
      model_name=FLAGS.summarizer_llm,
      enable_logging=FLAGS.enable_llm_logging,
    )

    # Temporary adapter for this episode. episode_runner is used for
    # single-round execution (RPA verification), so no reflection rounds are
    # needed. Optional flags fall back to defaults when they are not defined.
    adapter = ReactStarAgent(
      agent=agent,
      planner_llm=planner_llm,
      summarizer_llm=summarizer_llm,
      action_space_mode=getattr(FLAGS, 'react_star_action_space', 'index'),
      ui_info_mode=getattr(FLAGS, 'react_star_ui_info', 'screenshot_with_tree'),
      img_resize_mode=getattr(FLAGS, 'react_star_img_resize_mode', 'resized'),
      enable_shell_action=getattr(FLAGS, 'enable_shell_action', False)
    )

    output = []
    # With a budget <= 0 the loop body never runs, so agent_done must be bound
    # up front; otherwise the checks below raise UnboundLocalError.
    agent_done = False
    for _ in range(max(remaining_steps, 0)):
      step_data = adapter._execute_react_step(env_op=env_op)
      agent_done = env_op.answer_return is not None
      output.append(step_data)
      if env_op.done:
        print('Environment ends episode.')
        break
  else:
    # run testing
    output = agent.rpa_testing()
    agent_done = env_op.answer_return is not None

  # NOTE(review): this compares against react_max_steps, not remaining_steps,
  # so the warning may not fire when a smaller max_steps_override was used.
  # Confirm the intended behavior with callers.
  if not agent_done and len(agent.action_history) == react_max_steps:
    print_with_color('Agent did not indicate task is done. Reached max number of steps.', 'red')

  env_success_score = task.is_successful(env_op.raw_env)
  # The environment score only counts when the agent itself returned an answer.
  final_success_score = env_success_score if agent_done else 0.0
  final_success_bool = final_success_score > 0.5

  return EpisodeResult(
    task_goal=task.goal,
    log_task_path=log_task_path,
    agent_traj=output,
    action_history=agent.action_history,
    env_success_score=env_success_score,
    agent_done_bool=agent_done,
    final_success_score=final_success_score,
    final_success_bool=final_success_bool,
  )