"""
run RPA to get rpa_code
run ReAct to get react_trajs_library.json

params:
use_react_library
update_react_library
n_task_combinations: The number of tasks for a specific task type. default: 3
"""

import datetime
import json
import math
import os
import subprocess
import sys
import time
from collections import defaultdict
from collections.abc import Sequence

from absl import app, flags, logging

logging.set_verbosity(logging.WARNING)

# Load variables from a .env file (if one is found) into the process environment.
from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv())

os.environ["KMP_DUPLICATE_LIB_OK"] = "True"
# Set the URLs of each website. AWS_HOSTNAME must already be present in the
# environment at this point; a missing value raises KeyError.
AWS_HOSTNAME = os.environ["AWS_HOSTNAME"]
os.environ["SHOPPING"] = f"http://{AWS_HOSTNAME}:7770"
os.environ["SHOPPING_ADMIN"] = f"http://{AWS_HOSTNAME}:7780/admin"
os.environ["REDDIT"] = f"http://{AWS_HOSTNAME}:9999"
os.environ["GITLAB"] = f"http://{AWS_HOSTNAME}:8023"
os.environ["MAP"] = f"http://{AWS_HOSTNAME}:3000"
os.environ["WIKIPEDIA"] = f"http://{AWS_HOSTNAME}:8888/wikipedia_en_all_maxi_2022-05/A/User:The_other_Kiwix_guy/Landing"
os.environ["HOMEPAGE"] = f"http://{AWS_HOSTNAME}:4399"
print("Done setting up URLs")

# When only listing tasks (`--list_tasks`), the data generation, login refresh
# and heavy project imports below are skipped entirely.
if '--list_tasks' not in sys.argv:
  # First, run `python scripts/generate_test_data.py` to generate the config files
  p = subprocess.run(["python", "../webarena/scripts/generate_test_data.py"], capture_output=True)
  # It will generate individual config file for each test example in config_files.
  # Raise explicitly (instead of a bare assert, which is stripped under -O) and
  # surface the captured output so a failed generation can be diagnosed.
  if not os.path.exists("config_files/0.json"):
    raise RuntimeError(
        "config_files/0.json was not generated by "
        "../webarena/scripts/generate_test_data.py "
        f"(exit code {p.returncode}).\n"
        f"stdout:\n{p.stdout.decode(errors='replace')}\n"
        f"stderr:\n{p.stderr.decode(errors='replace')}"
    )

  # re-validate login information (best effort: a failure is reported, not fatal)
  _login_proc = subprocess.run(["python", "../webarena/browser_env/auto_login.py"])
  if _login_proc.returncode == 0:
    print("Done saving account cookies")
  else:
    print(
        f"Warning: auto_login.py exited with code {_login_proc.returncode}; "
        "account cookies may not have been refreshed.",
        file=sys.stderr,
    )

  # NOTE(review): these imports are deliberately placed after the environment
  # setup above; presumably the modules read the site URLs at import time —
  # confirm before moving them to the top of the file.
  from browser_env import ScriptBrowserEnv
  from UIAgents.Agent_RPA import agent_rpa
  from UIAgents.Agent_RPA.env_operation import EnvOperation
  from UIAgents.Agent_RPA.utils.llm_client import get_llm_wrapper
  from UIAgents.Agent_RPA.utils.agent_utils import record_exp_result, print_with_color
  from UIAgents.Agent_RPA.utils.rpa_bank_utils import RPABank
  from UIAgents.Agent_RPA.utils.traj_utils import ReactTrajBank

# NOTE(review): presumably the number of benchmark environments/tasks — confirm.
ENV_NUM = 812

# ---------------------------------------------------------------------------
# Command-line flags
# ---------------------------------------------------------------------------

# Agent selection.
flags.DEFINE_string(
    name='agent_name',
    default='agent_rpa',
    help="Agent name: 'agent_react', 'agent_rpa'",
)

# Model used wherever no specialised model is configured.
flags.DEFINE_string(
    name='default_llm',
    default='gpt-5-low',
    help="Default LLM model for all components (unless overridden below)",
)

# Per-component model overrides.
flags.DEFINE_string(
    name='builder_llm',
    default='gpt-5-medium',
    help="LLM for RPA Builder Agent (generates RPA code from trajectories)",
)
flags.DEFINE_string(
    name='grounder_llm',
    default='gpt-5-low',
    help=(
        "LLM for UI element grounding in find_element() calls. "
        "Note: Use non-thinking models like 'gemini-2.5-pro-nothinking' for faster grounding."
    ),
)
flags.DEFINE_string(
    name='actiontranslator_llm',
    default='gpt-5-mini',
    help="LLM for ActionTranslator Agent (converts hardcoded actions to soft-coded actions)",
)
flags.DEFINE_string(
    name='breakpoint_analyzer_llm',
    default='gpt-5-low',
    help="LLM for Breakpoint Analyzer Agent (analyzes breakpoints in RPA code)",
)
flags.DEFINE_string(
    name='concluder_llm',
    default='gpt-5-low',
    help="LLM for Concluder Agent (concludes RPA code)",
)
flags.DEFINE_string(
    name='params_extractor_llm',
    default='gpt-5-mini',
    help="LLM for Params Extractor Agent (extracts parameters from RPA code)",
)
flags.DEFINE_string(
    name='ask_mllm_llm',
    default='gpt-5-low',
    help="LLM for AskMLLM action (asks questions to the LLM for information retrieval)",
)

# Models that the help text marks as "[React* only]".
flags.DEFINE_string(
    name='planner_llm',
    default='gpt-5-medium',
    help="[React* only] LLM for Planner Agent (generates action plans)",
)
flags.DEFINE_string(
    name='summarizer_llm',
    default='gpt-5-low',
    help="[React* only] LLM for Summarizer Agent (summarizes execution results)",
)

# Reflection.
flags.DEFINE_boolean(
    name='reflection', default=True,
    help='Whether to reflect at the last step.',
)
flags.DEFINE_integer(
    name='reflection_rounds', default=2,
    help='The number of times the agent reflects.',
)

# Exploration budget.
flags.DEFINE_integer(
    name='num_tasks_to_explore', default=3,
    help='Number of tasks to explore; directly impacts the reliability and robustness of the learned rpas.',
)
flags.DEFINE_integer(
    name='max_attempts_per_task', default=3,
    help='Number of attempts to build the rpa for a task.',
)
flags.DEFINE_integer(
    name='cur_attempt_cnt', default=1,
    help='Number of current attempt.',
)

# Tool usage.
flags.DEFINE_boolean(
    name='use_fetch_info', default=True,
    help='Whether to enable `fetch_info` tool for builder.',
)
flags.DEFINE_boolean(
    name='use_action_translator', default=True,
    help='If true, the soft action will be generated by ActionTranslator Agent.',
)
flags.DEFINE_boolean(
    name='react_soft_action', default=False,
    help='If true, the soft action will be generated by ReAct Agent.',
)

# Persistent banks.
flags.DEFINE_boolean(
    name='load_rpa_bank', default=False,
    help='Whether to load the rpa_bank.',
)
flags.DEFINE_boolean(
    name='update_rpa_bank', default=True,
    help='Whether to update the rpa_bank.',
)
flags.DEFINE_boolean(
    name='use_react_trajs_bank', default=False,
    help='Whether to use the react_trajs_bank.',
)
flags.DEFINE_boolean(
    name='update_react_trajs_bank', default=True,
    help='Whether to update the react_trajs_bank.',
)

# Run modes.
flags.DEFINE_boolean(
    name='test_rpa_mode', default=False,
    help='Whether to test the generated rpa code.',
)
flags.DEFINE_integer(
    name='min_validated_count', default=1,
    help='If the number of validated tasks is below this threshold, the rpa code will not be used.',
)
flags.DEFINE_list(
    name='to_test_tasks', default=[0],
    help='to_test_tasks has higher priority than test_tasks_cnt.',
)
flags.DEFINE_boolean(
    name='test_react_mode', default=False,
    help='Whether to test react agent on the test tasks.',
)
flags.DEFINE_boolean(
    name='list_tasks', default=False,
    help='If True, only list all available task types and exit without running any tasks.',
)

# LLM call logging.
flags.DEFINE_boolean(
    name='enable_llm_logging',
    default=False,
    help=(
        'Enable logging of each LLM call to local log folder. '
        'When False, LLM call logs (prompts, images, metadata) will not be saved.'
    ),
)

# Benchmark selection.
flags.DEFINE_string(
    name='examine_sites',
    default='reddit',
    help="The sites for examining: reddit, shopping_admin, shopping, map, gitlab. Can specify multiple sites separated by comma, e.g., 'reddit,shopping_admin'",
)
flags.DEFINE_string(
    name='tasks',
    default=None,
    help="Task list. Multiple tasks separated by |, e.g., 'task1|task2'. Single task string is also supported. If None, all tasks in the suite family will be run.",
)
flags.DEFINE_boolean(
    name='headless', default=False,
    help='Whether to run the emulator in headless mode.',
)

# Experiment records.
flags.DEFINE_string(
    name='checkpoint_dir', default='',
    help='The directory to save checkpoints and resume evaluation from.',
)
flags.DEFINE_string(
    name='output_path', default=os.path.expanduser('runs'),
    help='The path to save results to.',
)
flags.DEFINE_string(
    name='log_folder_exp', default='',
    help='The main folder to save intermediate files.',
)

FLAGS = flags.FLAGS


def set_up_configs():
  """Prepare the per-run log folder and tee stdout/stderr into a log file.

  Side effects:
    * Sets the GRPC_VERBOSITY / GRPC_TRACE environment variables.
    * Creates `./log/<prefix>_<timestamp>` where the prefix is `test` in
      test_rpa_mode and the agent name otherwise, and stores that path in
      FLAGS.log_folder_exp (overwriting any value given on the command line).
    * Replaces sys.stdout and sys.stderr with DualOutput / DualError objects
      that both append to `output_<timestamp>.log` inside that folder.
  """
  # Keep gRPC quiet: errors only, tracing disabled.
  os.environ.update({'GRPC_VERBOSITY': 'ERROR', 'GRPC_TRACE': 'none'})

  log_root = "./log"
  os.makedirs(log_root, exist_ok=True)
  timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

  # One folder per experiment holds all of its intermediate files.
  run_prefix = "test" if FLAGS.test_rpa_mode else FLAGS.agent_name
  FLAGS.log_folder_exp = os.path.join(log_root, f"{run_prefix}_{timestamp}")
  os.makedirs(FLAGS.log_folder_exp, exist_ok=True)

  # Mirror everything printed from now on into the run's log file.
  log_path = os.path.join(FLAGS.log_folder_exp, f"output_{timestamp}.log")
  sys.stdout = DualOutput(log_path)
  sys.stderr = DualError(log_path)

class _TeeStream:
  """Write-through stream that mirrors everything to a console stream and a log file.

  Attributes:
    terminal: The wrapped console stream (the original sys.stdout / sys.stderr).
    log: The log file, opened in append mode as UTF-8.
  """

  def __init__(self, terminal, file_path):
    self.terminal = terminal  # Keep the console stream
    # Explicit UTF-8 so non-ASCII output does not depend on the platform locale.
    self.log = open(file_path, 'a', encoding='utf-8')

  def write(self, message):
    """Write `message` to the console and the log; return the log's write count."""
    self.terminal.write(message)  # Write to console
    written = self.log.write(message)  # Also write to log file
    self.log.flush()  # Flush immediately so the log survives a crash
    return written

  def flush(self):
    """Flush both targets so explicit flushes (e.g. print(flush=True)) take effect."""
    self.terminal.flush()
    self.log.flush()

  def close(self):
    """Close only the log file; the wrapped console stream is left open."""
    self.log.close()

  def __getattr__(self, name):
    # Only called for attributes not found normally. Delegate the rest of the
    # stream interface (isatty, fileno, encoding, ...) to the console stream.
    if name in ('terminal', 'log'):
      # Not initialised yet: avoid infinite recursion through self.terminal.
      raise AttributeError(name)
    return getattr(self.terminal, name)


class DualOutput(_TeeStream):
  """Tee for standard output: wraps the current sys.stdout plus a log file."""

  def __init__(self, file_path):
    super().__init__(sys.stdout, file_path)


class DualError(_TeeStream):
  """Tee for standard error: wraps the current sys.stderr plus a log file."""

  def __init__(self, file_path):
    super().__init__(sys.stderr, file_path)


def get_agent(env_op):
  """Build the RPA agent together with its LLM wrappers.

  A wrapper for FLAGS.default_llm is always created. For each specialised
  component a dedicated wrapper is created only when its flag names a model
  different from the default; otherwise None is passed for that component.

  Args:
    env_op: Environment operation object handed to the agent.

  Returns:
    The constructed `agent_rpa.Agent_RPA` instance.
  """
  print('Initializing agent...')

  def _make_wrapper(model_name):
    # Every wrapper shares the same logging switch.
    return get_llm_wrapper(model_name=model_name, enable_logging=FLAGS.enable_llm_logging)

  default_llm = _make_wrapper(FLAGS.default_llm)

  # (flag name == Agent_RPA keyword argument, label used in the console message)
  components = (
    ('actiontranslator_llm', 'ActionTranslator'),
    ('breakpoint_analyzer_llm', 'Breakpoint Analyzer'),
    ('concluder_llm', 'Concluder'),
    ('params_extractor_llm', 'Params Extractor'),
    ('planner_llm', 'Planner'),
  )

  specialised = {}
  for flag_name, label in components:
    model_name = getattr(FLAGS, flag_name)
    if model_name == FLAGS.default_llm:
      # Same model as the default: no dedicated wrapper for this component.
      specialised[flag_name] = None
      continue
    print(f'Using {label} model: {model_name}')
    specialised[flag_name] = _make_wrapper(model_name)

  return agent_rpa.Agent_RPA(env_op, default_llm, **specialised)


def _main() -> None:
  """Run the selected agent over the selected task types, or just list them.

  Steps:
    1. Load `config_files/test.raw.json` and group task ids by intent template,
       keeping only tasks whose sites are all in FLAGS.examine_sites and only
       templates with at least FLAGS.num_tasks_to_explore + 1 tasks.
    2. Resolve the task types to run from FLAGS.tasks (all templates if None).
    3. With FLAGS.list_tasks, print the task types and return.
    4. Otherwise, for each task type, launch a browser environment, build the
       agent and call `run_tasks.run`. The browser is always closed afterwards,
       even when the run raises.

  In test_rpa_mode, task types that are missing from the rpa bank or have fewer
  than FLAGS.min_validated_count verified tasks are not run; a placeholder row
  ('/' in every result column) is written to `test_result.csv` instead.

  Raises:
    KeyError: If a requested task type has no eligible tasks after filtering.
  """
  start_time = time.time()

  # Import task list utility if needed
  if FLAGS.list_tasks:
    from utils.task_list_utils import list_task_types

  # get the task data
  with open("config_files/test.raw.json", "r") as f:
    data = json.load(f)

  # Parse examine_sites: split by comma if provided
  examine_sites_list = None
  if FLAGS.examine_sites:
    examine_sites_list = [site.strip() for site in FLAGS.examine_sites.split(",") if site.strip()]

  grouped_tasks = defaultdict(list)
  for instance in data:
    task_id = instance["task_id"]
    task_template = instance["intent_template"]
    # If examine_sites is set, keep only tasks whose sites are all in examine_sites
    if examine_sites_list is None or all(site in examine_sites_list for site in instance["sites"]):
      grouped_tasks[task_template].append(task_id)

  # A template needs the exploration tasks plus at least one more task.
  min_tasks_per_type = FLAGS.num_tasks_to_explore + 1
  ALL_TASKS = {
    task_type: tasks
    for task_type, tasks in grouped_tasks.items()
    if len(tasks) >= min_tasks_per_type
  }

  if FLAGS.tasks is None:
    task_types = list(ALL_TASKS)
  else:
    # Parse tasks string: support | separator or single task
    tasks_str = FLAGS.tasks.strip()
    if '|' in tasks_str:
      # Multiple tasks separated by |
      task_types = [task.strip() for task in tasks_str.split('|') if task.strip()]
    else:
      # Single task
      task_types = [tasks_str]

  # If --list_tasks is set, only show task types and exit
  if FLAGS.list_tasks:
    list_task_types(examine_sites_list, FLAGS.num_tasks_to_explore, task_types)
    return

  # Testing RPA code requires the stored bank.
  if FLAGS.test_rpa_mode:
    FLAGS.load_rpa_bank = True
  rpa_bank = RPABank(file_name="rpa_bank.json", load_local_bank=FLAGS.load_rpa_bank)
  react_traj_bank = ReactTrajBank(file_name="react_trajs_bank.json", load_local_bank=FLAGS.use_react_trajs_bank)

  # Snapshot of the task types known to the bank; only consulted in test_rpa_mode.
  rpa_bank_task_types = set(rpa_bank.rpa_dict.keys()) if FLAGS.test_rpa_mode else set()
  if FLAGS.test_react_mode:
    FLAGS.agent_name = 'agent_react'

  if FLAGS.agent_name == 'agent_rpa':
    from UIAgents.Agent_RPA import run_tasks_rpa as run_tasks
  else:
    from UIAgents.Agent_RPA import run_tasks_react as run_tasks

  print(f'Starting eval with agent {FLAGS.agent_name} on {len(task_types)} task types: {task_types}.')

  # Five independent, empty banks handed to run_tasks.run for exploration.
  explore_rpa_banks = [RPABank(load_local_bank=False) for _ in range(5)]

  for cnt_task_type, task_type in enumerate(task_types):
    # For test_rpa_mode, skip the task types that are not in the rpa_bank or have less than min_validated_count validated tasks
    if FLAGS.test_rpa_mode and (
        task_type not in rpa_bank_task_types
        or rpa_bank.rpa_dict[task_type]['verified_tasks_num'] < FLAGS.min_validated_count):
      exp_result_csv = os.path.join(FLAGS.log_folder_exp, "test_result.csv")
      test_result = {"Num": cnt_task_type + 1, "Task Type": task_type}
      # Column order matches the regular result rows: all Test_* first, then the action flags.
      test_result.update({f"Test_{task_idx}": '/' for task_idx in FLAGS.to_test_tasks})
      for task_idx in FLAGS.to_test_tasks:
        test_result[f"{task_idx}_has_ask_mllm"] = '/'
        test_result[f"{task_idx}_has_get_ui_info"] = '/'
      record_exp_result(exp_result_csv, test_result)
      continue

    print_with_color(f"================ Start -- Task Type {cnt_task_type + 1}: {task_type} =================", 'blue')

    # Fail early, before a browser is launched, when the requested type is unknown.
    if task_type not in ALL_TASKS:
      raise KeyError(
        f"Task type {task_type!r} has no eligible tasks: it must belong to the examined sites "
        f"and have at least {min_tasks_per_type} tasks."
      )

    # set up the raw environment
    raw_env = ScriptBrowserEnv(
        headless=FLAGS.headless,
        slow_mo=0,
        observation_type="accessibility_tree",
        current_viewport_only=True,
        viewport_size={"width": 1280, "height": 960},
        save_trace_enabled=True,
        sleep_after_execution=0.0,
    )
    try:
      env_op = EnvOperation(raw_env, task_type)

      agent = get_agent(env_op)

      if FLAGS.test_react_mode or FLAGS.test_rpa_mode:
        # to_test_tasks holds positions within the task type's task list.
        task_list = [ALL_TASKS[task_type][int(task_id)] for task_id in FLAGS.to_test_tasks]
        print(f'Testing {FLAGS.agent_name} on the following tasks: {task_list}')
      else:
        task_list = ALL_TASKS[task_type]

      run_tasks.run(
        task_list=task_list,
        env_op=env_op,
        agent=agent,
        rpa_bank=rpa_bank,
        explore_rpa_banks=explore_rpa_banks,
        react_traj_bank=react_traj_bank,
        task_templates=task_type,
        cnt_task_type=cnt_task_type + 1,
      )
    finally:
      # Always release the browser, even if the agent or the run raised.
      raw_env.close()

  print(f'Finished running agent {FLAGS.agent_name} on {task_types}.')

  # Report the wall-clock time in whole hours / minutes / seconds.
  total_minutes, seconds = divmod(int(time.time() - start_time), 60)
  hours, minutes = divmod(total_minutes, 60)
  print(f"Take {hours}hours {minutes}minutes {seconds}seconds.")


def main(argv: Sequence[str]) -> None:
  """Entry point handed to `app.run`: configure logging, then run the evaluation.

  Args:
    argv: Command-line arguments supplied by `app.run`; not used here.
  """
  del argv  # Unused.
  # Creates the per-run log folder and redirects stdout/stderr into it.
  set_up_configs()
  _main()


if __name__ == '__main__':
  # Hand control to absl, which invokes `main`.
  app.run(main)
