"""
run RPA to get rpa_code
run ReAct to get react_trajs_library.json

params:
use_react_library
update_react_library
n_task_combinations: The number of tasks for a specific task type. default: 3
"""

import datetime
import json
import math
import os
import sys
import time
from collections.abc import Sequence

import gym
from absl import app, flags, logging

from UIAgents.Agent_RPA import agent_rpa
from UIAgents.Agent_RPA.env_operation import EnvOperation
from UIAgents.Agent_RPA.utils.JSON_API import get_llm_wrapper
from UIAgents.Agent_RPA.utils.agent_utils import record_exp_result, print_with_color
from UIAgents.Agent_RPA.utils.rpa_bank_utils import RPABank
from UIAgents.Agent_RPA.utils.traj_utils import ReactTrajBank

logging.set_verbosity(logging.WARNING)

from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv())

## ============================================================================
## FLAG DEFINITIONS - Organized by Category
## ============================================================================
# All command-line options of this launcher are registered below with absl
# flags and read back through the module-level `FLAGS` object.

# =============================================================================
# Agent Configuration
# =============================================================================
# `agent_name` picks the task runner in `_main`: 'agent_rpa' imports
# run_tasks_rpa, any other value imports run_tasks_react. It is also used as
# the prefix of the per-experiment log folder (see `set_up_configs`).
flags.DEFINE_string('agent_name', 'agent_rpa', 
                    help="Agent name: 'agent_react', 'agent_rpa'")
# Not read anywhere in this module; presumably consumed by the agent code.
flags.DEFINE_integer('reflection_rounds', 2, 
                     help='The number of times the agent reflects.')

# =============================================================================
# LLM Model Configuration
# =============================================================================
# `get_agent` builds one LLM wrapper each for default_llm, planner_llm,
# summarizer_llm, actiontranslator_llm and params_extractor_llm.
# builder_llm and grounding_llm are not read in this module; presumably they
# are looked up through FLAGS elsewhere in the package.

# Main model; its name is also embedded in the experiment log folder name.
flags.DEFINE_string('default_llm', 'gpt-5-medium',
                    help="Main LLM model. Options: 'gpt-4o', 'gpt-4.1', 'gpt-5-low', 'gpt-5-medium', "
                         "'gemini-2.5-pro-thinking', 'claude-sonnet-4-5'")
flags.DEFINE_string('builder_llm', 'gpt-5-medium',
                    help="LLM model name for Builder in RPA Builder. Options: 'gpt-4o', 'gpt-4.1', 'gpt-5-low', "
                         "'gpt-5-medium', 'gemini-2.5-pro-thinking', 'claude-sonnet-4-5'")
flags.DEFINE_string('grounding_llm', 'gpt-5-low',
                    help="LLM model name for grounding phase in find_element. Options: 'gpt-4o', 'gpt-4.1', 'gpt-5-low'")
flags.DEFINE_string('actiontranslator_llm', 'gpt-5-low',
                    help="LLM model name for ActionTranslator Agent. Options: 'gpt-4o', 'gpt-4.1', 'gpt-5-low'")
# NOTE(review): the default 'gpt-5-mini' is not among the options listed in
# the help text below.
flags.DEFINE_string('params_extractor_llm', 'gpt-5-mini',
                    help="LLM model name for Params Extractor in RPA execution. Options: 'gpt-4o', 'gpt-4.1', 'gpt-5-low'")

# NOTE(review): the default 'claude-sonnet-4-20250514' is not among the
# options listed in the help text below.
flags.DEFINE_string('planner_llm', 'claude-sonnet-4-20250514',
                    help="LLM model name for Planner Agent. Options: 'gpt-4o', 'gpt-4.1', 'gpt-5-low', "
                         "'gpt-5-medium', 'gemini-2.5-pro-thinking', 'claude-sonnet-4-5'")
flags.DEFINE_string('summarizer_llm', 'gpt-5-low',
                    help="LLM model name for Summarizer Agent. Options: 'gpt-4o', 'gpt-4.1', 'gpt-5-low'")

# =============================================================================
# Exploration / RPA building
# =============================================================================
# None of the three flags below is read in this module. `_main` creates five
# exploration banks, which matches the default of `num_tasks_to_explore`.
flags.DEFINE_integer('num_tasks_to_explore', 5,
                     help='Number of tasks to explore; directly impacts the reliability and robustness of the learned rpas.')
flags.DEFINE_integer('max_attempts_per_task', 3, help='Number of attempts to build the rpa for a task.')
flags.DEFINE_integer('cur_attempt_cnt', 1, help='Number of current attempt.')

# Tool Usage
flags.DEFINE_boolean('use_fetch_info', True, help='Whether to enable `fetch_info` tool for builder.')

# Soft-action generation (not read in this module).
flags.DEFINE_boolean('use_action_translator', True,
                     help='If true, the soft action will be generated by ActionTranslator Agent.')
flags.DEFINE_boolean('react_soft_action', False,
                     help='If true, the soft action will be generated by ReAct Agent.')

# Bank persistence. `load_rpa_bank` and `use_react_trajs_bank` are passed as
# `load_local_bank` to RPABank / ReactTrajBank in `_main`; `load_rpa_bank` is
# forced to True there whenever `test_rpa_mode` is set.
flags.DEFINE_boolean('load_rpa_bank', False, help='Whether to load the rpa_bank.')
flags.DEFINE_boolean('update_rpa_bank', True, help='Whether to update the rpa_bank.')
flags.DEFINE_boolean('use_react_trajs_bank', False, help='Whether to use the react_trajs_bank.')
flags.DEFINE_boolean('update_react_trajs_bank', True, help='Whether to update the react_trajs_bank.')

# RPA testing. In test mode `_main` skips (and records a placeholder row for)
# every task type that is missing from the rpa bank or whose
# 'verified_tasks_num' is below `min_validated_count`.
flags.DEFINE_boolean('test_rpa_mode', False, help='Whether to test the generated rpa code.')
flags.DEFINE_integer('min_validated_count', 1,
                     help='If the number of validated tasks is below this threshold, the rpa code will not be used.')
# NOTE(review): the default holds ints, whereas values supplied on the command
# line are presumably parsed as strings by absl -- confirm consumers accept both.
# The help text mentions `test_tasks_cnt`, which is not a flag defined in this file.
flags.DEFINE_list('to_test_tasks', [0, 11, 12, 13, 14], help='to_test_tasks has higher priority than test_tasks_cnt.')
# Forced to True by `set_up_configs` when agent_name == 'agent_react'.
flags.DEFINE_boolean('run_react_test_tasks', False, help='Whether to run the tasks used to test rpa.')

# =============================================================================
# Benchmark
# =============================================================================
flags.DEFINE_integer('task_random_seed', 30, 'Random seed for task randomness.')
# Resolved into the concrete list of task types at the top of `_main`.
flags.DEFINE_list('tasks', None,
                  'None, feedback, task_list. List of specific tasks to run in the given suite family, e.g. search-engine. If None, all tasks in the suite family will be run. "feedback" to run TASKS_FEEDBACK.')
flags.DEFINE_boolean('fixed_task_seed', False,
                     'Whether to use the same task seed when running multiple task combinations.')
# Passed straight to gym.make(..., headless=...) in `_main`.
flags.DEFINE_boolean('headless', True, help='Whether to run the emulator in headless mode.')

# =============================================================================
# Experiment record
# =============================================================================
flags.DEFINE_string('checkpoint_dir', '', 'The directory to save checkpoints and resume evaluation from.')
# expanduser() leaves 'runs' unchanged because the path has no leading '~'.
flags.DEFINE_string('output_path', os.path.expanduser('runs'), 'The path to save results to.')
# Any value given on the command line is overwritten by `set_up_configs`,
# which assigns a fresh timestamped folder under ./log.
flags.DEFINE_string('log_folder_exp', '', 'The main folder to save intermediate files.')

# Module-wide handle used to read (and occasionally override) flag values.
FLAGS = flags.FLAGS


def set_up_configs():
  """Prepare the process environment and per-experiment logging.

  Silences gRPC logging, forces `run_react_test_tasks` on for the ReAct agent,
  creates a timestamped experiment folder under ./log (stored in
  `FLAGS.log_folder_exp`), and replaces `sys.stdout` / `sys.stderr` with
  wrappers that also append everything to a log file inside that folder.
  """
  # Only show gRPC errors and disable its tracing.
  os.environ.update({'GRPC_VERBOSITY': 'ERROR', 'GRPC_TRACE': 'none'})

  if FLAGS.agent_name == 'agent_react':
    FLAGS.run_react_test_tasks = True

  log_root = "./log"
  os.makedirs(log_root, exist_ok=True)
  stamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

  # Each folder stores the intermediate files for one experiment.
  experiment_name = (
    f"test_{stamp}_{FLAGS.default_llm}"
    if FLAGS.test_rpa_mode
    else f"{FLAGS.agent_name}_{stamp}_{FLAGS.default_llm}"
  )
  FLAGS.log_folder_exp = os.path.join(log_root, experiment_name)
  os.makedirs(FLAGS.log_folder_exp, exist_ok=True)

  # Both streams are mirrored into the same log file.
  log_path = os.path.join(FLAGS.log_folder_exp, f"output_{stamp}.log")
  sys.stdout = DualOutput(log_path)
  sys.stderr = DualError(log_path)


class DualOutput:
  """Tee for ``sys.stdout``: every write goes to the terminal and to a log file.

  The stream that is ``sys.stdout`` at construction time is kept as
  ``self.terminal``; the log file is opened in append mode as ``self.log``.
  """

  def __init__(self, file_path):
    """Capture the current ``sys.stdout`` and open `file_path` for appending."""
    self.terminal = sys.stdout
    self.log = open(file_path, 'a')

  def write(self, message):
    """Write `message` to the terminal and the log; the log is flushed at once."""
    self.terminal.write(message)
    self.log.write(message)
    self.log.flush()

  def flush(self):
    """Flush both targets.

    Previously a no-op, which meant ``print(..., flush=True)`` and explicit
    ``sys.stdout.flush()`` calls never reached the real terminal stream.
    """
    self.terminal.flush()
    self.log.flush()

  def __getattr__(self, name):
    """Delegate any other stream attribute (isatty, fileno, encoding, ...).

    Only called when normal attribute lookup fails, so `write`/`flush` above
    are unaffected. Without this, code that probes ``sys.stdout`` for standard
    stream attributes raises AttributeError once the wrapper is installed.
    """
    # Guard against infinite recursion if the instance is not initialised yet.
    if name in ('terminal', 'log'):
      raise AttributeError(name)
    return getattr(self.terminal, name)


class DualError:
  """Tee for ``sys.stderr``: every write goes to the terminal and to a log file.

  The stream that is ``sys.stderr`` at construction time is kept as
  ``self.terminal``; the log file is opened in append mode as ``self.log``.
  """

  def __init__(self, file_path):
    """Capture the current ``sys.stderr`` and open `file_path` for appending."""
    self.terminal = sys.stderr
    self.log = open(file_path, 'a')

  def write(self, message):
    """Write `message` to the terminal and the log; the log is flushed at once."""
    self.terminal.write(message)
    self.log.write(message)
    self.log.flush()

  def flush(self):
    """Flush both targets.

    Previously a no-op, so explicit ``sys.stderr.flush()`` calls never reached
    the real terminal stream.
    """
    self.terminal.flush()
    self.log.flush()

  def __getattr__(self, name):
    """Delegate any other stream attribute (isatty, fileno, encoding, ...).

    Only called when normal attribute lookup fails, so `write`/`flush` above
    are unaffected. Without this, code that probes ``sys.stderr`` for standard
    stream attributes raises AttributeError once the wrapper is installed.
    """
    # Guard against infinite recursion if the instance is not initialised yet.
    if name in ('terminal', 'log'):
      raise AttributeError(name)
    return getattr(self.terminal, name)


def get_agent(env_op: EnvOperation):
  """Build an `Agent_RPA` whose sub-agents use the LLMs selected via flags.

  A wrapper is created for the planner, summarizer, action translator and
  params extractor (in that order), followed by the main model. Whenever a
  role's model differs from `FLAGS.default_llm`, that choice is printed.

  Args:
    env_op: Environment wrapper handed to the agent.

  Returns:
    The constructed `agent_rpa.Agent_RPA` instance.
  """
  print('Initializing agent...')

  def _wrapper_for(role_label, model_name):
    # Announce only the roles that deviate from the main model.
    if model_name != FLAGS.default_llm:
      print(f'Using {role_label} model: {model_name}')
    return get_llm_wrapper(model_name=model_name)

  # Dict entries are evaluated top to bottom, preserving the creation order.
  role_llms = {
    'planner_llm': _wrapper_for('Planner', FLAGS.planner_llm),
    'summarizer_llm': _wrapper_for('Summarizer', FLAGS.summarizer_llm),
    'actiontranslator_llm': _wrapper_for('ActionTranslator', FLAGS.actiontranslator_llm),
    'params_extractor_llm': _wrapper_for('Params Extractor', FLAGS.params_extractor_llm),
  }

  # Main LLM wrapper (for other agents like Concluder).
  main_llm = get_llm_wrapper(model_name=FLAGS.default_llm)

  return agent_rpa.Agent_RPA(env_op, main_llm, **role_llms)


# Task types selected when `--tasks` contains "feedback".
TASKS_FEEDBACK = [
  'search-engine', 'tic-tac-toe', 'email-inbox-forward-nl-turk',
  'terminal', 'login-user-popup', 'guess-number',
  'email-inbox-nl-turk', 'email-inbox-forward-nl', 'email-inbox',
]

# Every task type known to this launcher; used when `--tasks` is not given.
ALL_TASKS = [
  "choose-list",
  "click-button",
  "click-button-sequence",
  "click-checkboxes",
  "click-checkboxes-large",
  "click-checkboxes-soft",
  "click-checkboxes-transfer",
  "click-collapsible",
  "click-collapsible-2",
  "click-color",
  "click-dialog",
  "click-dialog-2",
  "click-link",
  "click-menu",
  "click-option",
  "click-scroll-list",
  "click-shades",
  "click-shape",
  "click-tab",
  "click-tab-2",
  "click-tab-2-hard",
  "click-test",
  "click-test-2",
  "click-widget",
  "count-shape",
  "enter-date",
  "enter-password",
  "enter-text",
  "enter-text-dynamic",
  "enter-time",
  "focus-text",
  "focus-text-2",
  "grid-coordinate",
  "identify-shape",
  "login-user",
  "multi-layouts",
  "multi-orderings",
  "navigate-tree",
  "simple-algebra",
  "social-media",
  "social-media-all",
  "social-media-some",
  "use-autocomplete",
  "use-spinner",
  "guess-number",
  "login-user-popup",
  "tic-tac-toe",
  "search-engine",
  "terminal",
  "email-inbox",
  "email-inbox-nl-turk",
  "email-inbox-forward-nl",
  "email-inbox-forward-nl-turk",
  "email-inbox-delete",
  "email-inbox-reply",
]

# choose-list,click-button,click-button-sequence,click-checkboxes,click-checkboxes-large,click-checkboxes-soft,click-checkboxes-transfer,click-collapsible,click-collapsible-2,click-color,click-dialog,click-dialog-2,click-link,click-menu,click-option,click-scroll-list,click-shades,click-shape,click-tab,click-tab-2,click-tab-2-hard,click-test,click-test-2,click-widget,count-shape,enter-date,enter-password,enter-text,enter-text-dynamic,enter-time,focus-text,focus-text-2,grid-coordinate,identify-shape,login-user,multi-layouts,multi-orderings,navigate-tree,simple-algebra,social-media,social-media-all,social-media-some,use-autocomplete,use-spinner,guess-number,login-user-popup,tic-tac-toe,search-engine,terminal,email-inbox,email-inbox-nl-turk,email-inbox-forward-nl,email-inbox-forward-nl-turk

# choose-list,click-button,click-button-sequence,click-checkboxes,click-checkboxes-large,click-checkboxes-soft,click-checkboxes-transfer,click-collapsible,click-collapsible-2,click-color,click-dialog,click-dialog-2,
# click-link,click-menu,click-option,click-scroll-list,click-shades,click-shape,click-tab,click-tab-2,click-tab-2-hard,click-test,click-test-2,click-widget,count-shape,enter-date,enter-password,
# enter-text,enter-text-dynamic,enter-time,focus-text,focus-text-2,grid-coordinate,identify-shape,login-user,multi-layouts,multi-orderings,navigate-tree,simple-algebra,social-media,social-media-all,
# social-media-some,use-autocomplete,use-spinner,guess-number,login-user-popup,tic-tac-toe,search-engine,terminal,email-inbox,email-inbox-nl-turk,email-inbox-forward-nl,email-inbox-forward-nl-turk

def _main() -> None:
  """Run the selected agent over every requested MiniWoB task type.

  Steps:
    1. Load the rpa bank and the ReAct trajectory bank.
    2. Resolve `FLAGS.tasks` into the list of task types to run.
    3. For each task type, create the environment and the agent and hand them
       to the task runner (`run_tasks_rpa` for 'agent_rpa', `run_tasks_react`
       otherwise). In `test_rpa_mode`, task types without a sufficiently
       validated rpa are skipped and a placeholder row is written to
       test_result.csv instead.
    4. Print the total wall-clock time.
  """
  start_time = time.time()

  # Testing always evaluates rpas from the local bank.
  if FLAGS.test_rpa_mode:
    FLAGS.load_rpa_bank = True
  rpa_bank = RPABank(file_name="rpa_bank.json", load_local_bank=FLAGS.load_rpa_bank)
  react_traj_bank = ReactTrajBank(file_name="react_trajs_bank.json", load_local_bank=FLAGS.use_react_trajs_bank)

  # Resolve the task list. The "feedback" alias is honoured in both modes; it
  # used to be ignored in test mode, where it was treated as a literal task
  # type. The empty-list handling of each mode is kept as before: test mode
  # falls back to ALL_TASKS, normal mode runs nothing.
  requested_tasks = FLAGS.tasks
  if requested_tasks and 'feedback' in requested_tasks:
    task_types = TASKS_FEEDBACK
  elif requested_tasks is None or (FLAGS.test_rpa_mode and not requested_tasks):
    task_types = ALL_TASKS
  else:
    task_types = requested_tasks

  # Snapshot of the task types that have an rpa in the bank (test mode only).
  rpa_bank_task_types = list(rpa_bank.rpa_dict.keys()) if FLAGS.test_rpa_mode else []

  if FLAGS.agent_name == 'agent_rpa':
    from UIAgents.Agent_RPA import run_tasks_rpa as run_tasks
  else:
    from UIAgents.Agent_RPA import run_tasks_react as run_tasks

  print(f'Starting eval with agent {FLAGS.agent_name} on {task_types}.')

  with open('miniwob_template.json', 'r', encoding='utf-8') as f:
    task_templates = json.load(f)

  # One bank per exploration size: index + 1 = number of tasks to explore.
  # NOTE(review): fixed at 5, which matches the default of
  # --num_tasks_to_explore; confirm the runner never indexes past it.
  explore_rpa_banks = [RPABank(load_local_bank=False) for _ in range(5)]

  for task_type_num, task_type in enumerate(task_types, start=1):
    if FLAGS.test_rpa_mode and (
        task_type not in rpa_bank_task_types
        or rpa_bank.rpa_dict[task_type]['verified_tasks_num'] < FLAGS.min_validated_count):
      # No usable rpa for this task type: record a row of '/' placeholders so
      # the result table still has one line per task type, then move on.
      exp_result_csv = os.path.join(FLAGS.log_folder_exp, "test_result.csv")
      test_result = {"Num": task_type_num, "Task Type": task_type, "Task Template": task_templates[task_type]}
      test_success = {f"Test_{task_idx}": '/' for task_idx in FLAGS.to_test_tasks}
      action_info = {}
      for task_idx in FLAGS.to_test_tasks:
        action_info[f"{task_idx}_has_ask_mllm"] = '/'
        action_info[f"{task_idx}_has_get_ui_info"] = '/'
      test_result.update(test_success | action_info)
      record_exp_result(exp_result_csv, test_result)
      continue

    print_with_color(f"================ Start -- Task Type {task_type_num}: {task_type} =================", 'blue')

    raw_env = gym.make("MiniWoBEnv-v0", env_name=task_type, headless=FLAGS.headless)
    try:
      env_op = EnvOperation(raw_env, task_type)
      agent = get_agent(env_op)

      run_tasks.run(
        env_op=env_op,
        agent=agent,
        rpa_bank=rpa_bank,
        explore_rpa_banks=explore_rpa_banks,
        react_traj_bank=react_traj_bank,
        task_templates=task_templates,
        cnt_task_type=task_type_num,
      )
    finally:
      # Always release the environment, even when the run raises; otherwise
      # the environment created by gym.make is never closed.
      raw_env.close()

  print(f'Finished running agent {FLAGS.agent_name} on {task_types}.')

  # Report the elapsed wall-clock time as whole hours / minutes / seconds.
  elapsed_seconds = int(time.time() - start_time)
  hours, remainder = divmod(elapsed_seconds, 3600)
  minutes, seconds = divmod(remainder, 60)
  print(f"Take {hours}hours {minutes}minutes {seconds}seconds.")


def main(argv: Sequence[str]) -> None:
  """absl entry point: configure logging/environment, then run the evaluation.

  Args:
    argv: Positional command-line arguments handed over by `app.run`; they are
      not used.
  """
  _ = argv  # intentionally unused
  set_up_configs()
  _main()


# Let absl parse the flags before dispatching to `main`.
if __name__ == '__main__':
  app.run(main)
