from copy import deepcopy
import os

from .core.agent_rpa import Agent_RPA
from .core.rpa_builder import RPA_Builder_Agent
# Import pluggable agent architecture from shared interfaces
from gui_agents.interfaces import BaseGUIAgent
from .env_operation import EnvOperation
from .episode_runner import run_episode
from .suite_utils import *
from .utils import models
from .utils.llm_client import get_llm_wrapper
from .utils.models import ExpResultLine
from .utils.agent_utils import print_with_color, record_exp_result, write_to_file, save_json
from .utils.rpa_bank_utils import RPABank
from .utils.traj_utils import ReactTrajBank, RPAExecTrajBank
# Import GUI agent exploration utilities (shared with run_tasks_gui_agent)
from . import run_tasks_gui_agent

# Global flag registry read throughout this module (suite family, log folders,
# attempt limits, ...).
# NOTE(review): `flags` is not imported explicitly in this file; presumably it is
# re-exported by the star import from .suite_utils - confirm.
FLAGS = flags.FLAGS

# Alias for backward compatibility - use shared implementation from run_tasks_gui_agent
run_task = run_tasks_gui_agent.run_task_in_autorpa


def _build_exec_feedback(env_success_score: float, agent_done: bool) -> str:
  """Builds the feedback sentence describing one RPA verification run.

  Args:
    env_success_score: Benchmark score of the episode. Exactly 1.0 is treated
      as success, exactly 0.0 as failure, anything else as partial completion.
    agent_done: Whether the executed code reported its end marker.

  Returns:
    The feedback text that is later handed to the RPA builder.
  """
  if env_success_score == 1.0:
    if agent_done:
      return "The benchmark indicates task success, and the code executed successfully."
    return "The benchmark indicates task success, but the code did not execute smoothly and did not terminate normally. Please carefully review the execution history to identify which part of the code did not perform as expected."

  if env_success_score == 0.0:
    if agent_done:
      return "The benchmark task failed, but the code output an end marker. Please carefully review the execution history to identify which part of the code did not execute as expected."
    return "Benchmark task failed without an end marker in the code output. Please review the execution history to identify which part of the code caused the failure."

  benchmark_feedback = f"The benchmark judged the task as partially completed (approximately {env_success_score * 100:.0f}%), "
  if agent_done:
    return benchmark_feedback + "but the code output an end marker."
  return benchmark_feedback + "and the code did not execute smoothly and did not terminate normally."


def run_rpa_verification(
  task: task_eval.TaskEval,
  env_op: EnvOperation,
  agent: Agent_RPA,
  log_path: str
) -> tuple[dict[str, Any], models.RPAExecTraj | None]:
  """Executes the current RPA code on one task instance and records the outcome.

  The agent is switched into RPA mode, the episode is run through
  `run_episode`, and the resulting execution trajectory is annotated with a
  textual feedback message plus the final success score / flag.

  Args:
    task: The task instance to verify the RPA on.
    env_op: Environment wrapper the episode is run on.
    agent: The agent that executes the RPA (its `rpa_mode` is set to True).
    log_path: The path to save the log files of this episode.

  Returns:
    A tuple `(result, rpa_exec_traj)`: `result` is the episode metadata dict
    keyed by `constants.EpisodeConstants`, and `rpa_exec_traj` is the annotated
    execution trajectory taken from the episode result.
  """
  agent.rpa_mode = True

  start = time.time()
  time.sleep(3)
  # Mode: RPA Verification
  print("========================================================================")
  # For MiniWoB tasks, goal will be available after initialize_task in run_episode
  if FLAGS.suite_family.startswith('miniwob'):
    print(f'Running rpa_verification for task {task.name}')
  else:
    print(f'Running rpa_verification for task {task.name} with goal "{task.goal}"')

  # Try to complete the task by executing the RPA code.
  episode_result = run_episode(task, env_op, agent, log_path)
  print(f'{"Task Successful ✅" if episode_result.final_success_bool else "Task Failed ❌"}; {task.goal}\n')

  rpa_exec_traj = episode_result.agent_traj
  episode_length = len(rpa_exec_traj.traj)

  # Attach feedback for the RPA builder. Any feedback already stored on the
  # trajectory is kept, and the new sentence is appended directly after it
  # (no separator is inserted).
  exec_feedback = _build_exec_feedback(
    episode_result.env_success_score, episode_result.agent_done_bool
  )
  exec_result = rpa_exec_traj.exec_result
  if exec_result.exec_feedback is None:
    exec_result.exec_feedback = exec_feedback
  else:
    exec_result.exec_feedback = exec_result.exec_feedback + exec_feedback

  rpa_exec_traj.success_score = episode_result.final_success_score
  rpa_exec_traj.success = episode_result.final_success_bool

  result = {
    constants.EpisodeConstants.GOAL: task.goal,
    constants.EpisodeConstants.TASK_TEMPLATE: task.name,
    constants.EpisodeConstants.EPISODE_DATA: episode_result.agent_traj,
    # Note: this stores the raw benchmark score, not the final success flag.
    constants.EpisodeConstants.IS_SUCCESSFUL: episode_result.env_success_score,
    constants.EpisodeConstants.RUN_TIME: time.time() - start,
    constants.EpisodeConstants.FINISH_DTIME: datetime.datetime.now(),
    constants.EpisodeConstants.EPISODE_LENGTH: episode_length,
    constants.EpisodeConstants.AUX_DATA: None,
    constants.EpisodeConstants.SCREEN_CONFIG: get_screen_config(task),
    constants.EpisodeConstants.EXCEPTION_INFO: None,
    constants.EpisodeConstants.SEED: task.params[
      constants.EpisodeConstants.SEED
    ],
  }
  return result, rpa_exec_traj

def _call_concluder(task_goal: str, list_react_traj: list[models.ReActTraj], agent: Agent_RPA, file_path: str):
  """Primes the agent from recorded ReAct trajectories and runs its Concluder.

  The agent's `completed_tasks`, `action_history` and `reflection_history`
  are overwritten from the trajectories before `agent.Concluder_Agent` is
  called with an `EpisodeResult` rebuilt from the last trajectory.

  Args:
    task_goal: Goal text passed to the concluder.
    list_react_traj: Trajectories of one task; must be non-empty, the last
      entry is the one being concluded.
    agent: Agent whose state is primed and whose `Concluder_Agent` is invoked.
    file_path: Log directory handed to the concluder as `log_task_path`.

  Returns:
    Whatever `agent.Concluder_Agent` returns.
  """
  print_with_color("Running Concluder Agent.", 'yellow')
  last_traj = list_react_traj[-1]
  # The final step (if any) supplies both the completed-task summary and the
  # screenshot shown to the concluder.
  last_step = last_traj.traj[-1] if last_traj.traj else None
  has_last_step = bool(last_traj.traj)

  # 1. Extract completed_tasks (from last step)
  if has_last_step and last_step.completed_tasks:
    completed_tasks_str = last_step.completed_tasks
  else:
    completed_tasks_str = "No tasks completed."
  agent.completed_tasks = [completed_tasks_str]

  # 2. Extract action_history
  agent.action_history = last_traj.action_history.copy() if last_traj.action_history else []

  # 3. Extract reflection_history (from all trajectories' reflection field)
  agent.reflection_history = [traj.reflection for traj in list_react_traj if traj.reflection]

  # 4. Load screenshot
  screenshot_resized = None
  if has_last_step:
    exec_info = last_step.exec_step_info

    if exec_info.after_screenshot_path:
      from pathlib import Path
      screenshot_path = Path(exec_info.after_screenshot_path)
      if not screenshot_path.is_absolute():
        # Relative paths are resolved against the grandparent directory of this
        # file (intended to be the project root, i.e. where main.py lives).
        project_root = Path(__file__).parent.parent
        screenshot_path = project_root / exec_info.after_screenshot_path

      if screenshot_path.exists():
        try:
          # Load from the resolved path that was just checked, so loading does
          # not depend on the current working directory.
          screenshot_resized = agent._load_and_resize_image_cached(
            str(screenshot_path),
            (461, 1024)
          )
        except Exception as e:
          # Best effort: the concluder still runs without a screenshot.
          print_with_color(f"Error loading screenshot: {e}", 'red')
          import traceback
          traceback.print_exc()

  # 5. Call Concluder_Agent
  concluder_output = agent.Concluder_Agent(
    goal=task_goal,
    log_task_path=file_path,
    episode_results=models.EpisodeResult(
      env_success_score=last_traj.env_success_score,
      agent_done_bool=last_traj.agent_done_bool,
      final_success_score=last_traj.final_success_score,
      final_success_bool=last_traj.final_success_bool,
      agent_traj=last_traj.traj,
      action_history=last_traj.action_history,
    ),
    screenshot_resized=screenshot_resized
  )

  return concluder_output

def _mark_task_outcomes(exp_result_line, verified_tasks, abandoned_tasks) -> None:
  """Writes per-task outcome columns (`task_<n>`) onto an experiment result line.

  Verified tasks are written first ('1'), abandoned tasks afterwards
  ('abandon'), so an abandoned entry wins if a task appears in both.
  Task keys have the form '<goal>_<task number>'; only the last character is
  parsed, so this assumes single-digit task numbers.
  """
  for verified_task in verified_tasks:
    setattr(exp_result_line, f"task_{int(verified_task[-1])}", '1')
  for abandoned_task in abandoned_tasks:
    setattr(exp_result_line, f"task_{int(abandoned_task[-1])}", 'abandon')


def run(
  task_suite: Suite,
  env_op: EnvOperation,
  agent: Agent_RPA,
  rpa_bank: RPABank,
  react_traj_bank: ReactTrajBank,
  task_templates: dict,
  gui_agent: BaseGUIAgent | None = None,
) -> list[dict[str, Any]]:
  """Runs full AutoRPA workflow on task suite.
  
  Depending on `FLAGS.test_rpa_mode`, each task type is processed in one of
  two ways:

  * Employment phase (`test_rpa_mode` set): the RPA already stored in
    `rpa_bank` is executed on the instances listed in `FLAGS.to_test_tasks`
    and the scores are written to `test_result.csv`.
  * Exploration phase (otherwise), per explored task and attempt:
    1. Verification of the candidate RPA on all tasks explored so far
    2. Exploration / fix-up with the GUI agent (ReAct) when needed
    3. RPA building (action translation and RPA code generation)
    Results are written to `exploration_result.csv` and the candidate RPA is
    merged into `rpa_bank`.
  
  Note: For pure GUI agent exploration without RPA building, 
  use run_tasks_gui_agent.py instead.

  Args:
    task_suite: Mapping of task type to its task instances.
    env_op: The environment to run it on.
    agent: The agent to run the task (used for RPA execution, breakpoint
      analysis, action translation and the Concluder).
    rpa_bank: The rpa bank to use.
    react_traj_bank: The react_star trajectory bank to use.
    task_templates: Task templates dictionary, keyed by task type.
    gui_agent: Pluggable GUI agent for the exploration phase. Although the
      parameter defaults to None, a value is required.

  Returns:
    The `episodes_metadata` list.
    NOTE(review): nothing in this function appends to that list, so it is
    currently always returned empty - confirm whether callers rely on it.

  Raises:
    ValueError: If `gui_agent` is None.
  """
  
  # gui_agent should always be provided by caller (main.py)
  if gui_agent is None:
    raise ValueError("gui_agent must be provided. It should be created in main.py via create_gui_agent().")
  
  max_task_num = FLAGS.num_tasks_to_explore  # Number of tasks to explore per task_type
  max_attempts = FLAGS.max_attempts_per_task  # Max attempts to build RPA per task
  episodes_metadata: list[dict[str, Any]] = []
  cnt_task_type = 0
  
  # Index+1 = number of tasks to explore. Only five snapshots are kept, which
  # matches the task_1..task_5 columns used for the final result line below.
  explore_rpa_banks = [RPABank(load_local_bank=False) for _ in range(5)]
  
  for task_type, instances in task_suite.items():
    cnt_task_type += 1
    cnt_generate_rpa = 0
    cnt_fetch_info = 0
    print_with_color(f"================ Start -- Task Type {cnt_task_type}: {task_type} =================", 'blue')
    
    task_type_log_path = os.path.join(FLAGS.log_folder_exp, task_type)
    os.makedirs(task_type_log_path, exist_ok=True)
    
    rpa_exec_traj_bank = RPAExecTrajBank()
    
    if FLAGS.test_rpa_mode:  # Employment phase
      print_with_color("\n======================== Employment Phase ==============================", 'blue')
      exp_result_csv = os.path.join(FLAGS.log_folder_exp, "test_result.csv")
      test_result = {"Num": cnt_task_type, "Task Type": task_type, "Task Template": task_templates[task_type]}
      
      test_success = {}
      action_info = {}
      for task_idx in FLAGS.to_test_tasks:
        task_type_log_path = os.path.join(FLAGS.log_folder_exp, task_type)
        os.makedirs(task_type_log_path, exist_ok=True)
        log_path = os.path.join(task_type_log_path, f'Task_{task_idx}')
        
        instance = instances[task_idx]
        agent.rpa_bank = rpa_bank
        
        agent.record_token = models.RecordToken(file_path=FLAGS.log_folder_exp, task_type=task_type,
                                                     task_num=f'Task 0', stage='Test')
        episode, rpa_exec_traj = run_rpa_verification(task=instance, env_op=env_op, agent=agent, log_path=log_path)
        save_json(rpa_exec_traj, save_path=log_path, file_name='rpa_exec_traj.json')
        test_result.update({f"Test_{task_idx}": rpa_exec_traj.success_score})
        
        # Record which helper calls the RPA code contains (1 = present, 0 = absent).
        # NOTE(review): the UI-info marker here is 'env_op.get_html_body', while the
        # exploration phase below looks for 'env_op.get_cur_ui_content' - confirm
        # which one is intended.
        rpa_code = rpa_bank.rpa_dict[task_type]['rpa_code']
        action_info.update({
          f"{task_idx}_has_ask_mllm": 1 if 'env_op.ask_mllm' in rpa_code else 0,
          f"{task_idx}_has_get_ui_info": 1 if 'env_op.get_html_body' in rpa_code else 0,
          f"{task_idx}_has_shell": 1 if 'shell(' in rpa_code else 0,
        })
      
      test_result.update(test_success | action_info)
      record_exp_result(exp_result_csv, test_result)
      # -----end: recording
    else:
      print_with_color("\n===================== Exploration Phase ============================", 'blue')
      exploration_path = os.path.join(task_type_log_path, 'Exploration')  # TaskType/Exploration
      abandoned_tasks = []  # Tasks that React could not complete or verify failed repeatedly
      rpa_bank_candidate = RPABank(load_local_bank=False)  # get an empty rpa_bank to store candidate rpa
      RPABuilder_Agent = RPA_Builder_Agent(
        default_llm=get_llm_wrapper(FLAGS.builder_llm, enable_logging=FLAGS.enable_llm_logging),
        enable_shell_action=FLAGS.enable_shell_action
      )
      
      exp_result_csv = os.path.join(FLAGS.log_folder_exp, "exploration_result.csv")
      
      for task_num in range(1, max_task_num + 1):
        task_veri = instances[task_num]
        task_veri.task_num = task_num
        
        # For MiniWoB tasks, initialize the task before accessing goal
        if FLAGS.suite_family.startswith('miniwob') and not task_veri.initialized:
          task_veri.initialize_task(env_op.raw_env)
        
        task_goal = task_veri.goal
        task_num_path = os.path.join(exploration_path, f'task_{task_num}')  # TaskType/Exploration/task_1
        # Snapshots used to roll back when this task ends up abandoned.
        rpa_bank_candidate_temp = deepcopy(rpa_bank_candidate)
        rpa_exec_traj_temp = deepcopy(rpa_exec_traj_bank)
        exp_result_line = ExpResultLine(task_type=task_type, task_num=f'task_{task_num}', task_goal=task_goal)
        cur_verified_tasks = []
        
        # Try each task up to max_attempts times; exit when a verified RPA is obtained
        i = 0
        while i < max_attempts:
          attempt_path = os.path.join(task_num_path, f'attempt_{i + 1}')
          flag_init = True  # initialize the env determine by bp_analyzer result
          rpa_exec_traj = None
          FLAGS.cur_attempt_cnt = i + 1
          
          print_with_color(f"\n======== Task_num: {task_num}, Attempt: {i + 1} ========\n", 'blue')
          print(f'abandoned_tasks: {abandoned_tasks}\n')
          
          # If rpa_bank_candidate has this task_type, verify RPA on all explored tasks;
          # otherwise no task has succeeded yet, skip verification
          if task_type in rpa_bank_candidate.rpa_dict:
            agent.rpa_bank = rpa_bank_candidate  # use to run verification
            fit_path = os.path.join(attempt_path, 'RPAFitExec')  # task_1/attempt_1/RPAFitExec
            
            ## -----start: verify t-to-1 tasks (verify tasks 1..t in reverse)
            print_with_color(f"\n======== Verify {task_num}-to-1 tasks ========\n", 'blue')
            flag_all_success = True
            rpa_exec_traj = None
            
            # Note: `task_veri` is rebound inside this loop; after a failure it
            # refers to the failing task, which the ReAct / builder steps below
            # then work on.
            for j in range(task_num, 0, -1):
              print_with_color(f"\n======== Verify task {j}\n", 'blue')
              task_veri = instances[j]
              task_veri.task_num = j
              
              # For MiniWoB tasks, initialize the task before accessing goal
              if FLAGS.suite_family.startswith('miniwob') and not task_veri.initialized:
                task_veri.initialize_task(env_op.raw_env)
              
              if f'{task_veri.goal}_{j}' in abandoned_tasks:
                print_with_color(f'Task is abandoned: {task_veri.goal}\nTry next task.\n', 'yellow')
                continue
              if f'{task_veri.goal}_{j}' in rpa_bank_candidate.rpa_dict[task_type]['verified_tasks']:
                print_with_color(f'Task has been verified: {task_veri.goal}\nTry next task.\n', 'green')
                continue
              
              agent.record_token = models.RecordToken(file_path=FLAGS.log_folder_exp, task_type=task_type,
                                                           task_num=f'Task {task_veri.task_num}', stage='Verification')
              log_path = os.path.join(fit_path, f'task_{j}')
              episode, rpa_exec_traj = run_rpa_verification(task=task_veri, env_op=env_op, agent=agent,
                                                                log_path=log_path)
              cur_rpa = rpa_bank_candidate.rpa_dict[task_type]['rpa_code']
              rpa_exec_traj_bank.add_rpa_exec_traj(rpa_code=cur_rpa, rpa_exec_traj=rpa_exec_traj)
              save_json(rpa_exec_traj, save_path=log_path, file_name='rpa_exec_traj.json')
              if not rpa_exec_traj.success:
                flag_all_success = False
                break
              cur_verified_tasks.append(f'{task_veri.goal}_{j}')  # On full success, store first t goal names
            
            # Keys end with the task number; only the last character is parsed
            # (single-digit task numbers assumed).
            cur_ve_tasks = [tt for tt in cur_verified_tasks if int(tt[-1]) <= task_num]
            
            rpa_bank_candidate.update_verified_tasks(task_type=task_type, verified_tasks=cur_ve_tasks)
            if len(cur_ve_tasks) == 0:
              print_with_color("\n---------------\nNo verified tasks", 'red')
            else:
              print_with_color('\n---------------\nVerified Tasks:', 'green')
            for m, task in enumerate(cur_ve_tasks):
              print_with_color(f"{m} {task}", 'green')
            
            if flag_all_success:
              print_with_color(f"\n======== Verify {task_num}-to-1 tasks all success ========\n", 'blue')
              # record exp result
              _mark_task_outcomes(exp_result_line, cur_ve_tasks, abandoned_tasks)
              rpa_bank_candidate.save_temp(task_type=task_type, save_path=fit_path)  # Save current RPA to log dir
              break  # t++
            elif i == max_attempts - 1:
              # After max_attempts, verification still failed; add task to abandoned_tasks and reset rpa_bank_candidate
              abandoned_tasks.append(f'{task_veri.goal}_{j}')
              # record exp result
              _mark_task_outcomes(exp_result_line, cur_ve_tasks, abandoned_tasks)
              rpa_bank_candidate = rpa_bank_candidate_temp
              rpa_exec_traj_bank = rpa_exec_traj_temp
              break  # t++
            ## -----end: verify t-to-1 tasks
            
            ## -----start: call MLLM to evaluate the current page
            bp_analyzer_path = os.path.join(attempt_path, 'BreakpointAnalyzer')
            bp_analyzer_output = agent.Breakpoint_Analyzer_Agent(rpa_exec_traj=rpa_exec_traj,
                                                               log_path=bp_analyzer_path)
            exec_continue = bp_analyzer_output.to_continue  # exec_continue = y or n
            rpa_exec_traj.fix_evaluator_analysis = f"Observation: {bp_analyzer_output.observation}\nCode Diagnosis: {bp_analyzer_output.code_diagnosis}"
            print_with_color(f"Breakpoint_Analyzer_Agent result: {exec_continue}", 'blue')
            ## -----end: call MLLM to evaluate the current page
            
            if len(rpa_exec_traj.traj) >= env_op.max_action_step:
              exec_continue = 'n'
              print_with_color(f"Max action step reached, exec_continue set to NO", 'red')
            elif env_op.done:
              exec_continue = 'n'
              print_with_color("Environment indicates task is done. exec_continue set to NO", 'red')
            elif exec_continue.lower() == 'y':
              # Continue from the current page instead of re-initializing the task.
              flag_init = False
              agent.completed_tasks = [f'{bp_analyzer_output.completed_tasks}']
          
          ## -----start: ReAct + RPABuilder(with VerificationResult)
          log_react_path = os.path.join(attempt_path, 'ReAct')
          list_react_traj = react_traj_bank.get_react_traj(task_type, task_veri.goal, task_veri.task_num, return_all_rounds=True)
          flag_react_traj_exists = list_react_traj is not None
          # As rpa verification failed, run Fix ReAct
          if not flag_init:
            print_with_color("Need to continue ReAct from cur_page.", 'yellow')
            # whether to initialize the task env, determine by bp_analyzer result
            agent.record_token = models.RecordToken(file_path=FLAGS.log_folder_exp, task_type=task_type,
                                                        task_num=f'Task {task_veri.task_num}', stage='Fix ReAct')
            episode, list_fix_react_traj = run_task(task=task_veri, env_op=env_op,
                                              agent=agent, to_init_task=False, log_path=log_react_path, react_round=1,
                                              gui_agent=gui_agent)
            if list_fix_react_traj[-1].final_success_bool:
              # Translate fix_react_traj actions before passing to RPA Builder
              print_with_color("🔧 Translating fix_react_traj actions...", 'cyan')
              list_fix_react_traj = agent.batch_translate_actions(
                react_trajs=list_fix_react_traj,
                log_path=os.path.join(log_react_path, 'fix_traj_translation')
              )
              
              rpa_exec_traj.fix_react_traj = list_fix_react_traj
              flag_react_traj_exists = True
            else:
              print_with_color("\n---------------\nFix ReAct failed.", 'red')
              if not flag_react_traj_exists:
                # NOTE(review): this `continue` skips the `i += 1` at the end of
                # the loop body, so the same attempt number is retried and no
                # attempt is consumed - confirm this is intended, as repeated
                # failures here are not bounded by max_attempts.
                continue
          
          if flag_react_traj_exists:
            print_with_color("\n======== ReAct Traj Exists. ========\n", 'green')  # Also printed when react_star continues after RPA exec
            if not flag_init and rpa_exec_traj.fix_react_traj is not None:
              if rpa_exec_traj.fix_react_traj and len(rpa_exec_traj.fix_react_traj) > 0:
                concluder_output = _call_concluder(task_goal=task_goal, list_react_traj=rpa_exec_traj.fix_react_traj, agent=agent, file_path=os.path.join(log_react_path, 'concluder'))
                rpa_exec_traj.conclusion = rpa_exec_traj.fix_react_traj[-1].conclusion = concluder_output.episode_conclusion
                rpa_exec_traj.reflection = rpa_exec_traj.fix_react_traj[-1].reflection = concluder_output.reflection
              save_json(rpa_exec_traj, save_path=log_react_path, file_name='rpa_exec_traj.json')
              # Save fix_react_traj as separate JSON file
              if rpa_exec_traj.fix_react_traj and len(rpa_exec_traj.fix_react_traj) > 0:
                save_json(rpa_exec_traj.fix_react_traj, save_path=log_react_path, file_name='fix_react_traj.json')
            else:
              if not list_react_traj[-1].conclusion:
                concluder_output = _call_concluder(task_goal=task_goal, list_react_traj=list_react_traj, agent=agent, file_path=os.path.join(log_react_path, 'concluder'))
                list_react_traj[-1].conclusion = concluder_output.episode_conclusion
                list_react_traj[-1].reflection = concluder_output.reflection
              else:
                print_with_color(f"Conclusion is already set: {list_react_traj[-1].conclusion}", 'green')
                print_with_color("Skipping Concluder", 'green')
              save_json(list_react_traj, save_path=log_react_path, file_name='list_react_traj.json')
          else:
            print_with_color("ReAct Traj Doesn't Exist, running ReAct Agent.", 'yellow')
            print_with_color("Need to restart ReAct.", 'yellow')
            
            agent.record_token = models.RecordToken(file_path=FLAGS.log_folder_exp, task_type=task_type,
                                                         task_num=f'Task {task_veri.task_num}', stage='ReAct')
            episode, list_react_traj = run_task(task=task_veri, env_op=env_op,
                                                agent=agent, to_init_task=True, log_path=log_react_path,
                                                react_round=FLAGS.reflection_rounds + 1,
                                                gui_agent=gui_agent)
            save_json(list_react_traj, save_path=log_react_path, file_name='list_react_traj.json')
            
            # If React run failed for this task, abandon it
            if not list_react_traj[-1].final_success_bool:
              abandoned_tasks.append(f'{task_veri.goal}_{task_veri.task_num}')
              for cnt in range(0, len(list_react_traj)):
                setattr(exp_result_line, f'round_{cnt}', '0')  # failed round
              
              rpa_bank_candidate = rpa_bank_candidate_temp
              rpa_exec_traj_bank = rpa_exec_traj_temp
              break  # t++, i=1
            
            exp_result_line.round_0 = '/'
            exp_result_line.round_1 = '/'
            exp_result_line.round_2 = '/'
            setattr(exp_result_line, f'round_{len(list_react_traj) - 1}', '1')  # successful round
            for cnt in range(0, len(list_react_traj) - 1):
              setattr(exp_result_line, f'round_{cnt}', '0')  # failed round
            
            # Store traj in react_trajs_dict only when ReAct succeeded
            # When task was started from scratch
            if list_react_traj[-1].final_success_bool:
              react_traj_bank.clear_translation_cache(task_type, task_veri.goal, task_veri.task_num)
              react_traj_bank.add_react_traj(
                task_type=task_type,
                task_goal=task_veri.goal,
                instance_id=task_veri.task_num,
                list_react_traj=list_react_traj,
                force_update=FLAGS.force_update_react_trajs_bank,
              )
              if FLAGS.update_react_trajs_bank:
                react_traj_bank.save()
          
          # ===== RPA Building Phase =====
          # Batch Action Translation (Before RPA Builder)
          # Translate actions before passing to RPA Builder, rather than during ReAct execution
          # This is a core AutoRPA functionality, not specific to any GUI agent
          rpa_builder_path = os.path.join(attempt_path, 'RPABuilder')  # init_task/RPABuilder
          all_react_traj_cur_task = react_traj_bank.get_react_traj(
            task_type,
            task_veri.goal,
            task_veri.task_num,
            return_all_rounds=True
          )
          
          if all_react_traj_cur_task and len(all_react_traj_cur_task) > 0 and not all_react_traj_cur_task[-1].conclusion:
            concluder_output = _call_concluder(task_goal=task_goal, list_react_traj=all_react_traj_cur_task, agent=agent, file_path=os.path.join(rpa_builder_path, 'concluder'))
            all_react_traj_cur_task[-1].conclusion = concluder_output.episode_conclusion
            all_react_traj_cur_task[-1].reflection = concluder_output.reflection
          
          # Check runtime cache first: cache holds full trajectory list (no nested lists)
          cached_translated_trajs = (
            react_traj_bank.get_cached_translation(task_type, task_goal, task_veri.task_num)
            if task_goal else None
          )
          
          if cached_translated_trajs:
            # Use cached translated trajectory list
            all_react_traj_cur_task = cached_translated_trajs
          else:
            if all_react_traj_cur_task and len(all_react_traj_cur_task) > 0:
              # Batch action translation for all rounds of current task
              all_react_traj_cur_task = agent.batch_translate_actions(
                react_trajs=all_react_traj_cur_task,
                log_path=rpa_builder_path
              )
              # Write translated full list to cache for reuse
              if task_goal:
                react_traj_bank.cache_translated_traj(task_type, task_goal, task_veri.task_num, all_react_traj_cur_task)
          
          # RPA Builder(with VerificationResult)
          print_with_color("\n\nRunning RPABuilder_Agent", 'blue')
          RPABuilder_Agent.record_token = models.RecordToken(file_path=FLAGS.log_folder_exp, task_type=task_type,
                                                                    task_num=f'Task {task_veri.task_num}',
                                                                    stage='RPA Builder')
          
          # Extract encountered task goals from all tasks we've seen so far (from task 1 to current task_num)
          encountered_task_goals = []
          for j in range(1, task_num + 1):
            task_seen = instances[j]
            # For MiniWoB tasks, initialize the task before accessing goal
            if FLAGS.suite_family.startswith('miniwob') and not task_seen.initialized:
              task_seen.initialize_task(env_op.raw_env)
            goal = task_seen.goal
            if goal and goal not in encountered_task_goals:
              encountered_task_goals.append(goal)
          
          # Could return how many tool calls this round used
          rpa_info, cur_cnt_fetch_info = RPABuilder_Agent.generate_rpa_code(log_task_path=rpa_builder_path,
                                                              task_type=task_type,
                                                              task_template=task_templates[task_type],
                                                              list_react_traj=all_react_traj_cur_task,
                                                              pre_rpa_exec_traj=rpa_exec_traj,
                                                              encountered_task_goals=encountered_task_goals if encountered_task_goals else None)
          cnt_generate_rpa += 1
          cnt_fetch_info += cur_cnt_fetch_info
          # save rpa_candidate
          rpa_bank_candidate.add_rpa(rpa_info)
          rpa_bank_candidate.update_based_on_task(task_type, task_veri.task_num)
          rpa_bank_candidate.save_temp(task_type=task_type, save_path=rpa_builder_path)  # Save current RPA to log dir
          ## -----end: ReAct + RPABuilder
          i += 1
        
        if rpa_bank_candidate.rpa_dict.get(task_type):
          if 'env_op.ask_mllm' in rpa_bank_candidate.rpa_dict[task_type]['rpa_code']:
            exp_result_line.has_ask_mllm = 1
          if 'env_op.get_cur_ui_content' in rpa_bank_candidate.rpa_dict[task_type]['rpa_code']:
            exp_result_line.has_get_ui_info = 1
          if 'shell(' in rpa_bank_candidate.rpa_dict[task_type]['rpa_code']:
            exp_result_line.has_shell = 1
          exp_result_line.based_on_task = rpa_bank_candidate.rpa_dict[task_type].get('based_on_task', '0')
        
        # save record to csv
        record_exp_result(exp_result_csv, exp_result_line.dict())
        
        # Snapshot the candidate bank as it stands after exploring `task_num` tasks.
        cur_rpa_bank = explore_rpa_banks[task_num-1]
        cur_rpa_bank.merge_from(rpa_bank_candidate)
        cur_rpa_bank.save_temp(save_path=FLAGS.log_folder_exp, file_name=f'temp_rpa_{task_num}.json')
      
      # When matched the max_attempts
      # check if completed all tasks
      exp_result_line = ExpResultLine(task_type=task_type, task_num=f'Final', task_goal='-', task_1='abandon',
                                      task_2='abandon', task_3='abandon', task_4='abandon', task_5='abandon',
                                      cnt_fetch_info=(cnt_fetch_info / cnt_generate_rpa) if cnt_generate_rpa > 0 else 0,)
      if task_type in rpa_bank_candidate.rpa_dict.keys():
        # -----start: recording
        exp_result_line.based_on_task = rpa_bank_candidate.rpa_dict[task_type]['based_on_task']
        _mark_task_outcomes(exp_result_line, rpa_bank_candidate.rpa_dict[task_type].get('verified_tasks', []), [])
        if 'env_op.ask_mllm' in rpa_bank_candidate.rpa_dict[task_type]['rpa_code']:
          exp_result_line.has_ask_mllm = 1
        if 'env_op.get_cur_ui_content' in rpa_bank_candidate.rpa_dict[task_type]['rpa_code']:
          exp_result_line.has_get_ui_info = 1
        if 'shell(' in rpa_bank_candidate.rpa_dict[task_type]['rpa_code']:
          exp_result_line.has_shell = 1
        # -----end: recording
        
        for m, task in enumerate(rpa_bank_candidate.rpa_dict[task_type]['verified_tasks']):
          print_with_color(f"{m} {task}", 'blue')
        
        rpa_bank.merge_from(rpa_bank_candidate)  # update rpa_bank
        rpa_bank.save_temp(save_path=FLAGS.log_folder_exp,
                             file_name='temp_rpa.json')  # Also save updated rpa_bank in local experiment folder
        
        if FLAGS.update_rpa_bank:  # Update the complete rpa bank
          rpa_bank.save()
        print_with_color(
          f"====================== End -- Task Type {cnt_task_type}: {task_type} =========================", 'blue')
        print_with_color(f'rpa_bank:\n{rpa_bank.rpa_dict[task_type]}\n', 'blue')
      else:
        # try next task type
        print_with_color(
          f"====================== End -- Task Type {cnt_task_type}: {task_type} =========================", 'blue')
        print_with_color("Oh no, failed to create the rpa.", 'red')
      
      record_exp_result(exp_result_csv, exp_result_line.dict())
  
  return episodes_metadata
