"""
Pure GUI Agent Exploration Runner

This module provides a unified interface for GUI agent exploration,
used by both pure exploration mode and full AutoRPA mode.

Key Functions:
- run_task_exploration(): Async function to explore a single task
- run_task_exploration_with_reflection(): Async multi-round exploration with Concluder reflections
- run_task_in_autorpa(): Compatibility wrapper for old return format
- run(): Main entry point for pure exploration mode

Usage:
    python main.py --agent_name=gui_agent --gui_agent_type=react_star
"""

import asyncio
import json
import sys

from absl import flags
from gui_agents.interfaces import BaseGUIAgent
from gui_agents.react_star.adapter import ReactStarAgent
from .env_operation import EnvOperation
from .suite_utils import *
from .utils import models
from .utils.agent_utils import print_with_color, record_exp_result, save_json
from .utils.traj_utils import ReactTrajBank

FLAGS = flags.FLAGS


def infer_react_star_action_space(action_space_mode: str, ui_info_mode: str) -> str:
    """
    Auto-infer react_star_action_space from react_star_ui_info if user didn't explicitly set it.

    Whether the user set the flag is decided by scanning ``sys.argv`` for the
    ``react_star_action_space`` flag token itself: ``--flag=value``,
    ``--flag value`` and the single-dash spellings of both.  Only an exact
    flag-name match counts, so a different flag that merely starts with the
    same text (or a value that happens to contain it) does not suppress the
    inference.  Scanning stops at a bare ``--`` because everything after it
    is positional.

    Args:
        action_space_mode: Current action_space_mode value (from FLAGS)
        ui_info_mode: Current ui_info_mode value (from FLAGS)

    Returns:
        Inferred or original action_space_mode value

    Auto-inference rules:
        - screenshot_with_tree or screenshot_only_som → index (needs element indexes)
        - screenshot_only → coordinate (no element indexes available)
    """
    flag_name = 'react_star_action_space'

    action_space_explicitly_set = False
    for arg in sys.argv[1:]:
        if arg == '--':
            # End-of-flags marker: nothing after it can set the flag.
            break
        if arg.startswith('--'):
            token = arg[2:]
        elif arg.startswith('-'):
            token = arg[1:]
        else:
            # Positional argument or the value of a preceding flag.
            continue
        # Compare only the name part, so both "name=value" and bare "name" match.
        if token.partition('=')[0] == flag_name:
            action_space_explicitly_set = True
            break

    # Auto-inference rules (only if user didn't explicitly set action_space):
    if not action_space_explicitly_set:
        if ui_info_mode == 'screenshot_only':
            # screenshot_only doesn't provide element indexes, so use coordinate mode
            print('  ℹ️  Auto-inferred react_star_action_space=coordinate (from react_star_ui_info=screenshot_only)')
            return 'coordinate'
        if ui_info_mode in ('screenshot_with_tree', 'screenshot_only_som'):
            # screenshot_with_tree and screenshot_only_som provide element indexes, so use index mode
            print(f'  ℹ️  Auto-inferred react_star_action_space=index (from react_star_ui_info={ui_info_mode})')
            return 'index'

    # User explicitly set action_space or no inference rule applies: keep the original value
    return action_space_mode


async def run_task_exploration(
    task: task_eval.TaskEval,
    gui_agent: BaseGUIAgent,
    env_op: EnvOperation,
    log_path: str,
    to_init_task: bool = True,
    max_steps: int = None,  # If None, will be calculated from task.complexity
    reflection: str = None
) -> models.ReActTraj:
    """
    Explore one task for exactly one round with the given GUI agent.

    The step budget is derived from ``task.complexity`` when ``max_steps`` is
    not supplied.  After the agent finishes, the trajectory is serialised to
    ``<log_path>/react_trajectory.json`` and a success/failure line is printed.

    Args:
        task: The task to explore
        gui_agent: GUI agent instance whose ``explore_task`` is awaited
        env_op: Environment operation interface
        log_path: Directory that receives the trajectory JSON (created if missing)
        to_init_task: Forwarded to the agent; whether to initialize the task environment
        max_steps: Step budget for this round (if None, calculated from task.complexity)
        reflection: Optional reflection from a previous failed attempt, forwarded to the agent

    Returns:
        ReActTraj: The trajectory produced by this exploration round
    """
    if max_steps is None:
        # No explicit budget: derive it from the task's complexity.
        from .suite_utils import calculate_max_steps
        max_steps = calculate_max_steps(task.complexity, task_name=task.name, log_prefix="Exploration")

    # MiniWoB suites need the task object initialised against the raw env once.
    needs_miniwob_init = FLAGS.suite_family.startswith('miniwob') and not task.initialized
    if needs_miniwob_init:
        task.initialize_task(env_op.raw_env)

    print_with_color(f"\n🚀 Exploring: {task.name} | Goal: {task.goal}", 'blue')
    if reflection:
        print_with_color("📝 Using reflection from Concluder for this round", 'yellow')

    # The single exploration round itself.
    trajectory = await gui_agent.explore_task(
        task=task,
        env_op=env_op,
        max_steps=max_steps,
        reflection=reflection,
        to_init_task=to_init_task,
        log_path=log_path,
    )

    # Persist the trajectory next to the other logs of this round.
    os.makedirs(log_path, exist_ok=True)
    with open(os.path.join(log_path, 'react_trajectory.json'), 'w') as out_file:
        json.dump(trajectory.to_dict(), out_file, indent=2, default=str)

    if trajectory.final_success_bool:
        outcome, colour = "✅ Success", 'green'
    else:
        outcome, colour = "❌ Failed", 'red'
    print_with_color(f"{outcome} | Task: {task.goal}", colour)

    return trajectory


def _completed_tasks_for_concluder(react_traj) -> list:
    """Build the ``completed_tasks`` list handed to the Concluder for one round."""
    steps = react_traj.traj
    if not steps:
        return ['No actions were executed in this round.']
    last_step = steps[-1]
    if not hasattr(last_step, 'completed_tasks'):
        # Fallback if the step type carries no completed_tasks attribute
        return ['']
    if last_step.completed_tasks in (None, ''):
        # None or empty means the task is still at its initial stage
        return ['You just started, no tasks have been completed yet.']
    # Use the completed_tasks recorded on the last step
    return [last_step.completed_tasks]


async def run_task_exploration_with_reflection(
    task: task_eval.TaskEval,
    gui_agent: BaseGUIAgent,
    env_op: EnvOperation,
    agent_rpa: Any,  # Agent_RPA for Concluder
    log_path: str,
    max_reflection_rounds: int = 2,
    max_steps: int = None  # If None, will be calculated from task.complexity
) -> tuple[models.ReActTraj, list[models.ReActTraj]]:
    """
    Run exploration with reflection rounds managed by AutoRPA Concluder.

    This function orchestrates multiple exploration rounds:
    1. First round: Explore without reflection
    2. If failed (and rounds remain): call ``agent_rpa.Concluder_Agent`` to
       produce a conclusion and a reflection
    3. Next round: Explore with that reflection
    4. Repeat until success or max_reflection_rounds reached

    While the Concluder runs, ``agent_rpa.action_history``,
    ``agent_rpa.completed_tasks`` and ``agent_rpa.reflection_history`` are
    temporarily replaced with values derived from the failed round; they are
    put back afterwards even if the Concluder raises.  Each round's logs go to
    ``<log_path>/round_<idx>`` and all rounds are additionally dumped to
    ``<log_path>/all_trajectories.json``.

    Args:
        task: The task to explore
        gui_agent: GUI agent instance
        env_op: Environment operation interface
        agent_rpa: Agent_RPA instance (for accessing Concluder)
        log_path: Base path for logs
        max_reflection_rounds: Maximum number of reflection rounds (total rounds = max_reflection_rounds + 1)
        max_steps: Maximum steps per exploration round (if None, calculated from task.complexity)

    Returns:
        Tuple of (final_react_traj, all_react_trajs)

    Raises:
        ValueError: If max_reflection_rounds is negative (no round would run).
    """
    if max_reflection_rounds < 0:
        raise ValueError(
            f"max_reflection_rounds must be >= 0, got {max_reflection_rounds}"
        )

    # Calculate max_steps from task complexity if not provided
    if max_steps is None:
        from .suite_utils import calculate_max_steps
        max_steps = calculate_max_steps(
            task.complexity,
            task_name=task.name,
            log_prefix="Exploration with Reflection"
        )

    # Initialize task if needed (for MiniWoB compatibility)
    if FLAGS.suite_family.startswith('miniwob') and not task.initialized:
        task.initialize_task(env_op.raw_env)

    print_with_color(
        f"\n🔄 Starting exploration with up to {max_reflection_rounds + 1} rounds",
        'blue'
    )

    all_react_trajs = []
    reflection = None          # reflection fed into the upcoming round
    used_reflections = []      # reflections already fed into earlier rounds
    total_rounds = max_reflection_rounds + 1

    for round_idx in range(total_rounds):
        round_log_path = os.path.join(log_path, f'round_{round_idx}')
        os.makedirs(round_log_path, exist_ok=True)

        print_with_color(
            f"\n{'='*80}\n"
            f"🔄 Round {round_idx}/{total_rounds}\n"
            f"{'='*80}",
            'cyan'
        )

        # Run exploration for this round
        react_traj = await run_task_exploration(
            task=task,
            gui_agent=gui_agent,
            env_op=env_op,
            log_path=round_log_path,
            to_init_task=True,
            max_steps=max_steps,
            reflection=reflection
        )
        react_traj.round = round_idx
        all_react_trajs.append(react_traj)

        print_with_color(
            f"\n{'✅' if react_traj.final_success_bool else '❌'} Round {round_idx} {'succeeded' if react_traj.final_success_bool else 'failed'}. ",
            'blue'
        )

        # If task succeeded, stop: no conclusion/reflection is needed
        if react_traj.final_success_bool:
            print_with_color(
                f"\n✅ Task completed successfully in round {round_idx}!",
                'green'
            )
            break

        # Do not call Concluder on the last round: no further round could use its reflection
        if round_idx >= max_reflection_rounds:
            continue

        print_with_color(
            "Calling Concluder to generate conclusion...",
            'blue'
        )

        # Set token recording for Concluder
        agent_rpa.record_token.step = '-'
        agent_rpa.record_token.stage = 'Reflection'

        # The reflection that drove this failed round is now part of the history
        if reflection:
            used_reflections.append(reflection)

        # Save agent's previous state
        prev_action_history = agent_rpa.action_history
        prev_completed_tasks = agent_rpa.completed_tasks
        prev_reflection_history = getattr(agent_rpa, 'reflection_history', [])

        # Set up agent state for Concluder.  reflection_history gets a fresh
        # list so the agent's own list is never mutated in place.
        agent_rpa.action_history = react_traj.action_history
        agent_rpa.completed_tasks = _completed_tasks_for_concluder(react_traj)
        agent_rpa.reflection_history = list(prev_reflection_history or []) + used_reflections

        try:
            concluder_output = agent_rpa.Concluder_Agent(
                goal=task.goal,
                log_task_path=round_log_path,
                episode_results=models.EpisodeResult(
                    env_success_score=react_traj.env_success_score,
                    agent_done_bool=react_traj.agent_done_bool,
                    final_success_score=react_traj.final_success_score,
                    final_success_bool=react_traj.final_success_bool,
                    agent_traj=react_traj.traj,
                    action_history=react_traj.action_history,
                )
            )
        finally:
            # Restore agent's previous state, also when the Concluder raised
            agent_rpa.action_history = prev_action_history
            agent_rpa.completed_tasks = prev_completed_tasks
            agent_rpa.reflection_history = prev_reflection_history

        if concluder_output:
            react_traj.conclusion = concluder_output.episode_conclusion
            react_traj.reflection = concluder_output.reflection
            # Hand the Concluder's reflection to the next exploration round
            reflection = concluder_output.reflection
        else:
            reflection = None

        print_with_color(
            f"📝 Conclusion: {concluder_output.episode_conclusion if concluder_output else 'N/A'}",
            'cyan'
        )
        print_with_color(
            f"📝 Reflection for next round:\n{reflection if concluder_output else 'N/A'}",
            'yellow'
        )

    # Save all trajectories
    all_trajs_path = os.path.join(log_path, 'all_trajectories.json')
    with open(all_trajs_path, 'w') as f:
        json.dump(
            [traj.to_dict() for traj in all_react_trajs],
            f,
            indent=2,
            default=str
        )

    return all_react_trajs[-1], all_react_trajs


def run_task_in_autorpa(
    task: task_eval.TaskEval,
    env_op: EnvOperation,
    agent,
    log_path: str,
    to_init_task: bool = True,
    react_round: int = 1,
    gui_agent: BaseGUIAgent = None
) -> tuple[dict[str, Any], list[models.ReActTraj]]:
    """
    Legacy compatibility wrapper for run_task.

    Wraps GUI agent exploration in the old return format (episode_dict, list of ReActTraj).
    Used by run_tasks_rpa.py for backward compatibility with existing RPA building logic.

    Reflection is generated by AutoRPA's Concluder (via ``agent``) rather than
    by the GUI agent itself.  With ``react_round > 1`` the multi-round
    orchestration is used; otherwise a single exploration round is run.

    Args:
        task: Task to run
        env_op: Environment operation
        agent: Agent_RPA instance (for Concluder); gets a ``record_token`` if it has none
        log_path: Log path
        to_init_task: Whether to init task.  When False ("fix react"), steps already
            recorded in ``agent.action_history`` are counted against the budget and
            the run is skipped entirely if nothing remains.
        react_round: Number of rounds (total rounds with reflection)
        gui_agent: GUI agent to use (required)

    Returns:
        Tuple of (episode_dict, list_of_react_trajs) for backward compatibility

    Raises:
        ValueError: If gui_agent is None.
    """
    # gui_agent should always be provided by caller
    if gui_agent is None:
        raise ValueError("gui_agent must be provided. It should be created in main.py via create_gui_agent().")

    # Initialize token recording if the caller has not set one up
    if getattr(agent, 'record_token', None) is None:
        agent.record_token = models.RecordToken(
            file_path=log_path,
            task_type=task.name,
            task_num='',
            stage='GUI Exploration'
        )

    # Calculate max_steps based on task complexity
    from .suite_utils import calculate_max_steps
    react_max_steps = calculate_max_steps(
        task.complexity,
        task_name=task.name,
        log_prefix="Legacy Compat"
    )

    if not to_init_task:
        # Fix react: remaining steps = total steps - RPA executed steps
        executed_steps = len(agent.action_history) if getattr(agent, "action_history", None) else 0
        remaining_steps = react_max_steps - executed_steps
        print_with_color(
            f"🔧 Fix React: Total budget={react_max_steps}, "
            f"RPA executed={executed_steps}, "
            f"Remaining={remaining_steps}",
            'yellow'
        )
        # If no remaining budget, stop immediately (do NOT call Planner).
        if remaining_steps <= 0:
            print_with_color(
                "🛑 Fix React budget exhausted (remaining<=0). Skipping ReAct planning/execution.",
                'red'
            )
            empty_traj = models.ReActTraj(
                task=task.goal,
                reflection="",
                traj=[],
                action_history=[],
                env_success_score=0.0,
                agent_done_bool=False,
                final_success_score=0.0,
                final_success_bool=False,
                conclusion="Budget exhausted during RPA code execution (remaining<=0).",
            )
            episode = {
                'goal': task.goal,
                'task_template': task.name,
                'is_successful': False,
                'run_time': 0.0,
                'episode_length': 0,
            }
            return episode, [empty_traj]

    # NOTE(review): the full budget is passed on in both modes; in fix-react mode
    # remaining_steps is only used for the exhaustion check above.  Presumably the
    # GUI agent accounts for already-executed steps itself — confirm, otherwise
    # remaining_steps should be used here.
    max_steps_to_use = react_max_steps

    # Run exploration with reflection managed by AutoRPA Concluder
    start = time.time()

    if react_round > 1:
        # Multiple rounds with reflection (managed by Concluder).
        # NOTE(review): to_init_task is not forwarded on this path; every round
        # is started with to_init_task=True by the orchestrator.
        react_traj, list_react_trajs = asyncio.run(
            run_task_exploration_with_reflection(
                task=task,
                gui_agent=gui_agent,
                env_op=env_op,
                agent_rpa=agent,
                log_path=log_path,
                max_reflection_rounds=react_round - 1,
                max_steps=max_steps_to_use
            )
        )
    else:
        # Single round exploration (no reflection)
        react_traj = asyncio.run(
            run_task_exploration(
                task=task,
                gui_agent=gui_agent,
                env_op=env_op,
                log_path=log_path,
                to_init_task=to_init_task,
                max_steps=max_steps_to_use,
                reflection=None
            )
        )
        # Always bind the list so the return below cannot hit an unbound name
        list_react_trajs = [react_traj]

    # Create episode result dict
    episode = {
        'goal': task.goal,
        'task_template': task.name,
        'is_successful': react_traj.final_success_bool,
        'run_time': time.time() - start,
        'episode_length': len(react_traj.traj),
    }

    return episode, list_react_trajs


def run(
    task_suite: Suite,
    env_op: EnvOperation,
    react_traj_bank: ReactTrajBank,
    task_templates: dict,
    gui_agent: BaseGUIAgent = None,
    agent=None,  # Agent_RPA instance: receives record_token and supplies the Concluder
    max_reflection_rounds: int = None,  # Maximum reflection rounds (None means use FLAGS.reflection_rounds)
) -> list[dict[str, Any]]:
    """
    Run pure GUI agent exploration on task suite.

    No RPA building, no verification - just exploration and trajectory collection.
    Reflection is managed by AutoRPA's Concluder, not the GUI agent: the
    multi-round path (with Concluder) is taken only when
    ``max_reflection_rounds > 0`` and ``agent`` is given; otherwise each task
    gets a single round and the Concluder is never invoked.

    Per task, the trajectory files are written under
    ``FLAGS.log_folder_exp/<task_type>/task_<idx>``; sessions whose final round
    succeeded are added to ``react_traj_bank``.  Per task type, one row is
    appended to ``gui_exploration_result.csv``.

    Args:
        task_suite: Suite of tasks to explore
        env_op: Environment operation interface
        react_traj_bank: Trajectory bank to save results
        task_templates: Task templates dictionary
        gui_agent: GUI agent instance (required)
        agent: Agent_RPA instance used for token recording and for the Concluder
            (may be None, which disables reflection rounds)
        max_reflection_rounds: Maximum reflection rounds (if None, uses FLAGS.reflection_rounds,
            or 0 when that flag is not defined)

    Returns:
        List of episode metadata dictionaries

    Raises:
        ValueError: If gui_agent is None.
    """
    # gui_agent should always be provided by caller (main.py)
    if gui_agent is None:
        raise ValueError("gui_agent must be provided. It should be created in main.py via create_gui_agent().")

    # Set max_reflection_rounds
    if max_reflection_rounds is None:
        max_reflection_rounds = FLAGS.reflection_rounds if hasattr(FLAGS, 'reflection_rounds') else 0

    # Multi-round mode needs both a positive round count and an agent for the Concluder
    use_reflection = max_reflection_rounds > 0 and agent is not None

    print_with_color(
        f"\n{'='*80}\n"
        f"🔍 GUI Agent Exploration Mode\n"
        f"Agent: {gui_agent.agent_name}\n"
        f"Max Reflection Rounds: {max_reflection_rounds}\n"
        f"{'='*80}\n",
        'blue'
    )

    episodes_metadata = []
    cnt_task_type = 0

    # Result CSV
    exp_result_csv = os.path.join(FLAGS.log_folder_exp, "gui_exploration_result.csv")

    for task_type, instances in task_suite.items():
        cnt_task_type += 1
        print_with_color(
            f"\n{'='*80}\n"
            f"📋 Task Type {cnt_task_type}: {task_type}\n"
            f"{'='*80}\n",
            'blue'
        )

        # Setup paths
        task_type_log_path = os.path.join(FLAGS.log_folder_exp, task_type)
        os.makedirs(task_type_log_path, exist_ok=True)

        # Determine which tasks to run
        if FLAGS.run_react_test_tasks:
            task_list = FLAGS.to_test_tasks
        else:
            task_list = range(1, FLAGS.num_tasks_to_explore + 1)

        # Result record (one CSV row per task type)
        result = {
            "Num": cnt_task_type,
            "Task Type": task_type,
            "Task Template": task_templates.get(task_type, "N/A")
        }

        for task_idx in task_list:
            print_with_color(f"\n{'─'*40}\nTask {task_idx}\n{'─'*40}", 'cyan')

            task = instances[task_idx]
            task.task_num = task_idx
            log_path = os.path.join(task_type_log_path, f'task_{task_idx}')
            # Final trajectory of this task
            react_traj = None
            # All rounds of this task (multi/single round)
            all_react_trajs: list = []

            start_time = time.time()

            # Initialize token recording
            if agent is not None:
                agent.record_token = models.RecordToken(
                    file_path=FLAGS.log_folder_exp,
                    task_type=task_type,
                    task_num=f'Task {task_idx}',
                    stage='GUI Exploration'
                )

            if use_reflection:
                # Use reflection orchestration managed by Concluder
                react_traj, all_trajs = asyncio.run(
                    run_task_exploration_with_reflection(
                        task=task,
                        gui_agent=gui_agent,
                        env_op=env_op,
                        agent_rpa=agent,
                        log_path=log_path,
                        max_reflection_rounds=max_reflection_rounds,
                        max_steps=None  # Will be calculated from task.complexity
                    )
                )

                # Collect ReActTraj for all rounds, tagging each with its round index
                for round_idx, traj in enumerate(all_trajs):
                    traj.round = round_idx
                    all_react_trajs.append(traj)

                # Only save if final attempt succeeded
                if all_react_trajs and all_react_trajs[-1].final_success_bool:
                    react_traj_bank.add_react_traj(
                        task_type,
                        task.goal,
                        task_idx,
                        all_react_trajs,
                        force_update=FLAGS.force_update_react_trajs_bank
                    )
                    print_with_color(
                        f"✅ Saved {len(all_react_trajs)}-round session to react_trajs_bank (final success)",
                        'green'
                    )
                else:
                    print_with_color(
                        f"⚠️  Not saving to react_trajs_bank: final attempt failed after {len(all_react_trajs)} rounds",
                        'yellow'
                    )
            else:
                # Single round exploration (no reflection).  The Concluder is not
                # called here: it requires max_reflection_rounds > 0 and an agent,
                # and that combination is handled by the multi-round branch.
                react_traj = asyncio.run(
                    run_task_exploration(
                        task=task,
                        gui_agent=gui_agent,
                        env_op=env_op,
                        log_path=log_path,
                        to_init_task=True,
                        max_steps=None,  # Will be calculated from task.complexity
                        reflection=None
                    )
                )

                # Save to trajectory bank (only if successful)
                if react_traj:
                    # Set round number for single-round exploration
                    react_traj.round = 0
                    # In single-round mode also put in all_react_trajs for unified stats (task_x_round_y)
                    all_react_trajs = [react_traj]

                    if react_traj.final_success_bool:
                        react_traj_bank.add_react_traj(
                            task_type,
                            task.goal,
                            task_idx,
                            [react_traj],
                            force_update=FLAGS.force_update_react_trajs_bank
                        )
                        print_with_color(
                            "✅ Saved single-round trajectory to react_trajs_bank (success)",
                            'green'
                        )
                    else:
                        print_with_color(
                            "⚠️  Not saving to react_trajs_bank: single-round exploration failed",
                            'yellow'
                        )

            elapsed = time.time() - start_time

            save_json(react_traj, save_path=log_path, file_name='react_traj.json')
            if all_react_trajs:
                save_json(all_react_trajs, save_path=log_path, file_name='all_react_trajs.json')

            # Record results
            # For multi-round tasks, record by round: task_{idx}_round_{round}
            # For consistency, single round also uses round_0
            if all_react_trajs:
                for traj in all_react_trajs:
                    round_idx = getattr(traj, "round", 0)
                    key_base = f"task_{task_idx}_round_{round_idx}"
                    # Success / failure
                    result[key_base] = 1 if getattr(traj, "final_success_bool", False) else 0
                    # Whether a shell action was used: prefer soft_coded_action,
                    # fall back to hard_coded_action; '' keeps the check safe
                    # when a step has neither.
                    has_shell = any(
                        'shell(' in (step.soft_coded_action or step.hard_coded_action or '')
                        for step in (getattr(traj, "traj", None) or [])
                    )
                    result[f"{key_base}_has_shell"] = 1 if has_shell else 0
            else:
                # No trajectory; for compatibility write a round_0 failure
                key_base = f"task_{task_idx}_round_0"
                result[key_base] = 0
                result[f"{key_base}_has_shell"] = 0

            # Episode metadata
            episode = {
                constants.EpisodeConstants.GOAL: task.goal,
                constants.EpisodeConstants.TASK_TEMPLATE: task.name,
                constants.EpisodeConstants.INSTANCE_ID: task_idx,
                constants.EpisodeConstants.IS_SUCCESSFUL: react_traj.final_success_bool if react_traj else False,
                constants.EpisodeConstants.EPISODE_LENGTH: len(react_traj.traj) if react_traj else 0,
                constants.EpisodeConstants.RUN_TIME: elapsed,
                constants.EpisodeConstants.AGENT_NAME: gui_agent.agent_name,
            }
            episodes_metadata.append(episode)

        # Record to CSV
        record_exp_result(exp_result_csv, result)

        print_with_color(
            f"\n{'='*80}\n"
            f"✅ Completed Task Type: {task_type}\n"
            f"{'='*80}\n",
            'green'
        )

        # Save trajectory bank after each task type
        if FLAGS.update_react_trajs_bank:
            react_traj_bank.save()

    # Print summary
    total_tasks = len(episodes_metadata)
    successful = sum(1 for ep in episodes_metadata if ep[constants.EpisodeConstants.IS_SUCCESSFUL])
    # Guard against an empty suite / empty task list (would divide by zero)
    success_rate = successful / total_tasks * 100 if total_tasks else 0.0
    print_with_color(
        f"\n{'='*80}\n"
        f"📊 Exploration Summary\n"
        f"{'='*80}\n"
        f"Total Tasks: {total_tasks}\n"
        f"Successful: {successful} ({success_rate:.1f}%)\n"
        f"Failed: {total_tasks - successful}\n"
        f"{'='*80}\n",
        'blue'
    )

    return episodes_metadata

