#!/usr/bin/env python3
"""
Test script for remote JephHinter workflow execution.
This script demonstrates how to run the JephHinter workflow remotely.
"""

import logging
import sys
from datetime import datetime
from pathlib import Path
import os
# Add the project root to the Python path
project_root = Path(__file__).parent.parent
sys.path.insert(0, str(project_root))

from jephhinter.configs import AgentLabRunConfig, HintPromptConfig, JephHinterConfig, JephHinterWorkflowConfig, AutoGuideConfig_workarena_l1, AutoGuideConfig_miniwob
from jephhinter.jephhinter_workflow import (
    run_data_collection_only_remotely,
    run_hint_generation_and_evaluation_remotely,
)

# from agentlab.agents.generic_agent_hinter.agent_configs import (
#     AGENT_CLAUDE_SONNET_4,
#     AGENT_37_SONNET,
#     AGENT_GPT5,
# )
from agentlab.agents.tool_use_agent.tool_use_agent import (
    GPT_5,
)

from agentlab.agents.generic_agent_hinter.tmlr_config import get_base_agent, get_vision_agent

# Root logging configuration for the script: every record carries the
# timestamp, source location (file, line, function) and level.
log_format = "%(asctime)s - %(filename)s:%(lineno)d - %(funcName)s - %(levelname)s - %(message)s"
logging.basicConfig(
    format=log_format,
    datefmt="%Y-%m-%d %H:%M:%S",
    level=logging.INFO,
    force=True,  # replace any handlers already installed on the root logger
)

# Module-level logger used by the rest of this script.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

def get_benchmark_settings(benchmark_name, offline_dataset, base_model, with_hint):
    """Return ``(max_steps, n_repeats, n_jobs, exp_root)`` for a benchmark.

    Args:
        benchmark_name: Either ``"miniwob"`` or ``"workarena_l1"``.
        offline_dataset: Dataset directory name; only used to build the
            ``workarena_l1`` experiment root.
        base_model: Base model name interpolated into the experiment root.
        with_hint: When true, ``exp_root`` points at the pre-existing trace
            directory; otherwise ``exp_root`` is ``None`` so that callers
            derive a fresh, per-agent experiment root.

    Raises:
        ValueError: If ``benchmark_name`` is not a supported benchmark.
    """
    if benchmark_name == "miniwob":
        max_steps, n_repeats, n_jobs = 10, 5, 20
        exp_root = f"/mnt/ui_assist/data_rw/jephhinter_iclr/miniwob/2025-08-07_21-09-16_GenericAgent-openai_{base_model}_on_miniwob"
    elif benchmark_name == "workarena_l1":
        max_steps, n_repeats, n_jobs = 15, 10, 5
        exp_root = f"/mnt/ui_assist/data_rw/jephhinter_iclr/{offline_dataset}/2025-08-07_21-09-16_GenericAgent-openai_{base_model}_on_workarena-l1"
    else:
        # Fail early: without a known benchmark there are no run settings
        # (max_steps, n_repeats, n_jobs) to launch jobs with.
        raise ValueError(f"Unsupported benchmark_name: {benchmark_name!r}")

    return max_steps, n_repeats, n_jobs, (exp_root if with_hint else None)


def get_hyperparameter_combinations(method):
    """Return the hyperparameter tuples to sweep over for ``method``.

    Each tuple is laid out as
    ``(version, exclude_html, exclude_axtree, n_traces_to_hinter,
    use_step_zoom, step_wise_hinting, n_steps_for_hinting)``.
    """
    if method == "jephhinter":
        return [
            # Alternative kept for reference:
            # ("v1", True, True, 3, False, False, 2),
            # v2: exclude HTML, keep axtree, 2 traces, step zoom on, no step-wise hinting, 2 steps
            ("v2", True, False, 2, True, False, 2),
        ]
    return [
        # v1: keep HTML, exclude axtree, 2 traces, step zoom off, no step-wise hinting, 2 steps
        ("v1", False, True, 2, False, False, 2),
    ]


def agent_label(agent_args):
    """Return a lower-cased name for an agent configuration.

    Uses ``agent_args.agent_name`` when that attribute is usable and falls
    back to ``agent_args.model_name`` otherwise.
    """
    try:
        return agent_args.agent_name.lower()
    except AttributeError:
        return agent_args.model_name.lower()


def build_hinter_config(method, benchmark_name, version, hint_prompt_config=None):
    """Build the hinter configuration object for ``method``.

    For ``"autoguide"`` the benchmark-specific AutoGuide config is returned;
    any other method yields a ``JephHinterConfig`` for ``version``, with
    ``hint_prompt_config`` attached when one is supplied.

    Raises:
        ValueError: If ``method`` is ``"autoguide"`` and no AutoGuide config
            exists for ``benchmark_name``.
    """
    if method == "autoguide":
        if benchmark_name == "workarena_l1":
            return AutoGuideConfig_workarena_l1()
        if benchmark_name == "miniwob":
            return AutoGuideConfig_miniwob()
        raise ValueError(f"No AutoGuide config for benchmark_name: {benchmark_name!r}")

    if hint_prompt_config is None:
        return JephHinterConfig(version=version)
    return JephHinterConfig(hint_prompt_config=hint_prompt_config, version=version)


def launch_data_collection_jobs(
    agents_to_test,
    jephhinter_models,
    hyperparameter_combinations,
    *,
    method,
    benchmark_name,
    exp_root,
    timestamp,
    max_steps,
    n_repeats,
    n_jobs,
    with_hint,
):
    """Submit one remote data-collection job per (agent, model, combination).

    When ``exp_root`` is ``None`` a per-agent experiment root is derived from
    ``timestamp`` and the agent label; the shared ``exp_root`` is never
    overwritten, so one agent's name cannot leak into another agent's run.
    """
    for agent_args in agents_to_test:
        agent_name = agent_label(agent_args)
        run_exp_root = exp_root if exp_root is not None else f"{timestamp}-{agent_name}"

        # NOTE(review): the hinter model loop variable is not used here; every
        # iteration submits the same config with the full model list.
        for _ in jephhinter_models:
            for version, *_unused in hyperparameter_combinations:
                # Step 1: Run data collection only (recommended for first time)
                logger.info("\n=== Step 1: Run data collection only ===")

                hinter_config = build_hinter_config(method, benchmark_name, version)

                # Create the agentlab config first
                agentlab_config = AgentLabRunConfig(
                    agent_args=agent_args,
                    exp_root=run_exp_root,
                    max_steps=max_steps,
                    benchmark_name=benchmark_name,
                    hint_db_path=run_exp_root + f"_with_hints_{hinter_config.source}_{hinter_config.version}_gpt-5/hint_db_updated.csv",
                    use_task_hint=with_hint,
                    n_repeats=n_repeats,
                    n_jobs=n_jobs,
                )

                # Create the workflow config with the agentlab config embedded
                config = JephHinterWorkflowConfig(
                    interactive_job=False,
                    exp_root=run_exp_root,
                    eval_config=agentlab_config,
                    hinter_models=jephhinter_models,
                    hinter_config=hinter_config,
                )

                # Submit remotely; a falsy job id is treated as a failed launch.
                job_id = run_data_collection_only_remotely(config, with_hint=with_hint)
                if job_id:
                    logger.info(f"✅ Data collection job launched remotely for {agent_name} with ID: {job_id}")
                    logger.info(f"   Check status: eai job logs -f {job_id}")
                else:
                    logger.error(f"❌ Failed to launch data collection job for {agent_name}")


def launch_hint_generation_jobs(
    agents_to_test,
    jephhinter_models,
    hyperparameter_combinations,
    *,
    method,
    benchmark_name,
    exp_root,
    timestamp,
    max_steps,
    n_repeats,
    n_jobs,
):
    """Submit one remote hint-generation/evaluation job per combination.

    When ``exp_root`` is ``None`` a per-agent experiment root is derived from
    ``timestamp`` and the agent name instead of mutating the shared value.
    """
    # Step 2: Run hint generation and evaluation (use existing traces)
    logger.info("\n=== Step 2: Run hint generation and evaluation ===")
    for agent_args in agents_to_test:
        if exp_root is not None:
            run_exp_root = exp_root
        else:
            run_exp_root = f"{timestamp}-jephhinter-{agent_args.agent_name.lower()}"

        for jephhinter_model in jephhinter_models:
            for version, exclude_html, exclude_axtree, n_traces_to_hinter, use_step_zoom, step_wise_hinting, n_steps_for_hinting in hyperparameter_combinations:
                # Descriptive name for this hyperparameter combination (log output only)
                hp_suffix = f"html_{'ex' if exclude_html else 'in'}_axtree_{'ex' if exclude_axtree else 'in'}_traces_{n_traces_to_hinter}_zoom_{'on' if use_step_zoom else 'off'}_stepwise_{'on' if step_wise_hinting else 'off'}_steps_{n_steps_for_hinting}"

                # Create the agentlab config first
                agentlab_config = AgentLabRunConfig(
                    agent_args=agent_args,
                    exp_root=run_exp_root,
                    max_steps=max_steps,
                    benchmark_name=benchmark_name,
                    n_repeats=n_repeats,
                    n_jobs=n_jobs,
                )

                # Create the hint prompt config with current hyperparameters
                hint_prompt_config = HintPromptConfig(
                    exclude_html=exclude_html,
                    exclude_axtree=exclude_axtree,
                    n_traces_to_hinter=n_traces_to_hinter,
                    use_step_zoom=use_step_zoom,
                    step_wise_hinting=step_wise_hinting,
                    n_steps_for_hinting=n_steps_for_hinting,
                )

                hinter_config = build_hinter_config(
                    method, benchmark_name, version, hint_prompt_config
                )

                # Create the workflow config with the agentlab config embedded
                config = JephHinterWorkflowConfig(
                    interactive_job=False,
                    exp_root=run_exp_root,
                    eval_config=agentlab_config,
                    hinter_models=[jephhinter_model],
                    hinter_config=hinter_config,
                )

                logger.info(f"Launching job for {agent_args.agent_name} + {jephhinter_model} + {hp_suffix}")

                # Submit remotely; a falsy job id is treated as a failed launch.
                job_id = run_hint_generation_and_evaluation_remotely(config)
                if job_id:
                    logger.info(f"✅ Hint generation job launched remotely for {agent_args.agent_name} + {jephhinter_model} + {hp_suffix} with ID: {job_id}")
                    logger.info(f"   Check status: eai job logs -f {job_id}")
                else:
                    logger.error(f"❌ Failed to launch hint generation job for {agent_args.agent_name} + {jephhinter_model} + {hp_suffix}")


def main():
    """Sweep base models, offline datasets and methods, launching remote jobs."""
    logger.info("JephHinter Workflow Remote Execution Test")
    logger.info("=" * 50)
    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

    benchmark_name = "workarena_l1"

    methods = ["jephhinter", "autoguide"]
    offline_datasets = ["workarena-l1_with_3_golden_traces"]  # , "workarena-l1_with_1_golden_traces", "workarena-l1"
    base_models = ["gpt-5-mini", "gpt-5-nano"]

    with_hint = True
    step = "hint_generation_and_evaluation"  # "data_collection_only" or "hint_generation_and_evaluation"

    for base_model in base_models:
        logger.info(f"Running with base model: {base_model}")
        for offline_dataset in offline_datasets:
            logger.info(f"Running with offline dataset: {offline_dataset}")
            for method in methods:
                logger.info(f"Running with method: {method}")

                # List of agents to test
                agents_to_test = [
                    get_base_agent(f"openai/{base_model}-2025-08-07"),
                ]

                # List of JephHinter models to test
                jephhinter_models = [
                    GPT_5,
                ]

                # Recomputed on every iteration so nothing carries over from
                # the previous (base_model, offline_dataset, method) triple.
                max_steps, n_repeats, n_jobs, exp_root = get_benchmark_settings(
                    benchmark_name, offline_dataset, base_model, with_hint
                )

                # Hyperparameter combinations to sweep over
                hyperparameter_combinations = get_hyperparameter_combinations(method)

                # Calculate total number of experiments
                total_experiments = len(agents_to_test) * len(jephhinter_models) * len(hyperparameter_combinations)
                logger.info(f"Total experiments to launch: {total_experiments}")
                logger.info(f"  - Agents: {len(agents_to_test)}")
                logger.info(f"  - JephHinter models: {len(jephhinter_models)}")
                logger.info(f"  - Hyperparameter combinations: {len(hyperparameter_combinations)}")
                logger.info("")

                # Log the hyperparameter combinations being tested
                logger.info("Hyperparameter combinations to test:")
                for i, (version, exclude_html, exclude_axtree, n_traces_to_hinter, use_step_zoom, step_wise_hinting, n_steps_for_hinting) in enumerate(hyperparameter_combinations, start=1):
                    logger.info(f"  {i}. version={version}, exclude_html={exclude_html}, exclude_axtree={exclude_axtree}, n_traces_to_hinter={n_traces_to_hinter}, use_step_zoom={use_step_zoom}, step_wise_hinting={step_wise_hinting}, n_steps_for_hinting={n_steps_for_hinting}")
                logger.info("")

                if step == "data_collection_only":
                    launch_data_collection_jobs(
                        agents_to_test,
                        jephhinter_models,
                        hyperparameter_combinations,
                        method=method,
                        benchmark_name=benchmark_name,
                        exp_root=exp_root,
                        timestamp=timestamp,
                        max_steps=max_steps,
                        n_repeats=n_repeats,
                        n_jobs=n_jobs,
                        with_hint=with_hint,
                    )
                else:
                    launch_hint_generation_jobs(
                        agents_to_test,
                        jephhinter_models,
                        hyperparameter_combinations,
                        method=method,
                        benchmark_name=benchmark_name,
                        exp_root=exp_root,
                        timestamp=timestamp,
                        max_steps=max_steps,
                        n_repeats=n_repeats,
                        n_jobs=n_jobs,
                    )

    # Logged once, after the whole sweep has been submitted.
    logger.info("\n" + "=" * 50)
    logger.info("Test completed!")
    logger.info("Check job status with: eai job ls")


if __name__ == "__main__":
    main()
