"""
Transfer experiment grid for Acrobot-v1.
"""

from umfavi.experiments.config import ExperimentGrid


# ============================================================================
# Transfer experiment grid
# ============================================================================

grid = ExperimentGrid(
    base_config=dict(
        # --- Environment ---
        env_id="Acrobot-v1",
        # NOTE: the expert policy used for regret computation
        # ("optimal_policy_path") is intentionally not part of the base
        # config; this file attaches it conditionally per asymmetry value.
        # --- Evaluation parameters ---
        num_samples=1000,
        max_num_steps=1000,
        gamma=0.99,
        # --- PPO retraining verbosity ---
        retrain_verbose=0,
        no_progress_bar=True,
        # --- Weights & Biases logging ---
        log_wandb=True,
        # --- Action transform / encoder ---
        act_transform="one_hot",
        encoder_hidden_sizes=[256, 256],
        # --- Reward domain ---
        reward_domain="sa",
        # --- Weights & Biases project name ---
        wandb_project="acrobot-transfer",
    )
)

# ============================================================================
# Environment perturbations (use "env_params." prefix for env-specific args)
# ============================================================================

# Asymmetry values swept for the Acrobot transfer experiment. This tuple is the
# single source of truth for both the grid axis and the expert-policy lookup
# below, so the two cannot drift apart when a value is added or removed.
ASYMMETRY_VALUES = (1.0, 1.05, 1.1, 1.15, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0)

# Add the asymmetry parameter as a grid axis.
grid.add("env_params.asymmetry", list(ASYMMETRY_VALUES))

# Map each asymmetry value to its corresponding trained optimal policy.
# The directory name embeds the float's repr (e.g. 1.05 -> "asymmetry_1.05").
# Values whose repr uses exponent notation would not produce a decimal
# directory name, so keep the entries above as plain decimal literals.
OPTIMAL_POLICY_BASE = "<path_to_expert_policies>/umfavi/expert_policies/expert_models_acrobot_asymmetry"
ASYMMETRY_TO_POLICY = {
    asymmetry: f"{OPTIMAL_POLICY_BASE}/Acrobot-v1_1_asymmetry_{asymmetry}/best_model.zip"
    for asymmetry in ASYMMETRY_VALUES
}

# Register one conditional per asymmetry value. The value is bound through the
# lambda's default argument so each condition keeps its own asymmetry instead
# of the loop variable's final value.
for asymmetry, policy_path in ASYMMETRY_TO_POLICY.items():
    grid.add_conditional(
        "optimal_policy_path",
        [policy_path],
        condition=lambda c, a=asymmetry: c.get("env_params.asymmetry") == a,
    )


# ============================================================================
# Feedback combinations and reward model paths
# ============================================================================
grid.add(
    "feedback_combo",
    [
        # Single-source combinations.
        "demo_only",
        "pref_only",
        "rating_only",
        "stop_only",
        # Two-source combinations.
        "demo+pref",
        "demo+rating",
        "demo+stop",
        "pref+rating",
        "pref+stop",
        "rating+stop",
        # All four sources together.
        "demo+pref+rating+stop",
        # Imitation entry; this file maps it to mode "imitation" rather than
        # "reward_model".
        "imitation",
    ],
)


# Candidate "fb_model_path" values for each feedback combination.
# Every list currently holds a placeholder: paths to models will be provided
# at acceptance. The entry order is the order in which the conditionals are
# registered on the grid.
FB_MODEL_PATHS = {
    "demo+pref": ["<path_to_best_model>"],
    "demo+pref+rating+stop": ["<path_to_best_model>"],
    "demo+rating": ["<path_to_best_model>"],
    "demo+stop": ["<path_to_best_model>"],
    "demo_only": ["<path_to_best_model>"],
    "imitation": ["<path_to_best_model>"],
    "pref+rating": ["<path_to_best_model>"],
    "pref+stop": ["<path_to_best_model>"],
    "pref_only": ["<path_to_best_model>"],
    "rating+stop": ["<path_to_best_model>"],
    "rating_only": ["<path_to_best_model>"],
    "stop_only": ["<path_to_best_model>"],
}

# Register one conditional per feedback combination. The combination is bound
# through the lambda's default argument so each condition keeps its own value
# instead of the loop variable's final value.
for feedback_combo, fb_model_paths in FB_MODEL_PATHS.items():
    grid.add_conditional(
        "fb_model_path",
        fb_model_paths,
        condition=lambda c, fc=feedback_combo: c.get("feedback_combo") == fc,
    )



# Pick the run mode from the feedback combination: the "imitation" combo gets
# mode "imitation", every other combo gets mode "reward_model".
grid.add_conditional(
    "mode",
    ["reward_model"],
    condition=lambda c: c.get("feedback_combo") != "imitation",
)
grid.add_conditional(
    "mode",
    ["imitation"],
    condition=lambda c: c.get("feedback_combo") == "imitation",
)

# ============================================================================
# Validation
# ============================================================================

# A configuration passes this check only when "fb_model_path" is present and
# not None.
# NOTE(review): the "<path_to_best_model>" placeholder is a non-None string, so
# unfilled placeholders still pass this validator.
grid.add_validator(
    lambda c: c.get("fb_model_path") is not None,
)

if __name__ == "__main__":
    # Running this module as a script prints the grid summary (seeds=10) ...
    print(grid.summary(seeds=10))

    # ... followed by a banner showing the command for identifying best models.
    # A single print with sep="\n" emits the same text as four separate calls.
    print(
        "\n" + "=" * 60,
        "To identify best models, run:",
        "  python -m umfavi.experiments.cli select-best --queue-dir tasks",
        "=" * 60,
        sep="\n",
    )
