"""
Transfer experiment grid for LunarLander-v3.
"""

from umfavi.experiments.config import ExperimentGrid


# ============================================================================
# Transfer experiment grid
# ============================================================================

# Base configuration handed to ExperimentGrid. The grid.add / grid.add_conditional
# calls further down register the remaining keys (presumably on top of these
# values for each grid point -- confirm against ExperimentGrid).
grid = ExperimentGrid(
    base_config={
        "env_id": "LunarLander-v3",  # environment
        # -- evaluation parameters --
        "num_samples": 1000,
        "max_num_steps": 1000,
        "gamma": 0.999,
        # -- PPO retraining verbosity --
        "retrain_verbose": 0,
        "no_progress_bar": True,
        # -- Wandb logging --
        "log_wandb": True,
        # -- action transform --
        "act_transform": "one_hot",
        "encoder_hidden_sizes": [256, 256],
        # -- reward domain --
        "reward_domain": "sa",
    }
)

# ============================================================================
# Environment perturbations (use "env_params." prefix for env-specific args)
# ============================================================================

# Environment perturbation axes, registered in the order listed.
#   * enable_wind: always True (feedback was provided on trajectories where
#     enable_wind=False).
#   * wind_power / gravity: the swept perturbation values.
# NOTE(review): with enable_wind=True and wind_power=0.0 the environment may
# still apply turbulence (Gymnasium's turbulence_power has a non-zero default)
# -- confirm that the 0.0 setting is meant to differ from the no-wind case.
_ENV_PERTURBATIONS = (
    ("env_params.enable_wind", [True]),
    ("env_params.wind_power", [0.0, 5.0, 10.0, 15.0, 20.0, 25.0]),
    ("env_params.gravity", [-10.0, -11.0, -11.9]),
)
for _param_name, _param_values in _ENV_PERTURBATIONS:
    grid.add(_param_name, _param_values)


# Optimal-policy checkpoint for each swept wind power. The lookup is keyed on
# wind power only, so every gravity value reuses the same checkpoint.
# NOTE(review): the 0.0 entry uses a different directory layout than the other
# entries (no "LunarLander-v3/wind_power_*/ppo" prefix) -- confirm it is intended.
_OPTIMAL_POLICY_BY_WIND_POWER = {
    0.0: "~/umfavi/expert_policies/ppo/LunarLander-v3_2/best_model.zip",
    5.0: "~/umfavi/expert_policies/ppo/LunarLander-v3/wind_power_5.0/ppo/LunarLander-v3_2/best_model.zip",
    10.0: "~/umfavi/expert_policies/ppo/LunarLander-v3/wind_power_10.0/ppo/LunarLander-v3_1/best_model.zip",
    15.0: "~/umfavi/expert_policies/ppo/LunarLander-v3/wind_power_15.0/ppo/LunarLander-v3_1/best_model.zip",
    20.0: "~/umfavi/expert_policies/ppo/LunarLander-v3/wind_power_20.0/ppo/LunarLander-v3_1/best_model.zip",
    25.0: "~/umfavi/expert_policies/ppo/LunarLander-v3/wind_power_25.0/ppo/LunarLander-v3_1/best_model.zip",
}


def _wind_power_equals(target):
    """Return a predicate matching configs whose wind power equals *target*."""
    # A factory (rather than a lambda written inside the loop) gives each
    # predicate its own bound value and keeps the one-argument call signature.
    return lambda c: c.get("env_params.wind_power") == target


for _wind_power, _policy_path in _OPTIMAL_POLICY_BY_WIND_POWER.items():
    grid.add_conditional(
        "optimal_policy_path",
        [_policy_path],
        condition=_wind_power_equals(_wind_power),
    )


# ============================================================================
# Feedback combinations and reward model paths
# ============================================================================
# Feedback combinations to evaluate. Every entry needs a matching
# "fb_model_path" registration further down in this file, and each combination
# must appear exactly once so that no run is duplicated.
# Fix: 'demo+pref+rating+stop' was listed twice while 'stop_only' (which has
# its own fb_model_path registration) was missing from the axis.
grid.add("feedback_combo", [
    'demo+pref',
    'demo+pref+rating+stop',
    'demo+rating',
    'demo+stop',
    'demo_only',
    'pref+rating',
    'pref+stop',
    'pref_only',
    'rating+stop',
    'rating_only',
    'stop_only',
    'imitation',
    ]
)


# Model checkpoint path(s) for each feedback combination, registered in the
# order listed. '<path_to_best_model>' is a placeholder: paths to models will
# be provided at acceptance.
_FB_MODEL_PATHS = {
    "demo+pref": ['<path_to_best_model>'],
    "demo+pref+rating+stop": ['<path_to_best_model>'],
    "demo+rating": ['<path_to_best_model>'],
    "demo+stop": ['<path_to_best_model>'],
    "demo_only": ['<path_to_best_model>'],
    "imitation": ['<path_to_best_model>'],
    "pref+rating": ['<path_to_best_model>'],
    "pref+stop": ['<path_to_best_model>'],
    "pref_only": ['<path_to_best_model>'],
    "rating+stop": ['<path_to_best_model>'],
    "rating_only": ['<path_to_best_model>'],
    "stop_only": ['<path_to_best_model>'],
}


def _feedback_combo_equals(combo):
    """Return a predicate matching configs whose feedback_combo equals *combo*."""
    # A factory gives each predicate its own bound combo name (a lambda written
    # directly in the loop would see only the last loop value).
    return lambda c: c.get("feedback_combo") == combo


for _combo, _model_paths in _FB_MODEL_PATHS.items():
    grid.add_conditional(
        "fb_model_path",
        _model_paths,
        condition=_feedback_combo_equals(_combo),
    )


# Pick the run mode from the feedback combination: the "imitation" combo gets
# mode "imitation", every other combo gets mode "reward_model".
def _is_imitation(c):
    """Return True when the config's feedback_combo is "imitation"."""
    return c.get("feedback_combo") == "imitation"


grid.add_conditional("mode", ["reward_model"], condition=lambda c: not _is_imitation(c))
grid.add_conditional("mode", ["imitation"], condition=_is_imitation)

# ============================================================================
# Validation
# ============================================================================

# Validator: a configuration passes only if it carries an fb_model_path.
def _has_fb_model_path(c):
    """Return True when the config has a non-None "fb_model_path" entry."""
    return c.get("fb_model_path") is not None


grid.add_validator(_has_fb_model_path)

if __name__ == "__main__":
    # Running the file directly prints the grid summary ...
    print(grid.summary(seeds=10))

    # ... followed by a framed hint on how to identify the best models.
    _rule = "=" * 60
    _hint_lines = (
        "\n" + _rule,  # leading newline separates the hint from the summary
        "To identify best models, run:",
        "  python -m umfavi.experiments.cli select-best --queue-dir tasks",
        _rule,
    )
    for _line in _hint_lines:
        print(_line)
