"""
A hierarchical PPO.
"""
from trainer.trainer import Trainer
from agents.ppo import PPO_DEFAULT_CONFIG
from policy.policy import PolicySpec
from agents.ppo.communication import PPOComPolicy
from agents.ppo.policy import PPOTorchPolicy


# yapf: disable
# __sphinx_doc_begin__

def _map_agent_to_policy(agent_id, episode, worker, **kwargs):
    """Return the ID of the policy that acts for ``agent_id``.

    Agents whose ID starts with ``"agent_"`` are mapped to
    ``"low_level_policy"``; every other agent ID is mapped to
    ``"high_level_policy"``.

    This is a named module-level function rather than an inline lambda so
    that it shows up by name in tracebacks and can be pickled by reference.

    Args:
        agent_id: ID of the agent to map. Must provide ``startswith``
            (i.e. be a string).
        episode: Unused; accepted to keep the call signature unchanged.
        worker: Unused; accepted to keep the call signature unchanged.
        **kwargs: Unused; any extra keyword arguments are ignored.

    Returns:
        ``"low_level_policy"`` or ``"high_level_policy"``.
    """
    if agent_id.startswith("agent_"):
        return "low_level_policy"
    return "high_level_policy"


# PPO defaults merged with the hierarchical (two-policy) overrides below.
PPO_HRL_DEFAULT_CONFIG = Trainer.merge_trainer_configs(
    PPO_DEFAULT_CONFIG,  # See keys in trainer.py, which are also supported.
    {
        # === Hierarchical Training ===
        "multiagent": {
            # One policy per level of the hierarchy.
            "policies": {
                "high_level_policy": PolicySpec(policy_class=PPOComPolicy),
                "low_level_policy": PolicySpec(policy_class=PPOTorchPolicy),
            },
            # "agent_*" IDs -> low-level policy; all other IDs -> high-level.
            "policy_mapping_fn": _map_agent_to_policy,
            # Both policies are listed for training.
            "policies_to_train": ["high_level_policy", "low_level_policy"],
        },
        # Per-level settings, empty by default.
        # NOTE(review): how these two dicts are consumed is not visible in
        # this file -- confirm against the policy/trainer code.
        "high_level_policy_config": {},
        "low_level_policy_config": {},
    },
    # NOTE(review): presumably required because some keys above are not
    # part of PPO_DEFAULT_CONFIG -- confirm in Trainer.merge_trainer_configs.
    _allow_unknown_configs=True,
)

# __sphinx_doc_end__
# yapf: enable
