"""
A multi-agent, league-capable PPO.
"""
from trainer.trainer import Trainer
from agents.ppo import PPO_DEFAULT_CONFIG


# yapf: disable
# __sphinx_doc_begin__

# Default config for the league-capable PPO trainer, built by passing the PPO
# defaults and the league-specific overrides below to
# `Trainer.merge_trainer_configs`.
# NOTE: every entry inside "league_config" is currently commented out, so the
# override contributed by this file is an empty dict. The commented entries
# document the available options together with example values.
# NOTE(review): whether those example values match the league class's own
# built-in defaults is not visible from this file - confirm.
LEAGUE_DEFAULT_CONFIG = Trainer.merge_trainer_configs(
    PPO_DEFAULT_CONFIG,  # See keys in trainer.py, which are also supported.
    {
        # League-building parameters.
        "league_config": {
            # The League class to be used for league building logic.
            # "type": DummyLeague,
            # Minimum win-rate (between 0.0 = 0% and 1.0 = 100%) of any policy to
            # be considered for snapshotting (cloning). The cloned copy may then
            # be frozen (no further learning) or keep learning (independent of
            # its ancestor policy).
            # Set this to lower values to speed up league growth.
            # "win_rate_threshold": 0.9,
            # If we took a new snapshot of any given policy, what's the probability
            # that this snapshot will continue to be trainable (rather than become
            # frozen/non-trainable)? By default, only keep those policies trainable
            # that have been trainable from the very beginning.
            # "keep_new_snapshot_training_prob": 0.0,
            # Probabilities of different match-types:
            # LE: Learning league_exploiter vs any.
            # ME: Learning main exploiter vs any main.
            # M: Main self-play (p=1.0 - LE - ME).
            # "prob_league_exploiter_match": 0.33,
            # "prob_main_exploiter_match": 0.33,
            # Only for ME matches: Probability to play against the learning
            # main (vs a snapshot main).
            # "prob_main_exploiter_playing_against_learning_main": 0.5,
        },
    },
    # NOTE(review): presumably required because "league_config" is not a key
    # of the PPO defaults - confirm against Trainer.merge_trainer_configs.
    _allow_unknown_configs=True,
)

# __sphinx_doc_end__
# yapf: enable
