# Experiment definition: self-play training on the gfootball `academy_corner`
# scenario (see config.env / config.env_config below).
# NOTE(review): the experiment name ends in `pfsp`, but league_config.match_func
# is `ALP` and the dump directories use an `_alp` suffix -- confirm the name.
gfootball-academy-corner-self-play-alp-pfsp:
  # Trainable to launch. `League` is not defined in this file; presumably a
  # trainer registered by the project -- confirm.
  run: League
  # Checkpoint every 1000 (presumably training iterations -- confirm) and once
  # more when the run ends.
  checkpoint_freq: 1000
  checkpoint_at_end: true
#  restore: null
#  keep_checkpoints_num: 5
  # Directory results are written to (relative path).
  local_dir: ray_results
  # Stop criterion: end the run at one billion total timesteps.
  stop:
    timesteps_total: 1000000000
  # Trainer configuration for the `League` run.
  config:
    # Fixed random seed.
    seed: 1234
    log_level: INFO
    # Training-time callbacks, given as a dotted path under `type`
    # (presumably resolved to a class by the project's config loader -- confirm).
    callbacks:
      type: agents.league.callbacks.AsymmetricSelfPlayMetricsCallback
    # Environment, given as a dotted path.
    env: env.gfootball.self_play_env.SelfPlayFootballEnv
    # Settings for the training environment.
    env_config:
      env_name: academy_corner
      stacked: false
      rewards: scoring
      # Dump settings: goal-only dumps off, full-episode dumps and video on.
      # NOTE(review): write_video is true while render is false -- confirm that
      # videos are still produced as intended without rendering.
      write_goal_dumps: false
      write_full_episode_dumps: true
      render: false
      write_video: true
      # Much sparser than the evaluation environments, which use 10.
      dump_frequency: 10000
      representation: simple115v2
      # During training the agent controls one player on each side.
      number_of_left_players_agent_controls: 1
      number_of_right_players_agent_controls: 1
      logdir: gfootball_dumps_alp/train
      other_config_options:
        action_set: default  # "default": action_set_v1 (19), "v2": action_set_v2 (19 + 1 built-in ai)
#      court_range: 0.3
      # Marks this as the training (non-evaluation) environment.
      in_evaluation: false

    # Rollout workers and per-process resources (CPU only; no GPUs requested).
    num_workers: 40
    num_cpus_for_driver: 1
    # Evaluation schedule: 20 evaluation workers in total, evaluating every 10
    # training iterations for 20 episodes per env_config, in parallel to training.
    evaluation_num_workers: 20  # total_num_eval_workers (divided for each env_config)
    evaluation_interval: 10  # after given training iterations
    evaluation_duration: 20  # duration per env_config
    evaluation_duration_unit: episodes
    evaluation_parallel_to_training: true
    num_envs_per_worker: 1
    num_cpus_per_worker: 1
    num_gpus: 0
    num_gpus_per_worker: 0
    # Sampling and SGD sizes.
    # NOTE(review): 40 workers x 1 env x 1000-step fragments is 40000 steps if
    # every worker contributes one fragment per sampling round, i.e. 4x
    # train_batch_size -- confirm this combination is intended.
    rollout_fragment_length: 1000
    train_batch_size: 10000
    sgd_minibatch_size: 2000
    num_sgd_iter: 30

#    custom_eval_function:
#      type: env.gfootball.utils.EvalFn
    # Evaluation-specific settings: a separate callbacks class, plus policy
    # mapping functions and environment configs given as lists.
    evaluation_config:
      callbacks:
        type: agents.league.callbacks.AsymmetricSelfPlayEvalCallback
      multiagent:
        # Two mapping functions, listed in the same order as the two env_config
        # entries below (left first, right second). Presumably paired with them
        # by index -- confirm against the config loader before reordering.
        policy_mapping_fn:
          - type: agents.league.callbacks.EvalMainLeftPolicyMappingFn
          - type: agents.league.callbacks.EvalMainRightPolicyMappingFn
      # Evaluation environments: one list entry per evaluated setup.
      env_config:  # provide a list to eval in multi-env
        # Entry 1 ("attack"): the agent controls one left player and no right
        # player; dumps go to gfootball_dumps_alp/eval_attack.
        - env_name: academy_corner  # must eval left (because policy mapping is fixed within these eval_workers)
          stacked: false
          rewards: scoring
          write_goal_dumps: false
          write_full_episode_dumps: true
          render: false
          write_video: true
          # Far denser than the training environment, which uses 10000.
          dump_frequency: 10
          representation: simple115v2
          number_of_left_players_agent_controls: 1
          number_of_right_players_agent_controls: 0
          logdir: gfootball_dumps_alp/eval_attack
          other_config_options:
            action_set: default  # "default": action_set_v1 (19), "v2": action_set_v2 (19 + 1 built-in ai)
#          court_range: 0.3
          in_evaluation: true
        # Entry 2 ("defense"): the agent controls one right player and no left
        # player; dumps go to gfootball_dumps_alp/eval_defense. Other settings
        # match the first entry.
        - env_name: academy_corner  # must eval right (because policy mapping is fixed within these eval_workers)
          stacked: false
          rewards: scoring
          write_goal_dumps: false
          write_full_episode_dumps: true
          render: false
          write_video: true
          dump_frequency: 10
          representation: simple115v2
          number_of_left_players_agent_controls: 0
          number_of_right_players_agent_controls: 1
          logdir: gfootball_dumps_alp/eval_defense
#          court_range: 0.3
          other_config_options:
            action_set: default  # "default": action_set_v1 (19), "v2": action_set_v2 (19 + 1 built-in ai)
          in_evaluation: true

    # Optimisation hyperparameters actually set by this experiment.
    batch_mode: truncate_episodes
    gamma: 0.99
    lambda: 1.0
    kl_coeff: 0.2
    # Constant learning rate: no schedule is configured.
    lr: 0.001
    lr_schedule: null
    # Everything below is commented out, so these options are not set here and
    # fall back to whatever defaults the trainer uses.
#    vf_loss_coeff: 0.01
#    model:
#        fcnet_hiddens: [256]
#        fcnet_activation: linear
#        vf_share_layers: true
#    optimizer:
#    horizon:
#    soft_horizon:
#    no_done_at_end:
#
#    entropy_coeff: 0.0
#    clip_param: 0.3
#    vf_clip_param: 10.0
#    explore: true
#    observation_filter: MeanStdFilter

    # No league-building needed.
    # NOTE(review): the line above sits directly on top of an active
    # league_config, so it is unclear what it refers to -- clarify or remove.
    # League settings: league class (dotted path), matchmaking function, an
    # iteration threshold and a cap on league size. Exact semantics of
    # iter_threshold live in the league class -- confirm there.
    league_config:
      type: agents.league.league.AsymmetricSelfPlayLeague
      match_func: ALP
#      newest_prob: 0.7
      iter_threshold: 100
      max_league_size: 30

    # Multi-agent settings for training. Policies, mapping function and
    # policies_to_train are left null here (presumably filled in at runtime by
    # the League trainer -- confirm); only the policy map capacity is set.
    multiagent:
      policies: null
      policy_mapping_fn: null
      policies_to_train: null
      policy_map_capacity: 1000
#      policy_map_cache: /home/policies_alp  # must be an existing absolute path

  # Mapping of metric name -> column label (presumably for the progress
  # reporter's table -- confirm). Only training_iteration and timesteps_total
  # get shortened labels; the rest reuse the metric name.
  # Key order may determine column order, so keep it stable.
  metric_columns:
    training_iteration: iter
    time_total_s: time_total_s
    timesteps_total: ts
    episodes_this_iter: episodes_this_iter
    left_mean_win_rate: left_mean_win_rate
    right_mean_win_rate: right_mean_win_rate
    left_version: left_version
    right_version: right_version
