# Experiment spec: runs the `League` trainable on the GridShooting grid-world
# environment, using an asymmetric self-play league (see `league_config`).
# NOTE(review): the run/stop/config layout looks like a Ray Tune / RLlib
# experiment file, but `metric_columns` (bottom of this file) is not a standard
# Tune experiment key; presumably a project-specific launcher consumes it.
# Confirm against the launcher.
# NOTE(review): the experiment name says "pfsp" while `match_func` below is
# `ALP`; confirm the name still reflects the matchmaking actually in use.
grid-shooting-self-play-pfsp:
  run: League
  # Checkpoint every 200 units of `checkpoint_freq` (training iterations under
  # Tune semantics; assumed) and once more when the run ends.
  checkpoint_freq: 200
  checkpoint_at_end: true
  # Disabled option: no cap on the number of retained checkpoints is set here.
#  keep_checkpoints_num: 5
  # Relative path; results are presumably written under this directory.
  local_dir: ray_results
  # Sole stopping criterion in this file: 1,000,000,000 total timesteps.
  stop:
    timesteps_total: 1000000000
  # Trainer configuration handed to the `League` trainable.
  config:
    # Fixed seed for the run.
    seed: 123
    log_level: INFO
    # Dotted path to the callbacks class; the name suggests it reports
    # self-play metrics (cf. `metric_columns` below). Verify in the class.
    callbacks:
      type: agents.league.callbacks.AsymmetricSelfPlayMetricsCallback
    # Dotted path to the environment class.
    env: env.grid_world.shooting.GridShooting
    # Options forwarded to the environment.
    # NOTE(review): the two-element lists presumably hold one value per side
    # (left/right, matching the metric names below), and `max_cd` presumably
    # means "max cooldown". Confirm in GridShooting.
    env_config:
      size: 9
      init_life: [5, 5]
      max_cd: [5, 5]

    # Resources: 60 rollout workers plus the driver, 1 CPU each, no GPUs.
    num_workers: 60
    num_cpus_for_driver: 1

    num_envs_per_worker: 1
    num_cpus_per_worker: 1
    num_gpus: 0
    num_gpus_per_worker: 0
    # NOTE(review): 60 workers x 1 env x 100-step fragments = 6000 steps per
    # sampling round, which is more than `train_batch_size` (4000). Stock RLlib
    # PPO may warn about or auto-adjust `rollout_fragment_length` in this
    # situation; confirm how the League trainer handles it.
    rollout_fragment_length: 100
    train_batch_size: 4000
    # 4000 / 1000 = 4 minibatches per pass and 10 passes per train batch,
    # assuming these keys keep their stock PPO meaning.
    sgd_minibatch_size: 1000
    num_sgd_iter: 10

    batch_mode: truncate_episodes
    # Optimisation hyperparameters. Key names match RLlib's PPO options
    # (discount factor, GAE lambda, initial KL coefficient, learning rate);
    # assumed to carry the same meaning in the League trainer.
    gamma: 0.99
    lambda: 1.0
    kl_coeff: 0.2
    lr: 0.001
    # Explicitly null: no learning-rate schedule is configured here.
    lr_schedule: null
    # The options below are commented out, so none of them is set by this
    # file; whatever defaults the trainer defines apply instead.
#    vf_loss_coeff: 0.01
#    model:
#        fcnet_hiddens: [256]
#        fcnet_activation: linear
#        vf_share_layers: true
#    optimizer:
#    horizon:
#    soft_horizon:
#    no_done_at_end:
#
#    entropy_coeff: 0.0
#    clip_param: 0.3
#    vf_clip_param: 10.0
#    explore: true
#    observation_filter: MeanStdFilter

    # League settings: dotted path to the league class, the matchmaking
    # function name, and an iteration threshold.
    # NOTE(review): what `ALP` selects and what happens once `iter_threshold`
    # (200) is reached is defined in AsymmetricSelfPlayLeague, not here.
    league_config:
      type: agents.league.league.AsymmetricSelfPlayLeague
      match_func: ALP
      iter_threshold: 200

    # Multi-agent section: policies, mapping function and trainable-policy
    # list are all null in this file; presumably filled in at runtime by the
    # trainer/league. Verify before relying on it.
    multiagent:
      policies: null
      policy_mapping_fn: null
      policies_to_train: null
      policy_map_capacity: 1000

  # Metric key -> column label pairs. Only `training_iteration` and
  # `timesteps_total` are given shorter labels (`iter`, `ts`); all other
  # entries map to themselves.
  # NOTE(review): presumably passed to a progress reporter as its
  # `metric_columns`; key order may then determine column order, so keep the
  # ordering stable unless confirmed otherwise.
  metric_columns:
    training_iteration: iter
    time_total_s: time_total_s
    timesteps_total: ts
    episodes_this_iter: episodes_this_iter
    left_mean_win_rate: left_mean_win_rate
    right_mean_win_rate: right_mean_win_rate
    left_version: left_version
    right_version: right_version
