# Experiment spec for Connect Four self-play using the PFSP match function.
# NOTE(review): the run / stop / config layout looks like a Ray Tune
# experiment file — confirm against the launcher that loads it.
connect-four-self-play-pfsp:
  # Name of the trainable to run; presumably registered by the project.
  run: League
  # Checkpoint cadence (presumably in training iterations — confirm), plus
  # a final checkpoint when the run ends.
  checkpoint_freq: 200
  checkpoint_at_end: true
  # Disabled option: uncomment to (presumably) cap the number of checkpoints
  # kept on disk.
  # keep_checkpoints_num: 5
  local_dir: ray_results
  # Stop criterion for the run.
  stop:
    timesteps_total: 1000000000
  config:
    seed: 123
    log_level: INFO
    callbacks:
      type: agents.league.callbacks.SelfPlayMetricsCallback
    env: connect_four

    # CPU budget as configured: 9 workers x 1 CPU + 1 driver CPU; no GPUs.
    num_workers: 9
    num_cpus_for_driver: 1

    num_envs_per_worker: 5
    num_cpus_per_worker: 1
    num_gpus: 0
    num_gpus_per_worker: 0
    num_sgd_iter: 20
    model:
      fcnet_hiddens: [256, 256]

    # League settings: class to instantiate and the opponent match function.
    # NOTE(review): the exact meaning of the two thresholds is defined by
    # agents.league.league.SelfPlayLeague — confirm there before tuning.
    league_config:
      type: agents.league.league.SelfPlayLeague
      match_func: PFSP
      win_rate_threshold: 0.95
      iter_threshold: 30

    # Left as explicit nulls; presumably filled in at runtime by the League
    # trainer — TODO confirm.
    multiagent:
      policies: null
      policy_mapping_fn: null
      policies_to_train: null
      policy_map_capacity: 1000

  # Maps metric names to display labels.
  # NOTE(review): not a key visible elsewhere in this file; presumably read
  # by the project's launcher for progress reporting — confirm.
  metric_columns:
    training_iteration: iter
    time_total_s: time_total_s
    timesteps_total: ts
    episodes_this_iter: episodes_this_iter
    version: version
