# Experiment definition; the top-level key is the experiment's name.
# NOTE(review): the run/stop/config/local_dir layout looks like a Ray Tune /
# RLlib experiment file -- confirm against the launcher that loads it.
qnyh-self-play-variance-pfsp-population-entropy:
  # Name of the trainable/algorithm to launch.
  run: PopulationEntropy

  # Directory that receives the run's results.
  local_dir: ray_results

  # Stopping criterion: end the run at 50M total timesteps.
  stop:
    timesteps_total: 50000000

  # Checkpointing: save every 500 units and once more when the run ends.
  # NOTE(review): the unit is presumably training iterations -- confirm.
  checkpoint_freq: 500
  checkpoint_at_end: true
  # Disabled checkpoint options, kept for reference:
  # restore: null
  # keep_checkpoints_num: 5
  config:
    # Logging verbosity and random seed for the run.
    log_level: INFO
    seed: 123

    # Callback class, referenced by what looks like a dotted import path.
    callbacks:
      type: agents.league.callbacks.AsymmetricSelfPlayMetricsCallback

    # Environment class; its settings live in env_config.
    env: env.qnyh_small.multi_agent_env.QnyhSmallSelfplay
    # Settings for the environment named by `env`.
    # Booleans use canonical lowercase true/false, matching the rest of this
    # file (checkpoint_at_end, simple_optimizer); parsed values are unchanged.
    env_config:
      print_game_log: false
      # Which race plays on each side of the match.
      races:
        left: Shooter
        right: Tank
      # NOTE(review): exact meaning of these two flags is defined by the
      # environment implementation -- confirm there before changing them.
      select_skill: true
      classify: false

    # Rollout workers: 60 workers, 3 environments and 1 CPU each, no GPU.
    num_workers: 60
    num_envs_per_worker: 3
    num_cpus_per_worker: 1
    num_gpus_per_worker: 0

    # Remaining allocation: 1 CPU for the driver; no GPUs requested anywhere.
    num_cpus_for_driver: 1
    num_gpus: 0

    simple_optimizer: true

    # Evaluation is disabled; the options below are kept for reference.
    # evaluation_num_workers: 10  # total_num_eval_workers (divided for each env_config)
    # evaluation_interval: 10  # after given training iterations
    # evaluation_duration: 20  # duration per env_config
    # evaluation_duration_unit: episodes
    # evaluation_parallel_to_training: true
#    num_sgd_iter: 20
#    model:
#        fcnet_hiddens: [256, 256]

#    evaluation_config:
#      callbacks:
#        type: agents.league.callbacks.AsymmetricSelfPlayEvalCallback
#      env_config:
#        - print_game_log: False
#          races:
#            left: Shooter
#            right: Tank
#          select_skill: True
#          classify: False
#          rule_team: 0
#          hard_ai: false
#        - print_game_log: False
#          races:
#            left: Shooter
#            right: Tank
#          select_skill: True
#          classify: False
#          rule_team: 1
#          hard_ai: false

    # Coefficient for the population-entropy term.
    # NOTE(review): presumably consumed by the PopulationEntropy trainable
    # named in `run` -- confirm how it enters the loss.
    population_entropy_coeff: 0.1

    # League settings; `type` looks like a dotted import path to the league
    # class.
    league_config:
      type: agents.league.league.AsymmetricSelfPlayLeague
      # NOTE(review): PFSP is presumably prioritized fictitious self-play
      # opponent matching -- confirm against the league implementation.
      match_func: PFSP
      max_league_size: 30
      # NOTE(review): these thresholds presumably gate when a new policy
      # version is added to the league -- confirm their exact semantics.
      win_rate_threshold: 0.95
      iter_threshold: 30

    # Multi-agent settings, all explicitly null in this file.
    # NOTE(review): presumably filled in at runtime by the trainer or league
    # code -- confirm before setting values here.
    multiagent:
      policies: null
      policy_mapping_fn: null
      policies_to_train: null

  # Maps a result metric name (key) to a label (value).
  # NOTE(review): presumably the columns shown in the progress table, in this
  # order -- confirm with the reporter that consumes it before reordering.
  metric_columns:
    # Generic progress metrics, with shortened labels for two of them.
    training_iteration: iter
    time_total_s: time_total_s
    timesteps_total: ts
    episodes_this_iter: episodes_this_iter
    # Per-side metrics for the left/right players.
    left_mean_win_rate: left_mean_win_rate
    right_mean_win_rate: right_mean_win_rate
    left_version: left_version
    right_version: right_version
