# Experiment definition: a "PPO-curriculum" run on the StarCraft II PvE
# curriculum environment, configured with an ALP-GMM teacher.
starcraft-alp-gmm-ppo:
  # Name of the trainable to run. "PPO-curriculum" is presumably registered by
  # the project before the experiment is launched -- TODO confirm.
  run: PPO-curriculum
  # NOTE(review): assuming this interval is counted in training iterations,
  # confirm that a periodic checkpoint is actually reached before the stop
  # criterion below; otherwise only the end-of-run checkpoint is written.
  checkpoint_freq: 1000
  checkpoint_at_end: true
  # Optional checkpoint settings, currently disabled:
  # restore: null
  # keep_checkpoints_num: 5
  # Relative output directory for results.
  local_dir: ray_results
  # The run stops once this many total timesteps have been reached.
  stop:
    timesteps_total: 2000000
  config:
    # `type` is the dotted path of the callbacks class; presumably it is
    # imported by the project's config loader -- verify.
    callbacks:
      type: env.starcraft.utils.PvEMetricsCallback

    # Curriculum teacher settings. `type` is the dotted path of the teacher
    # class; how the remaining keys are interpreted is defined by that class,
    # which is not visible in this file.
    teacher_config:
      type: agents.curriculum.teacher.ALPGMMTeacher
      # NOTE(review): the meaning of the three values is not visible here. The
      # largest one equals env_config.max_num_agents.
      num_agents:
        - 3
        - 8
        - 10
      gmm_fitness_func: aic
      warm_start: false
      nb_em_init: 1
      fit_rate: 250
      alp_window_size: 20
      random_task_ratio: 0.2
      # Both of the following are explicitly left unset.
      nb_bootstrap: null
      initial_dist: null

    # Training environment, given as a dotted path; presumably resolved by the
    # project's launcher -- verify.
    env: env.starcraft.StarCraft2CurriculumPvEEnv
    # Options for the training environment.
    env_config:
      map_name: 10m_vs_11m
      heuristic_ai: true
      heuristic_rest: true
      # Same value as the largest entry of teacher_config.num_agents.
      max_num_agents: 10
      # Training copy of the flag; evaluation_config.env_config sets it to true.
      in_evaluation: false

    # Rollout workers and evaluation schedule.
    num_workers: 10
    # num_cpus_for_driver: 1
    evaluation_num_workers: 1
    evaluation_interval: 5  # iterations
    # Together with the unit below, each evaluation round lasts 20 episodes.
    evaluation_duration: 20
    evaluation_duration_unit: episodes
    evaluation_parallel_to_training: true

    # Resource requests: one environment and one CPU per worker.
    num_envs_per_worker: 1
    num_cpus_per_worker: 1
    # One GPU is requested overall and none per worker; presumably the GPU is
    # used by the driver/learner process -- confirm.
    num_gpus: 1
    num_gpus_per_worker: 0

    # Settings used for evaluation. Only map_name, max_num_agents and
    # in_evaluation are listed here; heuristic_ai and heuristic_rest from the
    # training env_config are not repeated.
    # NOTE(review): whether those two keys are inherited depends on how the
    # framework merges this section into the main config -- confirm.
    evaluation_config:
      env_config:
        map_name: 10m_vs_11m
        max_num_agents: 10
        in_evaluation: true

    # Presumably the number of SGD passes over each training batch -- confirm
    # against the trainer's documentation.
    num_sgd_iter: 3

    model:
      # Custom model referenced by name; presumably it has to be registered
      # under this name elsewhere in the project -- verify.
      custom_model: action_mask_model

    # Multi-agent setup: a single policy, "shared_policy", which is also the
    # only policy listed for training.
    multiagent:
      policies:
        - shared_policy
      # The mapping function is given as a class path (`type`) rather than a
      # Python callable so that it can be expressed in YAML.
      policy_mapping_fn:
        type: env.starcraft.utils.MultiAgentParameterSharingPolicyMappingFn
      policies_to_train:
        - shared_policy
