---
# Experiment spec for the `starcraft-3m-att-com-ppo` run.
# NOTE(review): the run / stop / config / local_dir layout looks like a
# Ray Tune experiment file wrapping an RLlib trainer config; confirm against
# the launcher that loads this file.
starcraft-3m-att-com-ppo:
  # Name of the trainable to launch. Presumably registered by the project
  # before this file is loaded (the registration is not visible here).
  run: PPO-com

  # Checkpointing: every 1000 units (presumably training iterations;
  # TODO confirm) and once more when the run ends.
  checkpoint_freq: 1000
  checkpoint_at_end: true
  # Optional checkpoint settings, currently disabled:
  # restore: null
  # keep_checkpoints_num: 5

  # Results directory (presumably resolved relative to the launch directory).
  local_dir: ray_results

  # Stop criterion: 500000 total timesteps.
  stop:
    timesteps_total: 500000

  config:
    seed: 8888

    # Callback class, given as a dotted import path under `type`.
    callbacks:
      type: env.starcraft.utils.PvEMetricsCallback

    # Environment class (dotted import path) and the settings passed with it.
    env: env.starcraft.StarCraft2PvEComEnv
    env_config:
      # Quoted so the digit-leading map name can never be read as a number.
      map_name: '3m'
      num_agents: 3

    # Resources: 30 workers with one environment and one CPU each, no GPU per
    # worker; a single GPU requested via `num_gpus` (presumably for the
    # learner/driver process; TODO confirm).
    num_workers: 30
    num_envs_per_worker: 1
    num_cpus_per_worker: 1
    # num_cpus_for_driver: 1
    num_gpus: 1
    num_gpus_per_worker: 0

    # Evaluation: 5 dedicated workers, 40 episodes per evaluation round,
    # triggered every 20 iterations and run in parallel with training.
    evaluation_num_workers: 5
    evaluation_interval: 20  # iterations
    evaluation_duration: 40
    evaluation_duration_unit: episodes
    evaluation_parallel_to_training: true
    # Evaluation overrides; currently identical to the training `env_config`.
    evaluation_config:
      env_config:
        map_name: '3m'
        num_agents: 3

    # Custom model and action distribution, referenced by name. The names are
    # presumably registered elsewhere in the project (not visible here).
    model:
      custom_model: att_com_action_mask_model
      custom_action_dist: hom_multi_action
      custom_model_config:
        encoder_hidden_layers: [256, 256]
        num_heads: 8
        head_dim: 64
        decoder_hidden_layers: [256]
