# Seed value for the run (presumably used to initialise RNGs; the consumer is
# not visible in this file — confirm).
seed: 0

# Environment section: step limit, action-space flag, the scenario name and the
# settings nested under that scenario.
env:
  max_steps: 100
  continuous_actions: true
  scenario_name: 'flag_capture'
  # Scenario-specific settings.
  scenario:
    n_agents: 2
    n_flags: 2
    gen_agg_type_task: 'powersum'
    gen_agg_type_agent: 'powersum'
    spawn_agents_in_same_pos: false
    flag_capture_reward: 0
    reward_type: 'percentage'
    # Lidar is switched off here and its range is set to 0.
    use_lidar: false
    lidar_range: 0
    reach_flag_line_rew: true
  # `???` placeholders: these values will be populated dynamically.
  device: ???
  vmas_envs: ???

# Model / algorithm switches.
model:
  # MAPPO if true, IPPO if false.
  centralised_critic: true
  use_dico: false
  # Upper and lower values for the desired SND.
  # NOTE(review): presumably only relevant when use_dico is enabled — confirm.
  desired_snd_upper: 2
  desired_snd_lower: 0.5
  action_loss_lr: 0.00003
  reset_with_env: false

# Data-collection schedule.
collector:
  # Frames sampled each sampling iteration. Written without a `_` digit
  # separator so YAML 1.1 and YAML 1.2 parsers both resolve it as an integer.
  frames_per_batch: 60000
  # Number of sampling/training iterations.
  n_iters: 2500
  # NOTE(review): presumably the iteration count and update interval for the
  # environment-optimisation phase — confirm against the training script.
  n_iters_env: 2000
  env_optim_interval: 2
  # `???` placeholder: no concrete value is set in this file.
  total_frames: ???

# Buffer section. `???` is a placeholder: no concrete value is set in this
# file (presumably filled in at runtime like the other `???` entries — confirm).
buffer:
  memory_size: ???

# Loss hyperparameters.
loss:
  normalize_advantage: true
  # NOTE(review): gamma / lmbda are presumably the discount factor and the
  # GAE lambda — confirm against the loss module.
  gamma: 0.99
  lmbda: 0.9
  entropy_eps: 0.005
  clip_epsilon: 0.2

# Optimisation settings for the training loop.
train:
  # Optimization steps per batch of data collected.
  num_epochs: 45
  # Size of minibatches used in each epoch.
  minibatch_size: 4096
  lr: 0.00005
  adam_eps: 0.000001
  # Written in plain decimal form (was `1e-2`): an exponent literal without a
  # decimal point is resolved as a string by plain YAML 1.1 loaders.
  env_lr: 0.01
  max_grad_norm: 5.0
  # `???` placeholder: no concrete value is set in this file.
  device: ???
  training_routine: 'alternated'
  num_epochs_env: 1

# Evaluation settings.
eval:
  # NOTE(review): the unit of the interval (presumably training iterations)
  # is not visible in this file — confirm.
  evaluation_interval: 5
  evaluation_episodes: 200
  explore: false

# Experiment-logging settings.
logger:
  # Delete this key to remove logging.
  backend: wandb
  # NOTE(review): the project name mentions "goal_nav" while env.scenario_name
  # is "flag_capture" — confirm this is the intended project.
  project_name: 'het_env_design_goal_nav_design'
  group_name: null
