# Seed value for the run.
# NOTE(review): presumably consumed by the RNG setup -- confirm in the entry script.
seed: 0

# Environment settings.
# NOTE(review): `???` looks like the OmegaConf/Hydra mandatory-value marker,
# i.e. the key has no default and must be supplied before it is read --
# confirm against the config loader.
env:
  max_steps: 10
  continuous_actions: true
  scenario_name: "flag_capture_unembodied"
  # Keyword settings for the scenario named above.
  scenario:
    n_agents: 4
    # NOTE(review): presumably has to stay the logical opposite of
    # `continuous_actions` above -- confirm against the scenario code.
    discrete_actions: false
    task_rewards: [1.0, 1.0, 1.0, 1.0]
    gen_agg_type_task: "min"
    gen_agg_type_agent: "mean"
  # The two values below will be populated dynamically.
  device: ???
  vmas_envs: ???

# Model / algorithm options.
model:
  # MAPPO when true, IPPO when false.
  centralised_critic: true
  use_dico: false
  # NOTE(review): the upper bound is written as an int (2) and the lower bound
  # as a float (0.5); confirm the consumer does not depend on the type.
  desired_snd_upper: 2
  desired_snd_lower: 0.5
  action_loss_lr: 0.00003
  reset_with_env: false

# Data-collection schedule.
collector:
  # Frames sampled each sampling iteration (60k). Written without a `_` digit
  # separator: that form is an integer only under YAML 1.1 resolvers, while
  # YAML 1.2 core-schema loaders would read it as a string.
  frames_per_batch: 60000
  # Number of sampling/training iterations.
  n_iters: 200
  n_iters_env: 0
  env_optim_interval: 2
  # `???` placeholder: no value is given here.
  # NOTE(review): presumably filled in at runtime -- confirm in the training script.
  total_frames: ???

# Buffer settings.
buffer:
  # `???` placeholder: no value is given here.
  # NOTE(review): presumably filled in at runtime -- confirm in the training script.
  memory_size: ???

# Loss hyperparameters.
# NOTE(review): the key names (gamma, lmbda, clip_epsilon, entropy_eps) suggest
# a clipped PPO objective with GAE -- confirm against the loss module.
loss:
  normalize_advantage: true
  gamma: 0.99
  lmbda: 0.9
  entropy_eps: 0.001
  clip_epsilon: 0.2

# Optimisation settings.
train:
  # Optimization steps per batch of data collected.
  num_epochs: 40
  # Size of minibatches used in each epoch.
  minibatch_size: 4096
  # Floats carry an explicit decimal point: exponent-only forms such as `5e-5`
  # are loaded as strings by YAML 1.1 resolvers like PyYAML's default one.
  lr: 5.0e-5
  adam_eps: 0.000001
  env_lr: 0.1
  max_grad_norm: 5.0
  # `???` placeholder: no value is given here.
  # NOTE(review): presumably filled in at runtime -- confirm in the training script.
  device: ???

# Evaluation settings.
# NOTE(review): `evaluation_interval` is presumably counted in collector
# iterations -- confirm in the training loop.
eval:
  evaluation_interval: 5
  evaluation_episodes: 200
  explore: false

# Experiment-logging settings.
logger:
  # Delete this key to disable logging.
  backend: wandb
  # NOTE(review): the project name refers to "matrix_game" while
  # env.scenario_name is "flag_capture_unembodied" -- confirm this is the
  # intended project and not a leftover from another config.
  project_name: "matrix_game_cont"
  group_name: null
