# Presumably the random seed for the run -- confirm against the training script.
seed: 0

# Environment settings: rollout length, action type, scenario selection and
# the parameters nested under that scenario.
env:
  max_steps: 500
  continuous_actions: true
  scenario_name: "soccer_design"
  # Presumably forwarded to the scenario named above -- confirm against the
  # code that consumes this config.
  scenario:
    ai_strength: 0.5
    gen_agg_type_task: "min"
    gen_agg_type_agent: "max"
    # Written with an explicit mantissa dot: YAML 1.1 loaders such as stock
    # PyYAML resolve a bare `1e-3` as a string instead of a float.
    rew_coeff_flags: 1.0e-3
    rew_coeff_others: 10
    scoring_reward: 0
  # The two `???` values below will be populated dynamically.
  device: ???
  vmas_envs: ???

# Model options.
model:
  # MAPPO if true, IPPO if false
  centralised_critic: true
  use_dico: false
  # NOTE(review): the upper/lower pair looks like a target range -- confirm
  # how the consumer uses these bounds.
  desired_snd_upper: 2
  desired_snd_lower: 0.5
  action_loss_lr: 0.00003
  reset_with_env: false

# Data-collection settings.
collector:
  # Frames sampled each sampling iteration. Written as plain digits: the
  # underscore-grouped form is an integer only under YAML 1.1 and is read as
  # a string by YAML 1.2 core-schema loaders.
  frames_per_batch: 240000
  # Number of sampling/training iterations
  n_iters: 500
  n_iters_env: 0
  env_optim_interval: 2
  # `???` placeholder -- presumably derived at runtime (e.g. from the values
  # above); confirm against the training script.
  total_frames: ???

# Replay-buffer settings.
buffer:
  # `???` placeholder -- presumably filled in at runtime; confirm against the
  # training script.
  memory_size: ???

# Loss hyperparameters.
# NOTE(review): given the MAPPO/IPPO setting elsewhere in this file these are
# presumably PPO loss terms (discount, GAE lambda, entropy bonus, clip range)
# -- confirm against the loss module.
loss:
  normalize_advantage: true
  gamma: 0.99
  lmbda: 0.9
  entropy_eps: 0.005
  clip_epsilon: 0.2

# Optimisation settings.
train:
  # optimization steps per batch of data collected
  num_epochs: 45
  # size of minibatches used in each epoch
  minibatch_size: 4096
  lr: 0.00005
  adam_eps: 0.000001
  # Written with an explicit mantissa dot: YAML 1.1 loaders such as stock
  # PyYAML resolve a bare `1e-1` as a string instead of a float.
  env_lr: 1.0e-1
  max_grad_norm: 5.0
  # `???` placeholder -- presumably filled in at runtime; confirm against the
  # training script.
  device: ???

# Evaluation settings.
eval:
  # NOTE(review): presumably counted in training iterations -- confirm.
  evaluation_interval: 5
  evaluation_episodes: 200
  explore: false

# Experiment-logging settings.
logger:
  # Delete to remove logging
  backend: wandb
  project_name: 'soccer_env_design'
  group_name: null
