# Seed value; presumably used for RNG seeding by the consuming script (TODO confirm).
seed: 0

# Environment section: episode length, action type and scenario parameters.
env:
  max_steps: 400
  continuous_actions: true
  scenario_name: "tag_potential"
  scenario:
    n_agents: 2
    n_adversaries: 0
    n_escapers: 2

    # Dimensions of the world
    bound: 1.0

    # Sizes of the agents
    chaser_radius: 0.075
    escaper_radius: 0.05

    # Lidar configuration
    use_lidar: false
    lidar_range: 0.35

    # Dynamics of the agents
    chaser_speed: 2.0
    chaser_u_multiplier: 3.0

    # Tunables for the escaper AI
    escaper_speed: 1.0
    escaper_activation_range: 1.5
    wall_avoidance_margin: 0.5
    wall_repulsion_strength: 1.0
    escaper_noise: 0.0
    escaper_repulsion_strength: 0.5
    escaper_center_attraction_strength: 2.5

    # Smoothing
    # Momentum coefficient, in the range 0-1.
    velocity_smoothing: 0.3
    # Maximum acceleration, for smooth movement.
    max_acceleration: 2.5

    # Reward settings
    scaling_coef_1: 0.1
    scaling_coef_2: 10.0
    gen_agg_type_inner: "max"
    gen_agg_type_outer: "min"

  # The two values below will be populated dynamically.
  device: ???
  vmas_envs: ???

# Model section.
model:
  # true selects MAPPO, false selects IPPO.
  centralised_critic: true
  use_dico: false
  desired_snd_upper: 2
  desired_snd_lower: 0.5
  action_loss_lr: 0.00003
  reset_with_env: false

# Data-collection section.
collector:
  # Frames sampled each sampling iteration. Written without a digit separator:
  # `60_000` is an integer only under YAML 1.1 rules, while YAML 1.2
  # core-schema parsers read it as a string.
  frames_per_batch: 60000
  # Number of sampling/training iterations.
  n_iters: 500
  n_iters_env: 0
  env_optim_interval: 2
  # NOTE(review): `???` placeholder, presumably populated at runtime; confirm.
  total_frames: ???

buffer:
  # NOTE(review): `???` placeholder, presumably populated at runtime; confirm.
  memory_size: ???

# Loss section.
loss:
  normalize_advantage: true
  gamma: 0.99
  lmbda: 0.9
  entropy_eps: 0.005
  clip_epsilon: 0.2


# Training section.
train:
  # Optimization steps per batch of data collected.
  num_epochs: 45
  # Size of the minibatches used in each epoch.
  minibatch_size: 4096
  lr: 0.00005
  adam_eps: 0.000001
  # Plain decimal rather than `1e-1`: an exponent without a decimal point is
  # loaded as a string by YAML 1.1 resolvers such as stock PyYAML.
  env_lr: 0.1
  max_grad_norm: 5.0
  # NOTE(review): `???` placeholder, presumably populated at runtime; confirm.
  device: ???

# Evaluation section.
eval:
  evaluation_interval: 5
  evaluation_episodes: 200
  explore: false

# Logging section.
logger:
  # Delete this key to remove logging.
  backend: wandb
  project_name: "tag"
  # References env.scenario.gen_agg_type_inner and env.scenario.gen_agg_type_outer
  # through `${...}` placeholders (resolution depends on the config loader).
  group_name: "${env.scenario.gen_agg_type_inner}_${env.scenario.gen_agg_type_outer}"
