# Defaults list: the configs composed into this one, merged in the order
# written (assumes the file is loaded by Hydra, as the `_self_` / `@_here_`
# syntax suggests — TODO confirm).
defaults:
  # This file's own keys are merged first, so the entries listed below can
  # override them.
  # NOTE(review): if the values in this file are meant to take precedence,
  # `_self_` would need to be listed last — confirm the intended order.
  - _self_
  # `@_here_` is a package override: presumably it merges metadata/training
  # at this config's own package instead of its default `metadata` package.
  - metadata/training@_here_
  # Selects the `diffusion_gnn_distill` option from the `designer` group.
  - designer: diffusion_gnn_distill

# Identifier for this experiment; presumably used to label runs and outputs
# (verify against the code that reads it).
experiment_name: corners_agent_gnn_distill
# Name of the scenario to train on; how the name is resolved is defined by
# the consumer, not by this file.
scenario_name: rware_16_50_5_4_corners
# Compute-device selection for the run.
device:
  # Plain scalar; parses to the same string as the quoted form.
  device_management: gpu
  # presumably the index of the GPU to use — TODO confirm
  gpu_id: 0
  # NOTE(review): looks like a fraction of GPU memory (1.0 = all) — confirm
  max_gpu_memory: 1.0
# PPO training hyperparameters.
# NOTE(review): the meaning of each key is inferred from its name; confirm
# against the training loop that reads this section.
ppo:
  n_iters: 5000
  n_epochs: 5
  minibatch_size: 500
  n_mini_batches: 10
  clip_epsilon: 0.2
  gamma: 0.99
  lmbda: 0.9
  # Exponent-form floats carry an explicit mantissa dot (3.0e-4, not 3e-4)
  # so that YAML 1.1 loaders such as PyYAML resolve them as floats rather
  # than strings. The numeric values are unchanged.
  actor_lr: 3.0e-4
  min_actor_lr: 0
  critic_lr: 3.0e-4
  min_critic_lr: 1.5e-4
  max_grad_norm: 1.0
  entropy_eps: 1.0e-3
  normalise_advantage: false
# No checkpoint is configured (explicit null); presumably training then
# starts from scratch — confirm against the checkpoint-loading code.
start_from_checkpoint: null
# Policy selection and model construction arguments.
policy:
  # Quoted so the version identifier is always read as a string.
  version: "v2"
  # presumably forwarded as keyword arguments when the critic is built —
  # TODO confirm against the policy factory
  critic_kwargs:
    model_channels: 64

# Experiment logging and periodic evaluation settings.
logging:
  type: wandb
  # Active logging mode. The commented-out lines below are the alternative
  # modes, kept for quick toggling; leave exactly one `mode` uncommented.
  # NOTE(review): assumes `mode` is passed through to the wandb logger, where
  # `disabled` would turn logging off — confirm.
  mode: disabled
  # mode: online
  # mode: offline
  # presumably measured in training iterations — TODO confirm the unit
  evaluation_interval: 20
  # presumably the number of episodes run per evaluation — TODO confirm
  evaluation_episodes: 5