# Settings for the policy and critic models.
# NOTE(review): the meanings noted below are inferred from key names only;
# confirm against the code that consumes this file.
policy:
  # Model family selector; "gnn" presumably selects a graph neural network.
  model_type: gnn
  # Presumably the initial standard deviation of the action distribution.
  initial_std: 0.3
  # Policy sizes: hidden widths of 64 for both the node network and the head,
  # with a depth of 1 for the GNN part and 2 for the head.
  policy_node_hidden_size: 64
  policy_head_hidden_size: 64
  policy_gnn_depth: 1
  policy_head_depth: 2
  # How the policy's graph is connected; "knn" with k: 5 presumably means each
  # node is linked to its 5 nearest neighbours (TODO confirm).
  policy_connectivity:
    kind: knn
    k: 5
  # Critic sizes: configured wider (128 vs 64) and deeper (3 vs 1) than the
  # policy GNN settings above.
  critic_node_hidden_size: 128
  critic_gnn_depth: 3
  # Same connectivity values as policy_connectivity, but set independently.
  critic_connectivity:
    kind: knn
    k: 5
# PPO training hyperparameters.
# NOTE(review): the meanings noted below are inferred from key names only;
# confirm against the training code that consumes this file.
ppo:
  # NOTE(review): 301 rather than 300, possibly so that iteration 300 is
  # still reached when counting from 0; confirm.
  n_iters: 301
  n_epochs: 8
  # NOTE(review): 150 * 20 = 3000, presumably the number of samples
  # consumed per iteration; confirm.
  minibatch_size: 150
  n_mini_batches: 20
  clip_epsilon: 0.2
  # Presumably the discount factor and the GAE lambda, respectively.
  gamma: 0.99
  lmbda: 0.95
  # Learning rates carry an explicit decimal point in the mantissa: YAML 1.1
  # loaders (e.g. PyYAML) resolve a bare `3e-4` as the string "3e-4", whereas
  # `3.0e-4` is a float under both YAML 1.1 and YAML 1.2.
  actor_lr: 3.0e-4
  critic_lr: 3.0e-4
  # Presumably the floor for critic_lr when the LR scheduler is enabled.
  min_critic_lr: 1.0e-4
  lr_scheduler_enabled: true
  max_grad_norm: 1.0
  # 0.0 presumably disables the entropy term of the loss (TODO confirm).
  entropy_eps: 0.0
  normalise_advantage: true
# Logging, evaluation and checkpoint settings.
# NOTE(review): the meanings noted below are inferred from key names only;
# confirm against the code that consumes this file.
logging:
  # Presumably the logger backend's mode (e.g. online vs. offline syncing).
  mode: online
  # Presumably counted in training iterations: evaluate every 20, running
  # 1 episode per evaluation, and checkpoint every 50 (TODO confirm units).
  evaluation_interval: 20
  evaluation_episodes: 1
  checkpoint_interval: 50