# @package _global_

# Hydra defaults list: every entry uses `override` to replace the option
# already selected for that config group (model, env, callbacks, trainer,
# logger) with the file named here.
defaults:
  - override /model: am-ppo.yaml
  - override /env: mdpp.yaml
  - override /callbacks: default.yaml
  - override /trainer: default.yaml
  - override /logger: wandb.yaml


# Weights & Biases run metadata. Tags, group and run name all interpolate
# `env.name`, which is not defined in this file.
logger:
  wandb:
    project: rl4co
    tags:
      - am-ppo
      - "${env.name}"
    group: "${env.name}"
    name: "am-ppo-${env.name}"


# Values set here are merged into the `model` config group chosen in the
# defaults list (am-ppo).
model:
  batch_size: 64
  train_data_size: 1000
  val_data_size: 100
  test_data_size: 100
  # NOTE(review): the keys below look like PPO loss/update hyperparameters;
  # confirm exact semantics against the model class that consumes them.
  clip_range: 0.2
  ppo_epochs: 2
  # Interpolated, so it follows `model.batch_size` unless overridden on its own.
  mini_batch_size: ${model.batch_size}
  vf_lambda: 0.5
  entropy_lambda: 0.01
  normalize_adv: false
  max_grad_norm: 0.5
  optimizer_kwargs:
    # The mantissa carries a decimal point so that YAML 1.1 loaders (e.g. plain
    # PyYAML) resolve these as floats instead of strings.
    lr: 1.0e-4
    weight_decay: 1.0e-3

trainer:
  max_epochs: 10
  # Left unset: gradient clipping is not supported in manual optimization.
  gradient_clip_val: null
  # NOTE: this setting seems to be important during manual optimization.
  precision: '32-true'

# Top-level seed value; where it is consumed is not visible in this file.
seed: 1234


