# Config-group selections composed into this file.
# NOTE(review): this looks like a Hydra defaults list selecting the `PPO`
# option of a `training_scheme` group — confirm against the config loader.
defaults:
  - training_scheme: PPO

# Compute device identifier, kept as an explicitly quoted string.
# NOTE(review): presumably handed to the ML framework's device selector
# (e.g. "cpu" vs. a GPU name) — confirm with the consuming code.
device: 'cpu'

# Optimizer / update hyperparameters.
optimization:
  # Learning rate. Written with an explicit decimal point so that YAML 1.1
  # resolvers (e.g. stock PyYAML) load it as a float; the bare form `5e-5`
  # is resolved as the *string* "5e-5" by those loaders.
  lr: 5.0e-5
  # Number of samples per optimisation batch.
  batchsize: 128
  # NOTE(review): presumably the lambda of Generalized Advantage Estimation
  # — confirm against the training code.
  gae: 0.9

# Network architecture settings.
model:
  # NOTE(review): presumably the size of the input feature vector — confirm.
  features: 19
  # Width of the hidden representation (no space before the colon, matching
  # every other key in this file).
  hidden_dim: 512
  # NOTE(review): `depth` and `n_layers` both read like layer counts; which
  # component each one controls is not visible here — confirm with the
  # model definition.
  depth: 2
  n_layers: 1

# Environment / rollout settings.
env:
  # NOTE(review): presumably the number of environment steps per rollout
  # (episode length) — confirm.
  num_steps: 50
  # NOTE(review): looks like a discount/decay factor in (0, 1] — confirm what
  # quantity it is applied to.
  decay: 0.99
  # NOTE(review): meaning not evident from this file; 0.0 presumably disables
  # the effect — confirm with the environment implementation.
  harden_gaps: 0.0
  # NOTE(review): presumably the number of rollouts collected per update
  # — confirm.
  num_rollouts: 8