# Config composition list. The `_self_` entry suggests this is a Hydra
# defaults list — NOTE(review): confirm against the consuming entry point.
# Each `group: option` item picks the option named `default` for that group.
defaults:
  - policy_model: default
  - q_model: default
  - v_model: default
  - dataset: default
  # Under Hydra semantics, placing `_self_` last means the keys defined in
  # this file are merged after the entries above and win on conflict.
  - _self_

# Top-level run settings. The per-key notes below are inferred from the key
# names only — NOTE(review): confirm against the code that reads this config.

# Presumably the random seed for the run.
seed: 0
# Presumably toggles Weights & Biases logging. Written as a canonical
# lowercase boolean so it loads identically under YAML 1.1 and 1.2 parsers.
wandb: true
# Presumably epoch counts for the value-model and policy training phases.
value_epochs: 10
policy_epochs: 40
batch_size: 32
# Presumably how often a checkpoint is saved — TODO confirm the unit
# (epochs vs. steps).
save_interval: 5
# Presumably the number of evaluation episodes per evaluation pass.
eval_episodes: 5
# Presumably the maximum number of steps per episode — TODO confirm.
horizon: 200