# Config-group selections composed into this configuration.
# NOTE(review): this looks like a Hydra defaults list. If so, entries are
# merged in the order listed, and placing `_self_` last means the keys set
# in this file take precedence over the selected groups. Confirm before
# reordering.
defaults:
  - reward_model: default
  - dataset: default
  - _self_

# Run-level settings.
# NOTE(review): the descriptions below are inferred from the key names only;
# confirm against the code that consumes this config.

# Random seed (presumably for reproducibility of the run).
seed: 52
# Presumably toggles Weights & Biases logging. Written as a canonical
# lowercase boolean so every YAML loader reads it as a bool.
wandb: false
# Presumably the number of training epochs.
epochs: 50
# Presumably the number of samples per step.
batch_size: 1
# Presumably how often to save a checkpoint; the unit (epochs or steps) is
# not visible from this file. TODO confirm.
save_interval: 1

