# Config-group selections composed into this config. The selected groups
# provide the values interpolated in this file: `user.root`, `task.name`,
# `task.rstar_keys`, `policy.name` and `reward.name`.
# NOTE(review): this looks like a Hydra defaults list. If so, `_self_` being
# first means the groups below are merged after this file's own keys and can
# override them. Confirm that ordering is intended.
defaults:
  - _self_
  - user: ab
  - task: gsm8k
  - policy: gemma-2-2b
  - reward: oasst-rm

# Metric identifiers. The first two entries are interpolated from the selected
# task (`task.rstar_keys`) and reward (`reward.name`) configs; `no-response`
# is a literal string.
# NOTE(review): if `task.rstar_keys` resolves to a list, that entry becomes a
# nested list inside `metrics`. Confirm the consumer flattens it.
metrics:
  - ${task.rstar_keys}
  - ${reward.name}
  - no-response
  # Disabled metrics (uncomment to enable):
  # - kl
  # - response

# Identifiers of supplemental files.
# NOTE(review): presumably extra result files read alongside the metrics;
# confirm how the consumer resolves `nums` to a path.
supplemental_files:
  - nums

# Method identifiers to include.
# `rejection-holdout-False` is a single plain string; the trailing `False` is
# part of the name and is not parsed as a YAML boolean.
method_list:
  - piref
  - bon
  - rejection-holdout-False

# Range settings for k: upper bound, lower bound and increment.
# NOTE(review): presumably k is swept from `kmin` to `kmax` in steps of `inc`;
# confirm with the consumer whether `kmax` is inclusive.
ks:
  kmax: 13
  kmin: 2
  inc: 1

# Beta values to sweep.
# Active grid: 1-5 steps per decade, from 1e-4 up to 0.5.
betas:
  - 0.0001
  - 0.0005
  - 0.001
  - 0.005
  - 0.01
  - 0.05
  - 0.1
  - 0.5
  # Disabled alternative grid: 1-3 steps per decade, from 1e-5 up to 0.5.
  # - 0.00001
  # - 0.00003
  # - 0.0001
  # - 0.0003
  # - 0.001
  # - 0.003
  # - 0.01
  # - 0.03
  # - 0.1
  # - 0.3
  # - 0.5

# Base directory, taken from the selected `user` config; `io.load_root` and
# `io.save_root` are built from it.
root: ${user.root}

# Boolean switches, written in canonical lowercase form.
# NOTE(review): exact effects are defined by the consumer; the names suggest
# parallel execution, forced data reload, and computing the standard deviation
# across prompts. Confirm against the calling code.
multiprocessing: true
refresh_data: false
std_over_prompts: true

# Input/output locations and naming.
io:
  # Name prefix assembled from the selected task, policy and reward names;
  # `--shots-0-` is a literal part of the string.
  prefix: ${task.name}-${policy.name}--shots-0-${reward.name}
  # Directories under `root` for loading results and saving figures.
  load_root: ${root}/results
  save_root: ${root}/figures
  # NOTE(review): null here presumably makes the consumer fall back to a
  # default save prefix; confirm.
  save_prefix: null