# Hydra defaults list. Each `group@_global_: option` entry selects one option
# from a config group and, because of the `@_global_` package, places that
# option's contents at the config root instead of under a key named after the
# group.
defaults:
  # `_self_` is listed first, so this file's own values are composed before
  # the groups below; on key collisions the later entries take precedence
  # over the values defined in this file. Do not reorder without intent.
  - _self_
  - policy@_global_: default
  - task@_global_: gsm8k
  - base_model@_global_: llama3i8b
  - optimization@_global_: reinforce
  - mode@_global_: training
  - run@_global_: default

# Top-level run settings. The code that reads them is outside this file, so
# the notes here are limited to what the values themselves show. Because
# `_self_` is first in the defaults list, any of these may be overridden by a
# selected config group that defines the same key.
num_iters: 2000
# NOTE(review): presumably counted in iterations, like num_iters - confirm.
test_interval: 10
# Written with an explicit mantissa dot. YAML 1.1 loaders such as PyYAML
# resolve the dot-less form `2e-3` to the string '2e-3' rather than the float
# 0.002; `2.0e-3` is a float under both those loaders and OmegaConf.
lr: 2.0e-3
batch_size: 256
seed: 42
init_val: 0.1
test_only: false
# Explicit null: no model directory is set by default.
model_dir: null
save_legacy_params: false
use_lora: false

# Explicit null: no run name is set by default.
# NOTE(review): how a null run_name is handled is up to the consumer - confirm.
run_name: null

# Explicit null by default.
# NOTE(review): presumably a checkpoint path to resume from - confirm.
load_ckpt: null
# Suffix appended to exp_name below (yields a trailing "-st" by default).
exp_suffix: 'st'

# Resolved by interpolation at access time. `base_model_name` and `optim_name`
# are not defined in this file; presumably they come from the selected
# base_model / optimization groups - verify, since resolution fails if either
# key is missing from the composed config.
exp_name: ${base_model_name}/${optim_name}-${exp_suffix}

wandb_log: true # enabled by default
wandb_project: shakeoff
# Mirrors exp_name, so it resolves to the same "<base_model>/<optim>-<suffix>"
# string.
wandb_group_name: ${exp_name}
extract_svd: false

# Base output location; interpolated into hydra.run.dir.
out_dir: results

# Hydra runtime settings.
hydra:
  run:
    # The run directory is built from out_dir alone (no timestamp or run-name
    # component), so every run sharing the same out_dir resolves to the same
    # directory.
    # NOTE(review): confirm that reusing one directory across runs is intended.
    dir: ${out_dir}/