# Defaults list (Hydra convention — presumably this file is loaded through
# Hydra; confirm). `_self_` is the only entry, so nothing else is composed in
# from here.
defaults:
  - _self_

# Settings describing the language model: its name, dtype, and location.
llm_params:
  model_name: 'llama2-7b'
  model_dtype: 'bfloat16'
  # Looks like a placeholder value; must stay quoted, since a bare `...` is
  # the YAML document-end marker.
  model_path: '...'

# LoRA settings. Indented with 2 spaces to match every other mapping in this
# file.
lora_params:
  lora_checkpoint: '...'  # path to the LoRA checkpoint

# DPO training settings. The commented-out keys below are disabled options
# kept for reference.
# NOTE(review): both num_train_epochs and max_steps are set. If this mapping is
# forwarded to Hugging Face TrainingArguments (not visible here — confirm), a
# positive max_steps overrides num_train_epochs, so the epoch count would have
# no effect.
dpo_params:
  beta: 0.1
  per_device_train_batch_size: 8
  # per_device_eval_batch_size: 4
  num_train_epochs: 10
  bf16: true
  # max_prompt_length: 2048
  # max_length: 2048
  max_steps: 1000
  logging_steps: 2
  # eval_steps: 100


# Optimizer / learning-rate schedule settings.
opt_params:
  # Written with an explicit decimal point in the mantissa: YAML 1.1 loaders
  # such as PyYAML resolve a bare `5e-5` to a string rather than a float.
  lr: 5.0e-5
  lr_scheduler_type: 'constant'

# Output directory is stamped with the launch date and hour+minute through the
# `now` resolver (presumably Hydra's; confirm). The time part has minute
# resolution, so two runs started in the same minute resolve to the same path.
output_dir: "./exp/local/${now:%Y.%m.%d}/${now:%H%M}"
log_dir: "./nohups/"
seed: 2023
# NOTE(review): run_name says "llama3" while llm_params.model_name is
# "llama2-7b" — confirm which one is intended.
run_name: "iterative-llama3-dpo"

# Location of the DPO preference data. The whitespace-only line that used to
# follow this mapping has been removed (trailing spaces).
data_params:
  data_base: '...'  # base path for the DPO preference dataset
  data_name: '...csv'  # file name of the DPO CSV file
  # Full dataset path, built from the two values above by interpolation.
  data_path: '${data_params.data_base}/${data_params.data_name}'
