# Base model: a short identifier plus the checkpoint location under BASEDIR.
model_name: "llama-2"
model_path: "${BASEDIR}/tofu-llama-2-7b"
# NOTE(review): what a value of 0 selects is not visible in this file;
# confirm against the code that reads num_layer.
num_layer: 0

# Objective name and numeric precision, both kept as explicit strings.
# NOTE(review): "dpo_kl" presumably names a DPO loss with a KL term; confirm.
loss_type: "dpo_kl"
data_type: "bfloat16"

# Learning-rate schedule. learning_rate is a placeholder filled in from an
# "lr" value defined outside this file. It is left unquoted on purpose:
# quoting could change the resulting type if the placeholder is substituted
# as text before the file is parsed.
learning_rate: ${lr}
lr_scheduler_type: "linear"
weight_decay: 0.0
warmup_ratio: 0.1

# NOTE(review): presumably the weight of the "remember" (retain) term of the
# loss; confirm against the training code.
remember_weight: 1.0

# LoRA adapter settings. The default configuration uses no LoRA.
# NOTE(review): r: 0 is presumably what turns the adapter off, and the keys
# look like PEFT LoraConfig fields; confirm against the model-loading code.
Lora:
  r: 0
  alpha: 32
  dropout: 0.05
  # The literal string "none", not a YAML null.
  bias: "none"
  task_type: "CAUSAL_LM"
