---
# Run-level settings: seed, dtype, base model, and output naming.
# NOTE(review): the code that consumes these keys is not visible from this
# file; the per-key notes below are inferred from key names — confirm against
# the loader.

# Seed value for the run.
seed: 42
# Dtype name as a string; presumably resolved to a torch dtype by the loader.
torch_dtype: "bfloat16"

# Looks like a Hugging Face Hub repo id (org/name) — verify a local path is
# not expected here.
model_path: "allenai/Llama-3.1-Tulu-3-8B-SFT"
# Presumably a base name from which run names are derived — TODO confirm.
base_run_name: "tulu8bsft-normed-preference_ultrafeedback_llama"
# Presumably the root directory for run outputs — TODO confirm.
base_output_dir: "/XXXX-3/"

# LoRA adapter hyperparameters. The key names match fields of PEFT's
# LoraConfig — presumably forwarded to it; confirm against the loader.
lora:
  task_type: "CAUSAL_LM"
  # If standard LoRA scaling (lora_alpha / r) applies, these two values give
  # a scaling factor of 16 / 64 = 0.25.
  r: 64
  lora_alpha: 16
  lora_dropout: 0.1
  # This is the string "none", not a YAML null; quoted to make that explicit.
  bias: "none"

# Trainer arguments. Most key names match Hugging Face TrainingArguments
# fields — presumably forwarded to a trainer config; confirm against the
# loader, which is not visible from this file.
training:
  # Batch and sequence sizing.
  per_device_train_batch_size: 4
  gradient_accumulation_steps: 1
  max_length: 2048
  num_train_epochs: 3

  # Optimizer schedule.
  learning_rate: 2.0e-5
  lr_scheduler_type: "linear"
  warmup_ratio: 0.1

  # Preference-loss options — presumably a DPO-style beta and a switch for
  # normalizing log-probabilities; TODO confirm against the trainer.
  # NOTE(review): base_run_name contains "normed" while normalize_logps is
  # false — confirm this combination is intended.
  beta: 0.1
  normalize_logps: false

  # Precision.
  bf16: true
  bf16_full_eval: true

  # Gradient checkpointing.
  gradient_checkpointing: true
  gradient_checkpointing_kwargs:
    use_reentrant: false

  # Logging and checkpoint cadence.
  report_to: "wandb"
  logging_steps: 1
  save_steps: 1000
