# Run-level switches.
# NOTE(review): presumably controls whether the trained model is written out
# after training — confirm against the consuming script.
save_model: true
# Model family identifier; its value is interpolated into `save_dir` below.
model_family: phi

# LoRA hyperparameters (rank, alpha, dropout).
LoRA:
  # NOTE(review): a rank of 0 presumably means LoRA is disabled and the full
  # model is fine-tuned — confirm against the consuming script.
  r: 0
  alpha: 32
  dropout: 0.05

# Dataset selection and optimization hyperparameters.
# NOTE(review): `data_path` looks like a dataset hub identifier — confirm how
# the consuming script resolves it.
data_path: locuslab/TOFU
split: full
batch_size: 4
gradient_accumulation_steps: 1
num_epochs: 5
# Written with an explicit mantissa dot: YAML 1.1 loaders such as PyYAML only
# resolve exponent notation to a float when a "." is present, so a bare `1e-5`
# would be loaded as the string "1e-5" instead of a number.
lr: 1.0e-5
# Output directory name assembled from other keys of this file through
# `${...}` placeholders (`weight_decay` is defined further down).
# NOTE(review): the placeholders look like OmegaConf/Hydra interpolation and
# are resolved by the consumer, not by YAML itself — confirm.
# Quoted so the template is always read as one literal string.
save_dir: './checkpoints/ft_epoch${num_epochs}_lr${lr}_${model_family}_${split}_wd${weight_decay}'

# Also interpolated into `save_dir` above (as `wd${weight_decay}`).
weight_decay: 0.01
seed: 100
# NOTE(review): presumably asks the model loader to use FlashAttention-2
# kernels — confirm against the consuming script and installed packages.
use_flash_attention_2: true
# Model-parallelism configuration.
parallelism:
  # Accepted values: "pipeline", "tensor", "zero", or null for the default.
  strategy: pipeline
  # Used with pipeline parallelism: the model is split into this many stages.
  num_stages: 4

# Optional performance profiling.
profiling:
  # Switch to true to profile performance while training runs.
  enabled: false