# Base model and run-level settings.
# Model identifier in `org/name` form (looks like a Hugging Face Hub id).
model_id: "meta-llama/Llama-3.2-3B"
# Maximum sequence length (presumably counted in tokens; confirm against the
# training script).
max_seq_length: 1024
# Base path for checkpoints. The "_base" suffix suggests the consumer appends a
# per-run subdirectory (TODO confirm).
output_dir_base: "./checkpoints/llama_instruct_ft"

# Hugging Face training arguments
per_device_train_batch_size: 1
gradient_checkpointing: true
# Batch size 1 x 16 accumulation steps = 16 sequences per optimizer step on
# each device.
gradient_accumulation_steps: 16
max_grad_norm: 0.3
# NOTE(review): in transformers the "constant" schedule (see lr_scheduler_type
# below) has no warmup phase, so this ratio is likely ignored. Switch the
# scheduler to "constant_with_warmup" if warmup is actually intended (confirm).
warmup_ratio: 0.03
max_steps: 1875
learning_rate: 0.00004
lr_scheduler_type: "constant"
bf16: true
tf32: true
optim: "paged_adamw_32bit"
logging_steps: 10
logging_dir: "./logs"
# Checkpoint every 500 steps, keeping at most the 5 most recent checkpoints.
save_strategy: "steps"
save_steps: 500
save_total_limit: 5
# NOTE(review): eval_steps only drives periodic evaluation when the evaluation
# strategy is "steps"; no strategy is set in this file. Confirm the training
# script sets it, otherwise evaluation will not run every 250 steps.
eval_steps: 250
do_train: true
do_eval: true
# "none" is the documented value for disabling all reporting integrations; an
# empty string is not a recognised integration name.
report_to: "none"
per_device_eval_batch_size: 1

# LoRA adapter settings.
# Key names resemble PEFT's LoraConfig fields; presumably the training script
# maps them across (confirm).

# What the adapter is attached to.
task_type: "CAUSAL_LM"
target_modules: "all-linear"

# Regularisation and bias handling.
lora_bias: "none"
lora_dropout: 0.1

# Rank and scaling. With rank-stabilized scaling off, standard LoRA scaling
# would be alpha / r = 16 / 64 = 0.25 (assumes PEFT semantics; confirm).
lora_r: 64
lora_alpha: 16
use_rslora: false

# Dataset selection and prompt formatting.
# Both names below are resolved by the training script, not by this file.
dataset: "unnatural_instruct"
prompt_format: "guanaco"
# false presumably means the loss covers prompt and completion tokens alike
# (confirm against the data collator).
only_train_on_completions: false
