# Launch scripts/train/sft.py for the ALFWorld SFT run "iter0".
#
# Inputs / outputs (taken directly from the flags below):
#   - training data is read from   data/alfworld/sft/iter0
#   - results are written under    save/tmp/2407/alfworld_sft/iter0
#   - base model id:               meta-llama/Meta-Llama-3-8B-Instruct
#
# Run from the repository root: all paths are relative.
#
# NOTE(review): flag names look like Hugging Face TrainingArguments plus
# script-specific options (--use_peft, --lora_*, --packing, ...); their exact
# meaning is defined by sft.py — confirm there before changing values.
# NOTE(review): --lora_alpha (64) is smaller than --lora_r (128); presumably
# intentional — verify against the experiment config.
#
# Each option must appear exactly once, and the final line must NOT end with a
# backslash, otherwise whatever follows the command is appended to it.
python scripts/train/sft.py \
    --data_dir data/alfworld/sft/iter0 \
    --output_dir save/tmp/2407/alfworld_sft/iter0 \
    --model_id meta-llama/Meta-Llama-3-8B-Instruct \
    --per_device_train_batch_size 4 \
    --per_device_eval_batch_size 4 \
    --gradient_accumulation_steps 8 \
    --num_train_epochs 1 \
    --gradient_checkpointing True \
    --max_seq_length 8000 \
    --packing False \
    --torch_dtype bfloat16 \
    --optim adamw_torch_fused \
    --evaluation_strategy steps \
    --eval_steps 200 \
    --save_strategy steps \
    --save_steps 200 \
    --save_total_limit 3 \
    --load_best_model_at_end True \
    --metric_for_best_model eval_loss \
    --use_peft True \
    --lora_alpha 64 \
    --lora_r 128 \
    --lora_dropout 0.05 \
    --lr_scheduler_type cosine \
    --max_grad_norm 0.3 \
    --warmup_steps 10 \
    --bf16 \
    --seed 42 \
    --report_to wandb \
    --logging_first_step \
    --logging_steps 10 \
    --push_to_hub False