#!/bin/bash

# Launches scripts/train/dpo.py for one iteration.
#
# Usage: <this script> <iter_id>
#   iter_id  positive integer; selects the preference data directory
#            (data/alfworld/pref/iter<iter_id>) and, via iter_id - 1, the
#            model id suffix to start from.
#
# Exits 1 (message on stderr) when iter_id is missing or not a positive
# integer; otherwise exits with the status of the python command.
set -euo pipefail

# Check if an iteration id is provided
if [[ $# -eq 0 ]]; then
    echo "No iteration id provided. Usage: $0 <iter_id>" >&2
    exit 1
fi

iter_id=$1

# Reject anything that is not a positive decimal integer without leading
# zeros. Shell arithmetic would otherwise treat a word as a variable name
# (silently yielding -1 below), choke on values such as "08" (invalid octal),
# or produce a negative previous-iteration suffix for 0.
if [[ ! "${iter_id}" =~ ^[1-9][0-9]*$ ]]; then
    echo "Invalid iteration id '${iter_id}': expected a positive integer. Usage: $0 <iter_id>" >&2
    exit 1
fi

prev_iter_id=$((iter_id - 1))

# set variables
readonly learning_rate=1e-7
readonly beta=0.5
readonly num_train_epochs=0.1

# No trailing slash here: the separator is added where the path is joined,
# so the output directory does not contain a doubled "//".
readonly save_prefix="save/240817"
readonly save_model_name="alfworld_dpo_lr${learning_rate}_bt${beta}_ep${num_train_epochs}/iter${iter_id}"

echo "Iteration id: ${iter_id}"
echo "Saving model as: ${save_model_name}"

# Arguments are collected in an array so that every expansion is passed as
# exactly one word and no line-continuation backslashes are needed (a
# dangling backslash after the last flag would swallow any line added later).
train_args=(
    --data_dir "data/alfworld/pref/iter${iter_id}"
    --output_dir "${save_prefix}/${save_model_name}"
    --model_id "anonymous/Meta-Llama-3-8B-Instruct-sft-alfworld-iter${prev_iter_id}"
    --per_device_train_batch_size 1
    --per_device_eval_batch_size 1
    --gradient_accumulation_steps 8
    --num_train_epochs "${num_train_epochs}"
    --force_use_ref_model False
    --gradient_checkpointing True
    --max_length 2048
    --max_prompt_length 1024
    --torch_dtype bfloat16
    --optim adamw_torch_fused
    --learning_rate "${learning_rate}"
    --eval_strategy steps
    --eval_steps 50
    --save_strategy steps
    --save_steps 50
    --save_total_limit 3
    --load_best_model_at_end True
    --metric_for_best_model eval_loss
    --use_peft True
    --beta "${beta}"
    --lora_alpha 64
    --lora_r 128
    --lora_dropout 0.05
    --lr_scheduler_type cosine
    --max_grad_norm 0.3
    --warmup_steps 10
    --bf16
    --seed 42
    --report_to tensorboard
    --logging_first_step
    --logging_steps 10
    --push_to_hub False
)

python scripts/train/dpo.py "${train_args[@]}"