# supports distilbert-base-cased, roberta-large, gpt2-xl, facebook/opt-2.7b, facebook/opt-6.7b
# algorithm list - {'FO-SGD', 'FO-Adam', 'ZO', 'ZOSVRG'}
# tasks supported - {'mnli', 'sst2', 'qnli', 'cola'}
# Only single-GPU experiments are supported. Indicate the device you want to run this on.
# The --full_parameter flag enables full-parameter fine-tuning. Remove it for partial fine-tuning.

# For ZO methods, "batchsize" argument is effective batch size after accumulation
# and "batchsize_limit" is true batch size. For FO, ignore batchsize_limit argument
# results argument takes path to store dictionary of results (Losses, Accuracies, Training Time etc.) 
# lr argument is \eta in paper or \eta_1 for MeZO-SVRG
# lr_mezosvrg_mb is \eta_2

# Launches one ZO fine-tuning run of finetune_llm.py on device 0 and captures
# its stdout and stderr in a dated log file.
log_dir='logs/20250909'
log_file="${log_dir}/gpt2_small_MeZO_lr1e5.log"

# The shell opens the redirect target before it starts python, so the full
# dated directory (not just "logs") has to exist or the run never starts.
mkdir -p "${log_dir}"
# NOTE(review): the --results path below points into data/20250909; this script
# does not create that directory -- confirm finetune_llm.py handles it.
{
    echo 'Running ZO-SGD on GPU 0'
    python finetune_llm.py \
        --epochs 2000 \
        --samplesize 256 \
        --samplesize_validation 128 \
        --model_name 'gpt2' \
        --task 'sst2' \
        --max_seq_length 128 \
        --full_parameter \
        --algorithm 'ZO' \
        --batchsize 32 \
        --batchsize_limit 32 \
        --lr 1e-5 \
        --perturbation_scale 1e-3 \
        --lr_mezosvrg_mb 1e-5 \
        --device 0 \
        --results 'data/20250909/gpt2_small' \
        > "${log_file}" 2>&1
} || echo "Warning: finetune_llm.py exited with a non-zero status; see ${log_file}" >&2
# Nothing above is started in the background, so this returns immediately; it
# is kept as the barrier for runs launched with a trailing '&'.
wait
echo "Batch 1 completed"
echo 'All experiments completed'
