#!/bin/bash
# Slurm job script: benchmarks microsoft/phi-1_5 on tofu-qa and then runs a
# sweep of fine-tuning jobs on a single A6000 GPU.
#
# Wall-clock limit uses Slurm's days-hours:minutes:seconds format, so the
# value below is 1 day + 24 hours = 48 hours in total.
#SBATCH --time=1-24:00:00
#SBATCH --ntasks=1
#SBATCH --mem=40G
#SBATCH --gres=gpu:A6000:1
#SBATCH --job-name=benchmark_tofu
# Log files are keyed by job id (%j). The array patterns %A/%a are only
# meaningful for job arrays, and this script does not declare one; %j stays
# unique even if the script is submitted with --array on the command line.
# NOTE: the logs/ directory must already exist when the job is submitted;
# Slurm does not create missing output directories.
#SBATCH --output=logs/benchmark_tofu_%j.log   # stdout of the job
#SBATCH --error=logs/benchmark_tofu_error_%j.log     # stderr of the job



# Fail fast on unset variables so a typo in a variable name cannot silently
# pass an empty argument to the Python entry points.
set -u

# Sweep configuration: models to evaluate, tasks/datasets to use, and the
# training-set sizes to fine-tune with.
models=("microsoft/phi-1_5")
tasks=("tofu-qa")
samples=(100 200 500)

# Overall status of the job. A failed run does not stop the remaining runs,
# but it is remembered here so the job does not report success merely because
# the last command happened to succeed.
job_status=0

for model in "${models[@]}"; do
    for task in "${tasks[@]}"; do
        # Benchmark the model once on the task before the fine-tuning sweep.
        if ! python -m src.benchmarking.benchmark \
            --model_name_or_path "$model" \
            --tasks "$task" \
            --ignore_chat_template; then
            printf 'benchmark failed: model=%s task=%s\n' "$model" "$task" >&2
            job_status=1
        fi

        # Fine-tune once per training-set size, training and evaluating on
        # the same task.
        for n_sample in "${samples[@]}"; do
            # NOTE(review): --n_skip_samples is given the same value as
            # --n_samples; presumably this keeps the evaluation split from
            # overlapping the training samples -- confirm against the
            # finetune CLI.
            if ! python -m src.finetuning.finetune \
                --model "$model" \
                --dataset "$task" \
                --n_samples "$n_sample" \
                --epochs 5 \
                --lr 2e-4 \
                --lora_rank 128 \
                --lora_alpha 16 \
                --weight_decay 0.01 \
                --eval_dataset "$task" \
                --n_skip_samples "$n_sample" \
                --dataset_text_field text \
                --batch_size 16; then
                printf 'finetune failed: model=%s task=%s n_samples=%s\n' \
                    "$model" "$task" "$n_sample" >&2
                job_status=1
            fi
        done
    done
done

# Surface any failure to the scheduler through the job's exit code.
(( job_status == 0 )) || exit "$job_status"