#!/bin/bash
#SBATCH --time=1-24:00:00
#SBATCH --ntasks=1
#SBATCH --mem=40G
#SBATCH --gres=gpu:A6000:1
#SBATCH --job-name=benchmark_perp
#SBATCH --output=logs/benchmark_perp_%A_%a.log   # Log file for each task
#SBATCH --error=logs/benchmark_perp_error_%A_%a.log     # Error log file for each task


# Values passed to --model_name_or_path, in evaluation order: three
# "org/name" identifiers first, then six local checkpoint directories.
relearn_root="/data/llm_weights/soft_mem/unlearning_vs_safety"

models=(
    "HuggingFaceH4/zephyr-7b-beta"
    "cais/Zephyr_RMU"
    "J4Q8/zephyr-npo-bio"
)

# The local checkpoints all live at
#   <relearn_root>/<family>/relearn_wmdp_bio-retain-corpus_<suffix>
# so generate them instead of spelling out each path. The family loop is the
# outer one so that all checkpoints of one family stay adjacent.
for family_dir in "cais_Zephyr_RMU" "J4Q8_zephyr-npo-bio"; do
    for corpus_suffix in 5 10 50; do
        models+=("${relearn_root}/${family_dir}/relearn_wmdp_bio-retain-corpus_${corpus_suffix}")
    done
done

# Values passed to --tasks.
tasks=("wmdp-bio")

# Run src.benchmarking.perp_eval for every (model, task) pair, twice per pair:
# first with the --append_options flag, then without it.
#
# A failing run does not stop the sweep; it is recorded, and once all runs
# have been attempted the script exits non-zero with a summary on stderr.
# Without this, only the status of the very last python call would decide
# the job's exit code and earlier crashes would go unnoticed.

# Tokenizer passed to every run, regardless of which model is evaluated.
tokenizer="HuggingFaceH4/zephyr-7b-beta"

# Human-readable labels of the runs that exited non-zero.
failed_runs=()

#######################################
# Launch one perp_eval run.
# Globals:   tokenizer (read)
# Arguments: $1 - value for --model_name_or_path
#            $2 - value for --tasks
#            $3.. - optional extra flags, inserted right after --tasks
# Returns:   exit status of the python process
#######################################
run_perp_eval() {
    local model=$1 task=$2
    shift 2
    python -m src.benchmarking.perp_eval \
        --model_name_or_path "$model" \
        --tasks "$task" \
        "$@" \
        --wandb_project "benchmarking" \
        --batch_size 1 \
        --tokenizer "$tokenizer"
}

for model in "${models[@]}"; do
    for task in "${tasks[@]}"; do
        # First run: with --append_options
        run_perp_eval "$model" "$task" --append_options \
            || failed_runs+=("$model / $task (--append_options)")

        # Second run: same settings without --append_options
        run_perp_eval "$model" "$task" \
            || failed_runs+=("$model / $task")
    done
done

if (( ${#failed_runs[@]} > 0 )); then
    printf 'perp_eval failed for %d run(s):\n' "${#failed_runs[@]}" >&2
    printf '  %s\n' "${failed_runs[@]}" >&2
    exit 1
fi