#!/usr/bin/env bash
# file: run_loop.sh

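# The tasks and models to iterate over are defined in the arrays below.
# Other task names kept from earlier configurations (presumably also accepted
# by model_eval.py -- verify before use):
#   pair_jailbreakbench pair_strongreject pair_harmbench
#   pap_jailbreakbench pap_strongreject pap_harmbench
#   HEx-PHI
#   "JailbreakBench Strongreject Harmbench"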
TASKS=("JailbreakBench" "Strongreject" "Harmbench")
# Add the paths to the models you want to evaluate here
MODELS=()
# Add the names of the models here. MODELS_NAME[j] is the name passed as
# --target_name for MODELS[j], so both arrays must have the same length.
MODELS_NAME=()

#######################################
# Run model_eval.py once for a single (task, model) pair.
# Arguments: $1 - task name
#            $2 - model path (passed as --model_path)
#            $3 - model name (passed as --target_name)
# Outputs:   a ">>>> Starting" line on stdout, plus whatever python prints
# Returns:   the exit status of the python command
#######################################
run_model_eval() {
  local task=$1 model=$2 model_name=$3
  echo ">>>> Starting: python model_eval.py --task $task --model $model_name"
  # Every expansion is quoted so that a path or name containing spaces or
  # glob characters reaches python as exactly one argument.
  python model_eval.py \
    --model_path "$model" \
    --target_name "$model_name" \
    --tasks "$task" \
    --backdoor None \
    --judge "strongreject_judge" \
    --output_path ./ \
    --num_generate 8
}

#######################################
# Evaluate every model on every task, stopping at the first failure.
# Globals:   TASKS, MODELS, MODELS_NAME (read)
# Outputs:   progress on stdout, diagnostics on stderr
# Returns:   0 when every run succeeded (or there was nothing to run),
#            1 when MODELS and MODELS_NAME differ in length,
#            otherwise the exit status of the first failing run
#######################################
run_all_evals() {
  local num_models=${#MODELS[@]}
  local num_names=${#MODELS_NAME[@]}
  local task j rc

  # The arrays are indexed in parallel; a mismatch would either drop the
  # value of --target_name or attach the wrong name to a model.
  if (( num_models != num_names )); then
    echo "<<<< Config error: MODELS has $num_models entries but MODELS_NAME has $num_names." >&2
    return 1
  fi

  # Not an error (the script still succeeds), but make the no-op visible.
  if (( num_models == 0 )); then
    echo "Warning: MODELS is empty; nothing to evaluate." >&2
  fi

  for task in "${TASKS[@]}"; do
    for ((j = 0; j < num_models; j++)); do
      run_model_eval "$task" "${MODELS[j]}" "${MODELS_NAME[j]}"
      rc=$?
      if (( rc != 0 )); then
        echo "<<<< Failed! task=$task model=${MODELS[j]} exit code $rc, script terminated." >&2
        return "$rc"
      fi
    done
  done
  return 0
}

# Propagate the first failing exit code to the caller of the script.
run_all_evals || exit $?
echo "==== All completed ===="



