# Hugging Face model identifiers ("<org>/<name>") to process.
# Each entry is handed to fix_acc_norm.py through its --model option;
# the order here is the order in which models are run for each task.

MODELS=(
    'meta-llama/Llama-3-70b-chat-hf'
    'Qwen/Qwen1.5-110B-Chat'
    'meta-llama/Meta-Llama-3-70B'
    'mistralai/Mixtral-8x22B-Instruct-v0.1'
    'allenai/OLMo-7B-Instruct'
    'mistralai/Mixtral-8x7B-Instruct-v0.1'
    'Qwen/Qwen1.5-72B-Chat'
    'meta-llama/Llama-2-70b-chat-hf'
    'zero-one-ai/Yi-34B'
    'zero-one-ai/Yi-6B'
)
# Benchmark task names to process, grouped one family per line.
# Each entry is handed to fix_acc_norm.py through its --benchmark option;
# the order here is the order in which tasks are run.
TASKS=(
  'mathqa'
  'arc_normal' 'arc_rephrase' 'arc_synthetic' 'arc_no_cont'
  'sciq'
  'hellaswag_normal' 'hellaswag_rephrase' 'hellaswag_synthetic' 'hellaswag_no_cont'
)

#######################################
# Run fix_acc_norm.py once for every (task, model) combination.
# Tasks form the outer loop and models the inner loop, so all models are
# processed for one task before moving on to the next task.
#
# NOTE: no "skip if output exists" check is performed here; every pair is
# (re)run unconditionally. Whether fix_acc_norm.py itself skips finished
# work cannot be told from this script -- TODO confirm.
#
# Globals:   TASKS (read), MODELS (read)
# Arguments: none
# Outputs:   one diagnostic line on stderr per failed invocation
# Returns:   0 if every invocation succeeded, 1 if at least one failed
#######################################
run_fix_acc_norm() {
  local task model
  local failures=0

  for task in "${TASKS[@]}"; do
    for model in "${MODELS[@]}"; do
      # Quote both expansions so each value reaches python as exactly one
      # argument, with no word-splitting or glob expansion.
      if ! python fix_acc_norm.py --model "$model" --benchmark "$task"; then
        # Keep going: one bad pair should not block the remaining ones,
        # but make the failure visible instead of silently dropping it.
        printf 'fix_acc_norm.py failed: model=%s benchmark=%s\n' \
          "$model" "$task" >&2
        failures=$((failures + 1))
      fi
    done
  done

  return $((failures > 0))
}

run_fix_acc_norm