#!/usr/bin/env bash
#
# Sweep lm_eval (vLLM backend) over every configured model, dataset and
# few-shot count.
#
# Usage: <this-script> OUTPUT_PATH
#   OUTPUT_PATH  forwarded to every lm_eval invocation as --output_path.
#
# Bash is required: the configuration below is stored in arrays.

# Set environment variables (exported so the lm_eval child processes see them)
export VLLM_WORKER_MULTIPROC_METHOD=spawn
export HF_ENDPOINT=https://hf-mirror.com

# Define the array of fewshot values, datasets, and output path
fewshot_values=(5 4 3 2 1)
datasets=("mmlu" "mmlu_generative" "math_hard" "mmlu_pro")  # Add your datasets here
# First positional argument; left empty when the argument is omitted.
output_path=${1:-}

# Define the models to iterate over; each entry is a directory name that is
# appended to /huggingface/models/ when the evaluation is launched.
models=("Meta-Llama-3.2-3B-Instruct")

#######################################
# Run a single lm_eval evaluation on the vLLM backend, optionally adding
# --fewshot_as_multiturn.
# Globals:
#   output_path (read) - forwarded to lm_eval as --output_path
# Arguments:
#   $1 - model directory name, appended to /huggingface/models/
#   $2 - task name passed to --tasks
#   $3 - value passed to --num_fewshot
#   $4 - "true" adds --fewshot_as_multiturn; any other value omits it
# Returns:
#   lm_eval's exit status, or 2 when output_path is empty.
#######################################
run_evaluation() {
  local model=$1
  local dataset=$2
  local fewshot=$3
  local multiturn_flag=$4

  # Refuse to launch with an empty output path instead of silently passing
  # `--output_path ""` to lm_eval.
  if [[ -z "${output_path:-}" ]]; then
    printf 'run_evaluation: output_path is empty; pass the output path as the first script argument\n' >&2
    return 2
  fi

  # Optional flags live in a local array: nothing leaks into the caller's
  # scope, and the expansion does not rely on word-splitting (and thus IFS).
  local -a extra_args=()
  if [[ "$multiturn_flag" == "true" ]]; then
    extra_args+=(--fewshot_as_multiturn)
  fi

  local model_args="pretrained=/huggingface/models/${model}"
  model_args+=",data_parallel_size=8,dtype=bfloat16,enable_chunked_prefill=True"

  # Run the evaluation
  lm_eval \
    --model vllm \
    --model_args "$model_args" \
    --tasks "$dataset" \
    --batch_size 16 \
    --log_samples \
    --output_path "$output_path" \
    --apply_chat_template --num_fewshot "$fewshot" "${extra_args[@]}"
}

# Iterate over models, datasets, and fewshot values. Each combination is run
# twice: first with --fewshot_as_multiturn ("true"), then without ("false").
# A failed run does not stop the sweep; failures are collected, reported on
# stderr, and make the script exit non-zero once the sweep has finished.
failed_runs=()
for model in "${models[@]}"; do
  for dataset in "${datasets[@]}"; do
    for fewshot in "${fewshot_values[@]}"; do
      for multiturn in true false; do
        if ! run_evaluation "$model" "$dataset" "$fewshot" "$multiturn"; then
          run_desc="model=${model} dataset=${dataset} fewshot=${fewshot} multiturn=${multiturn}"
          failed_runs+=("$run_desc")
          printf 'error: evaluation failed: %s\n' "$run_desc" >&2
        fi
      done
    done
  done
done

if (( ${#failed_runs[@]} > 0 )); then
  printf '%d evaluation run(s) failed:\n' "${#failed_runs[@]}" >&2
  printf '  %s\n' "${failed_runs[@]}" >&2
  exit 1
fi
