#!/bin/bash

# Usage: <script> <model_path> <task[,task...]>
#   model_path  Path to the model. Its last path component becomes the model
#               name used for the results directory.
#   tasks       Comma-separated list of tasks to run, e.g. "mmlu,bbh".
#               Tasks handled by run_task: mmlu, gsm, bbh, tydiqa, humaneval.
# Variables read:
#   job_name    Optional label printed in the banner (default: run_nait).

# Fail fast when an argument is missing or empty; otherwise the script would
# print a banner, run nothing, and still exit successfully.
if [[ -z "${1:-}" || -z "${2:-}" ]]; then
  printf 'Usage: %s <model_path> <task[,task...]>\n' "${0##*/}" >&2
  printf 'Example: %s /path/to/model mmlu,bbh\n' "${0##*/}" >&2
  exit 2
fi

model_path=$1
selected_tasks=$2  # comma-separated task names, e.g. "mmlu,bbh"

# Use the last component of model_path as model_name.
# "--" keeps a path that starts with "-" from being parsed as an option.
model_name=$(basename -- "$model_path")

echo "----------------------------"
echo "Job Name: ${job_name:-run_nait}"
echo "Selected Tasks: $selected_tasks"
echo "Model Path: $model_path"
echo "Model Name: $model_name"
echo "----------------------------"

# Split the comma-separated task list into the tasks array. A here-string is
# used instead of piping through echo so values such as "-n" or "-e" are not
# swallowed as echo options. IFS is changed only for this read command.
IFS=',' read -r -a tasks <<< "$selected_tasks"

#######################################
# Run a single evaluation task for the selected model.
# Every evaluation is launched as `python -m eval.<task>.run_eval` with
# HF_ENDPOINT set to https://hf-mirror.com for that python process only.
# Globals:
#   model_path (read) - passed as both the model and the tokenizer path
#   model_name (read) - names the output directory results/<model_name>/...
# Arguments:
#   $1 - task name: mmlu, gsm, bbh, tydiqa or humaneval
# Outputs:
#   A "Running ..." progress line on stdout; an "Unknown task" diagnostic on
#   stderr when $1 is not one of the names above.
# Returns:
#   The exit status of the python command, or 1 for an unknown task.
#######################################
run_task() {
  local task=$1

  # All expansions of model_path / model_name are quoted so that paths
  # containing spaces or glob characters reach python as single arguments.
  case "$task" in
    mmlu)
      echo "Running MMLU evaluation..."
      HF_ENDPOINT=https://hf-mirror.com python -m eval.mmlu.run_eval \
          --ntrain 5 \
          --data_dir ../datasets/eval_dataset/mmlu/data \
          --save_dir "results/${model_name}/mmlu" \
          --model_name_or_path "${model_path}" \
          --tokenizer_name_or_path "${model_path}" \
          --eval_batch_size 256 \
          --use_vllm
      ;;
    gsm)
      echo "Running GSM evaluation..."
      HF_ENDPOINT=https://hf-mirror.com python -m eval.gsm.run_eval \
          --data_dir ../datasets/eval_dataset/gsm \
          --max_num_examples 2000 \
          --save_dir "results/${model_name}/gsm" \
          --model_name_or_path "${model_path}" \
          --tokenizer "${model_path}" \
          --n_shot 8 \
          --use_vllm
      ;;
    bbh)
      echo "Running BBH evaluation..."
      HF_ENDPOINT=https://hf-mirror.com python -m eval.bbh.run_eval \
          --data_dir ../datasets/eval_dataset/BBH \
          --save_dir "results/${model_name}/bbh" \
          --model "${model_path}" \
          --tokenizer "${model_path}" \
          --max_num_examples_per_task 40 \
          --use_vllm
      ;;
    tydiqa)
      echo "Running TyDiQA evaluation..."
      HF_ENDPOINT=https://hf-mirror.com python -m eval.tydiqa.run_eval \
          --data_dir ../datasets/eval_dataset/tydiqa/ \
          --n_shot 1 \
          --max_num_examples_per_lang 100 \
          --max_context_length 512 \
          --save_dir "results/${model_name}/tydiqa" \
          --model "${model_path}" \
          --tokenizer "${model_path}" \
          --eval_batch_size 20 \
          --use_vllm
      ;;
    humaneval)
      echo "Running HumanEval evaluation..."
      # NOTE(review): the --data_file_hep path below omits the eval_dataset/
      # component that every other dataset path in this function uses —
      # confirm this is intentional.
      HF_ENDPOINT=https://hf-mirror.com python -m eval.codex_humaneval.run_eval \
          --data_file ../datasets/eval_dataset/human_eval/HumanEval.jsonl.gz \
          --data_file_hep ../datasets/human_eval/HumanEval.jsonl \
          --max_num_examples 200 \
          --model_name_or_path "${model_path}" \
          --tokenizer_name_or_path "${model_path}" \
          --save_dir "results/${model_name}/codex_eval" \
          --eval_batch_size 1 \
          --unbiased_sampling_size_n 20 \
          --temperature 0.8 \
          --eval_pass_at_ks 10 \
          --use_vllm
      ;;
    *)
      # Diagnostics go to stderr, and the non-zero status lets callers
      # detect a mistyped task name.
      printf '%s\n' "Unknown task: $task" >&2
      return 1
      ;;
  esac
}

#######################################
# Run each requested task in order via run_task.
# Surrounding whitespace is trimmed from every name (so "mmlu, bbh" works),
# and empty entries are skipped. Names are passed quoted, so they are never
# word-split or glob-expanded. A failing task does not stop the remaining
# tasks from running.
# Arguments:
#   Task names, one per argument.
# Outputs:
#   A "Task failed" line on stderr for each run_task call that returns
#   non-zero.
# Returns:
#   0 if every run_task call succeeded, 1 otherwise.
#######################################
run_selected_tasks() {
  local task failed=0
  for task in "$@"; do
    # Strip leading, then trailing, whitespace using parameter expansion.
    task="${task#"${task%%[![:space:]]*}"}"
    task="${task%"${task##*[![:space:]]}"}"
    [[ -n "$task" ]] || continue
    if ! run_task "$task"; then
      printf 'Task failed: %s\n' "$task" >&2
      failed=1
    fi
  done
  return "$failed"
}

# Iterate over the parsed task array; exit non-zero if any task failed so
# that callers can detect a partially failed evaluation run.
run_selected_tasks "${tasks[@]}" || exit 1