#!/bin/bash
#
# Entry point of the evaluation run: parses the command line and mirrors all
# further output into a timestamped log file.
#
# Usage: bash evaluate.bash <model_name> [general_bench=true|false]
#   model_name     Model name or path handed to the evaluation modules.
#   general_bench  Switch for the supergpqa, bbeh and mmlupro evaluations.
#                  Defaults to "true"; any other value skips them.

export VLLM_DISABLE_COMPILE_CACHE=1

model_name=${1:-}
general_bench=${2:-true}

# Fail fast: without a model name the log file would be named
# "evaluate__<timestamp>.log" and every later python call would be invoked
# with a missing or empty model argument.
if [[ -z "${model_name}" ]]; then
  echo "Usage: bash ${0##*/} <model_name> [general_bench=true|false]" >&2
  exit 2
fi

# Logs are kept in a "log" directory next to this script; create it if needed.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
LOG_DIR="${SCRIPT_DIR}/log"
mkdir -p "${LOG_DIR}"

# Build a timestamped log file name. Every '/' in the model name is replaced
# by '_' so that the name stays a single path component.
TIMESTAMP=$(date '+%Y%m%d_%H%M%S')
MODEL_NAME_SAFE=${model_name//\//_}
LOG_FILE="${LOG_DIR}/evaluate_${MODEL_NAME_SAFE}_${TIMESTAMP}.log"

# From here on, stdout and stderr go to both the terminal and the log file.
exec > >(tee -a "${LOG_FILE}") 2>&1

echo "==> Log file: ${LOG_FILE}"
echo "==> Start time: $(date '+%Y-%m-%d %H:%M:%S')"
echo "========================================"

# Models to evaluate. The ":+" expansion leaves the array empty when no model
# name was given and otherwise adds the name as exactly one element, so a name
# containing spaces or glob characters is neither split nor expanded.
MODEL_NAMES=(
  ${model_name:+"${model_name}"}
)

# Dataset names; each one is run as a separate generation job per model.
TASKS=(
  "math"
  "gsm8k"
  "amc"
  "minerva"
  "olympiad"
  "aime2024"
  "aime2025"
)

#######################################
# Fill GPU_QUEUE with the ids of the GPUs that jobs may be scheduled on.
# Uses the comma-separated CUDA_VISIBLE_DEVICES when it is set and non-empty;
# otherwise queries nvidia-smi for the index of every GPU.
# Whitespace inside an entry and empty entries (e.g. "0, 1,") are dropped.
# Globals:
#   CUDA_VISIBLE_DEVICES (read)
#   GPU_QUEUE            (written)
# Outputs:
#   The selected GPU ids on stdout; a warning on stderr when none were found.
# Returns:
#   0 if at least one GPU id was found, 1 otherwise.
#######################################
detect_gpus() {
  local -a candidates=()
  local source_label entry

  if [[ -n "${CUDA_VISIBLE_DEVICES:-}" ]]; then
    source_label="Using GPUs from CUDA_VISIBLE_DEVICES"
    IFS=',' read -ra candidates <<< "${CUDA_VISIBLE_DEVICES}"
  else
    source_label="Available GPUs"
    # Only call nvidia-smi when it exists, so that a machine without it ends
    # up with an empty list and the explicit warning below.
    if command -v nvidia-smi > /dev/null 2>&1; then
      mapfile -t candidates < <(nvidia-smi --query-gpu=index --format=csv,noheader)
    fi
  fi

  GPU_QUEUE=()
  for entry in "${candidates[@]}"; do
    entry="${entry//[[:space:]]/}"
    if [[ -n "${entry}" ]]; then
      GPU_QUEUE+=("${entry}")
    fi
  done

  echo "${source_label}: ${GPU_QUEUE[*]}"

  if (( ${#GPU_QUEUE[@]} == 0 )); then
    echo "==> WARNING: no GPU ids found; no task can be scheduled." >&2
    return 1
  fi
  return 0
}

# The warning has already been printed by detect_gpus; the status is not
# treated as fatal at this point.
detect_gpus || true

# Bookkeeping for running jobs: GPU id -> PID of the background job on it.
declare -A pids

#######################################
# Launch one generation job in the background, pinned to a single GPU.
# Globals:
#   pids (written) - stores the new job's PID under the GPU id
# Arguments:
#   $1 - GPU id, given to the job as its CUDA_VISIBLE_DEVICES
#   $2 - model, passed on as --model
#   $3 - task, passed on as --dataset
# Outputs:
#   A timestamped start message on stdout.
#######################################
start_job() {
  local device="$1" model_ref="$2" dataset="$3"
  local started_at
  started_at="$(date '+%Y-%m-%d %H:%M:%S')"

  printf '%s\n' "==> [${started_at}] Start task [${dataset}] with model [${model_ref}] on GPU [${device}] ..."

  # The assignment prefix applies CUDA_VISIBLE_DEVICES to this command only.
  CUDA_VISIBLE_DEVICES="${device}" python -m evaluation.generate \
    --model "${model_ref}" --dataset "${dataset}" &
  pids["${device}"]=$!
}

#######################################
# Run every entry of TASKS for one model, with at most one job per GPU.
# Globals:
#   TASKS     (read)    - task names to run
#   GPU_QUEUE (updated) - ids of the GPUs that are currently idle
#   pids      (updated) - associative array: GPU id -> PID of its running job
# Arguments:
#   $1 - model forwarded to start_job
# Outputs:
#   Progress messages on stdout; warnings and errors on stderr.
# Returns:
#   0 if every job exited with status 0, 1 if at least one job failed.
#   Exits the script with status 1 when tasks remain but no GPU is idle and
#   no job is running (otherwise the loop would spin forever).
#######################################
run_tasks_for_model() {
  local model="$1"
  local -r num_tasks=${#TASKS[@]}
  local next_task=0
  local failed=0
  local gpu_id pid status

  while :; do
    # Hand every idle GPU to the next pending task.
    while (( ${#GPU_QUEUE[@]} > 0 && next_task < num_tasks )); do
      gpu_id="${GPU_QUEUE[0]}"
      GPU_QUEUE=("${GPU_QUEUE[@]:1}")
      start_job "${gpu_id}" "${model}" "${TASKS[next_task]}"
      next_task=$(( next_task + 1 ))
    done

    if (( ${#pids[@]} == 0 )); then
      if (( next_task >= num_tasks )); then
        break
      fi
      # Tasks are pending, the queue is empty and nothing is running, so no
      # GPU can ever be returned to the queue.
      echo "==> ERROR: tasks remain for model [${model}] but no GPU is available; aborting." >&2
      exit 1
    fi

    # Poll the running jobs; the key list is expanded before the loop starts,
    # so removing entries inside it is safe.
    for gpu_id in "${!pids[@]}"; do
      pid="${pids[${gpu_id}]}"
      if ! kill -0 "${pid}" 2>/dev/null; then
        # The process is gone; wait only collects its exit status.
        status=0
        wait "${pid}" 2>/dev/null || status=$?
        echo "==> [$(date '+%Y-%m-%d %H:%M:%S')] GPU [${gpu_id}] job finished with PID [${pid}]."
        if (( status != 0 )); then
          echo "==> WARNING: job with PID [${pid}] on GPU [${gpu_id}] exited with status ${status}." >&2
          failed=1
        fi
        unset "pids[${gpu_id}]"
        GPU_QUEUE+=("${gpu_id}")
      fi
    done

    sleep 1
  done

  return "${failed}"
}

for MODEL_NAME in "${MODEL_NAMES[@]}"; do
  echo "==> Processing model: ${MODEL_NAME}"
  run_tasks_for_model "${MODEL_NAME}" \
    || echo "==> WARNING: at least one task failed for model [${MODEL_NAME}]." >&2
done

#######################################
# Run the results recheck and, if enabled, the general benchmarks.
# The recheck is started in the background so that it overlaps with the
# benchmarks, and is waited for before this function returns.
# Globals:
#   model_name    (read) - passed to every evaluation module
#   general_bench (read) - "true" runs supergpqa, bbeh and mmlupro;
#                          any other value skips them
# Outputs:
#   Progress messages on stdout; a warning on stderr if the recheck fails.
#######################################
run_post_generation_steps() {
  local recheck_pid
  local recheck_status=0

  python -m evaluation.results_recheck --model_name "${model_name}" &
  recheck_pid=$!

  # The general_bench switch decides whether the general evaluations run.
  if [[ "${general_bench}" == "true" ]]; then
    echo "==> [$(date '+%Y-%m-%d %H:%M:%S')] Running general benchmarks (supergpqa, bbeh, mmlupro)..."
    python -m evaluation.eval_supergpqa --model_path "${model_name}"
    python -m evaluation.eval_bbeh --model_path "${model_name}"
    python -m evaluation.eval_mmlupro --model_path "${model_name}"
  else
    echo "==> [$(date '+%Y-%m-%d %H:%M:%S')] Skipping general benchmarks (general_bench=false)"
  fi

  # Without this wait the closing messages could be printed while the
  # recheck is still running.
  wait "${recheck_pid}" || recheck_status=$?
  if (( recheck_status != 0 )); then
    echo "==> WARNING: evaluation.results_recheck exited with status ${recheck_status}." >&2
  fi
}

run_post_generation_steps

echo "========================================"
echo "==> End time: $(date '+%Y-%m-%d %H:%M:%S')"
echo "==> All tasks have finished!"
echo "==> Log saved to: ${LOG_FILE}"
