#!/bin/bash -l

##############################
#       Job blueprint        #
##############################

# Give your job a name, so you can recognize it in the queue overview
#SBATCH --job-name=eval ## CHANGE JOBNAME HERE
#SBATCH --array=0

# Remove one # to uncomment
#SBATCH --output=./joblog/%x-%A_%a.out                          ## Stdout
#SBATCH --error=./joblog/%x-%A_%a.err                           ## Stderr

# Define how many nodes you need. Here, we ask for 1 node.
#SBATCH -N 1                                        ##nodes
#SBATCH -n 1                                        ##tasks
#SBATCH --cpus-per-task=16
#SBATCH --mem=150G
#SBATCH --time=0-24:00:00
#SBATCH --gres=gpu:4 --ntasks-per-node=1 -N 1
# Turn on mail notification. There are many possible self-explanatory values:
# NONE, BEGIN, END, FAIL, ALL (including all aforementioned)
# For more values, check "man sbatch"
#SBATCH --mail-type=ALL
# Remember to set your email address here instead of nobody
#SBATCH --mail-user=hyen@princeton.edu

# Job banner: record when and where the job runs, followed by a blank line.
printf '%s\n' \
    "Date              = $(date)" \
    "Hostname          = $(hostname -s)" \
    "Working Directory = $(pwd)" \
    ""

# Allocation summary taken from the SLURM environment, plus the
# TRANSFORMERS_CACHE value inherited from the submitting shell.
# Variables that are unset simply print as empty.
printf '%s\n' \
    "Number of Nodes Allocated      = $SLURM_JOB_NUM_NODES" \
    "Number of Tasks Allocated      = $SLURM_NTASKS" \
    "Number of Cores/Task Allocated = $SLURM_CPUS_PER_TASK" \
    "Array Job ID                   = $SLURM_ARRAY_JOB_ID" \
    "Array Task ID                  = $SLURM_ARRAY_TASK_ID" \
    "Cache                          = $TRANSFORMERS_CACHE"

# Choose which entry of the model tables to run (IDX) and how many GPUs the
# server may use (NGPU).
#   - Inside a SLURM array task: take both values from the SLURM environment.
#   - Otherwise (no array task id, e.g. an interactive run): fall back to
#     fixed defaults.
# NOTE(review): the fallback index 6 is beyond the entries currently listed in
# the paths/models arrays of this script, so a non-array run finds no model
# directory and skips -- confirm whether that is intended.
if [[ -n ${SLURM_ARRAY_TASK_ID} ]]; then
    IDX=${SLURM_ARRAY_TASK_ID}
    NGPU=${SLURM_GPUS_ON_NODE}
else
    IDX=6
    NGPU=1
fi

# Exported so child processes inherit the OpenMP thread limit.
OMP_NUM_THREADS=8
export OMP_NUM_THREADS

# From here on, trace every command so the job log shows exactly what ran.
set -o xtrace
echo "Slurm IDX: $IDX"

# Activate the Python environment that the rest of this script runs in.
. /scratch/gpfs/hyen/simple-evals/env/bin/activate

# ---- Evaluation settings ----------------------------------------------------
# n    : passed to the evaluator as --examples and embedded in the output tag.
# seed : initial value only; the evaluation loop below reassigns it per run.
# pre  : prefix joined with the served model name for --model and --output-dir.
# port : port the vLLM server listens on and the readiness probe queries.
n=300
seed=0
pre="hosted_vllm"
# pre="react_vllm"
port=8000

# Exported empty; switch to the commented line to export debug=1 instead.
# (Not read anywhere in this script -- presumably consumed by the eval code.)
# export debug=1
debug=''
export debug

# ---- Model tables -----------------------------------------------------------
# `paths` and `models` are parallel arrays indexed by $IDX: the checkpoint
# directory to serve and the name it is served under. Keep them in the same
# order when (un)commenting entries.
paths=(
    # /scratch/gpfs/DANQIC/models/Qwen3-8B
    /scratch/gpfs/DANQIC/models/Qwen3-32B
)

models=(
    # qwen3-8b
    qwen3-32b-enabled
)

#######################################
# Block until the vLLM server reports at least one model on /v1/models.
# Arguments:
#   $1 - PID of the backgrounded server process
#   $2 - port the server listens on
# Returns:
#   0 once the API lists a model; 1 if the server process is no longer
#   running (e.g. it crashed during startup), so callers never poll forever.
#######################################
wait_for_server() {
    local server_pid=$1
    local server_port=$2
    local response

    while true; do
        # Liveness first: a dead server can never become ready.
        if ! kill -0 "$server_pid" 2>/dev/null; then
            return 1
        fi
        if response=$(curl -s "http://localhost:${server_port}/v1/models") \
            && jq -e '.data | length > 0' <<<"$response" >/dev/null 2>&1; then
            return 0
        fi
        sleep 2
    done
}

# Serve the model selected by $IDX with vLLM, run the evaluation against it
# for seeds 0..2, then shut the server down.
# NOTE(review): the loop variable is immediately replaced by the $IDX entry,
# so with more than one entry in `paths` the same model is served once per
# entry -- confirm whether a single pass per array task is intended.
for path in "${paths[@]}"; do
#for step in 10 20 30 40 50 60 70 80 90 100; do
    path=${paths[$IDX]}
    model=${models[$IDX]}

    #model=$model-step$step
    #path=$path/global_step_$step/policy

    if [[ ! -d "$path" ]]; then
        echo "Path $path does not exist, skipping..."
        continue
    fi

    echo "Serving $model from $path"
    # Arguments are kept in an array so each value stays a single word.
    # max-model-len 131072 equals the YaRN factor (4.0) times the original
    # 32768 position embeddings given in --rope-scaling.
    serve_args=(
        "$path"
        --hf-overrides '{"architectures": ["Qwen3ForCausalLM"]}'
        --served-model-name "$model"
        --port "$port"
        --dtype bfloat16
        --tensor_parallel_size "$NGPU"
        --enforce-eager
        --enable-reasoning --reasoning-parser deepseek_r1
        --enable-auto-tool-choice --tool-call-parser hermes
        --rope-scaling '{"rope_type":"yarn","factor":4.0,"original_max_position_embeddings":32768}'
        --max-model-len 131072
    )
    vllm serve "${serve_args[@]}" &
    server_pid=$!
    # --uvicorn-log-level warning --disable-log-requests --disable-log-stats

    # wait for server to be ready; abort the job if the server died instead
    if ! wait_for_server "$server_pid" "$port"; then
        echo "vllm server for $model exited before becoming ready, aborting" >&2
        exit 1
    fi
    echo "API ready!"

    echo "--------------------------------Running eval--------------------------------"
    # python -m simple-evals.simple_evals --eval hle_text,browsecomp,healthbench_hard --model $model --output-dir simple-evals/outputs/${model} --tag "v1_${n}_${seed}" --examples $n
    for seed in {0..2}; do
        python -m simple-evals.simple_evals \
            --eval healthbench_hard \
            --n-threads 300 \
            --model "$pre-$model" \
            --output-dir "simple-evals/outputs/$pre-$model" \
            --tag "v1_${n}_${seed}" \
            --examples "$n" \
            --model_seed "$seed"
    done

    # kill the vllm server: by exact PID first, then sweep any remaining
    # processes whose command line still matches, and give them time to exit
    kill "$server_pid" 2>/dev/null
    pkill -f "vllm serve $path"
    sleep 10
done

# python simple-evals/scripts/collect_results.py \
#     --models hosted_vllm-qwen3-8b \
#     --evals hle_text \
#     --output-dir simple-evals/outputs --tag v1_300 --seeds 100

