#!/bin/bash
# OlymMATH Hard pass@32 eval on novelty_production_gspo_topk100_a01 step 15.

# Stop at the first unhandled command failure.
set -o errexit

# Honour a caller-supplied PROJECT_DIR (when set and non-empty); otherwise
# fall back to the directory two levels above this script.
if [ -z "${PROJECT_DIR:-}" ]; then
    PROJECT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
fi
cd "${PROJECT_DIR}"

# Activate the Python virtualenv; VLLM_VENV_PATH overrides the default
# location under $HOME.
_venv_dir="${VLLM_VENV_PATH:-$HOME/verl-vllm012}"
source "${_venv_dir}/bin/activate"

# Environment handed to the Python eval processes started later.
# PYTHONUNBUFFERED=1 keeps Python's stdout/stderr unbuffered, so redirected
# output is written as it is produced.
# NOTE(review): the VLLM_* values presumably select the FlashAttention
# backend, disable TRT-LLM attention and enable the V1 engine — confirm
# against the installed vLLM version.
export \
    WANDB_CONSOLE=off \
    PYTHONUNBUFFERED=1 \
    VLLM_USE_TRTLLM_ATTENTION=0 \
    VLLM_ATTENTION_BACKEND=FLASH_ATTN \
    VLLM_USE_V1=1

# --- Inputs ---------------------------------------------------------------
# Checkpoint to evaluate and the directory holding the custom task
# definitions; both are relative paths.
CUSTOM_TASKS_PATH="evaluate/custom_tasks"
MODEL_PATH="checkpoints/olmo3-puzzle-grpo/novelty_production_gspo_topk100_a01/merged_step_15"

# --- Outputs --------------------------------------------------------------
# OUT_SUBDIR is a per-run folder nested under OUTPUT_DIR.
OUTPUT_DIR="results/novelty_prod_s15_math_eval_diverse"
OUT_SUBDIR="$OUTPUT_DIR/novelty_s15"
# The log file path is anchored at PROJECT_DIR.
LOG_FILE="$PROJECT_DIR/logs/novelty_s15_olymp_eval.log"

# Create the per-run output folder (and any missing parents) up front.
mkdir -p "$OUT_SUBDIR"

# Assemble the comma-separated key=value string passed via --model_args.
# Each option sits on its own array line so it is easy to edit or comment out.
_model_opts=(
    "pretrained=${MODEL_PATH}"
    "tensor_parallel_size=1"
    "data_parallel_size=8"
    "gpu_memory_utilization=0.85"
    "max_model_len=26000"
)
model_args="${_model_opts[0]}"
for _opt in "${_model_opts[@]:1}"; do
    model_args+=",${_opt}"
done

# Print a short run banner (model path and output folder) followed by a
# blank line.
_rule="=========================================="
printf '%s\n' \
    "${_rule}" \
    "OlymMATH Hard pass@32: novelty_production_gspo_topk100_a01 s15" \
    "${_rule}" \
    "Model: ${MODEL_PATH}" \
    "Output: ${OUT_SUBDIR}" \
    ""

# Make sure the log directory exists; otherwise the redirection below fails
# before the eval can start.
mkdir -p "$(dirname -- "${LOG_FILE}")"

# Run the eval with stdout and stderr both sent to LOG_FILE.
# The script runs with errexit enabled, so the exit status is captured with
# `|| rc=$?`: a bare failing command would abort the script right here and
# the [FAIL] report below would never be printed.
rc=0
python scripts/evals/lm_eval_dp_diverse.py \
    --model vllm \
    --model_args "${model_args}" \
    --include_path "${CUSTOM_TASKS_PATH}" \
    --tasks olymp_math_hard_pass32 \
    --batch_size auto \
    --apply_chat_template \
    --seed 42 \
    --output_path "${OUT_SUBDIR}" \
    --log_samples \
    > "${LOG_FILE}" 2>&1 || rc=$?

# On failure, point the user at the log and propagate the eval's exit code.
if [ "${rc}" -ne 0 ]; then
    echo "[FAIL] exit ${rc} — see ${LOG_FILE}" >&2
    exit "${rc}"
fi

echo "[DONE] OlymMATH Hard pass@32 complete"

# Post-process the eval output directory with compute_pass_at_k.py and write
# the JSON summary next to the per-run folder. A failure of this step still
# aborts the script through errexit.
python scripts/evals/compute_pass_at_k.py "${OUT_SUBDIR}" \
    --k_values 1,2,4,8,16,32 \
    --cons_k 8,32 \
    --json_output "${OUTPUT_DIR}/novelty_s15_pass_at_k.json"
