#!/usr/bin/env bash
# Settings for an evaluation sweep: each model listed here is evaluated on
# each benchmark listed here. Abort on unset variables (-u) and on any
# command failure (-e).
set -eu

# ---- models ----
# Directory holding one sub-directory per entry of `models`.
model_path="/path/to/trained/models"
models=(
    "RISE-1.5B"
    "RISE-3B"
    "RISE-7B"
)

# Checkpoint selector (default: none).
ckpt="none"

# Chat template identifier.
chat_template="default"

# ---- evaluation benchmarks ----
# Relative path, so it is resolved against the directory the script is
# launched from. Each benchmark is read from "<dataset_path>/<name>.jsonl".
dataset_path="../../data/benchmarks"
DATASET_NAME=("MATH500" "AIME2024" "AMC2023" "minerva_math" "olympiadbench")

# ---- generation config ----
# NOTE(review): presumably `gen` is the number of samples per problem and
# `temp`/`top_p` are the sampling parameters used when `greedy` is false;
# confirm against base_eval.sh.
greedy="false"
gen="8"
temp="1.0"
top_p="1"

# Prompt text; the doubled backslash yields a single literal backslash
# before "boxed".
system_prompt="Please reason step by step, and put your final answer within \\boxed{}."
postfix_prompt=""

self_verify="true"

# ---- judge config ----
judge="rule_based"

# Loop over models and benchmarks: run ./base_eval.sh once per
# (model, benchmark) pair. Because the script runs under `set -e`, the first
# failing evaluation stops the whole sweep.
#
# Positional arguments handed to base_eval.sh, in order:
#   model dir, checkpoint, chat template, dataset file, greedy, gen, temp,
#   top_p, system prompt, postfix prompt, self_verify, judge
#
# Every argument is quoted so that a value containing whitespace or glob
# characters stays a single word, and an empty value is still passed as an
# (empty) argument instead of vanishing and shifting the ones after it.
for model in "${models[@]}"; do
    # Depends only on the model, so compute it once per model.
    model_dir="$model_path/$model"
    for dataset in "${DATASET_NAME[@]}"; do
        dataset_file="$dataset_path/$dataset.jsonl"
        bash ./base_eval.sh \
            "$model_dir" \
            "$ckpt" \
            "$chat_template" \
            "$dataset_file" \
            "$greedy" \
            "$gen" \
            "$temp" \
            "$top_p" \
            "$system_prompt" \
            "$postfix_prompt" \
            "$self_verify" \
            "$judge"
    done
done
