#!/bin/bash

# Directory passed to lighteval as --output_dir.
OUTPUT_DIR="./output/eval/nlu_tasks_v2"

# Configuration file listing the checkpoints to evaluate, one per line.
CHECKPOINT_CFG="./output/checkpoint.cfg"

#######################################
# Read checkpoint paths from a configuration file into CHECKPOINTS.
#
# Each line is stripped of leading/trailing whitespace; lines that are then
# empty or start with '#' are ignored. Every remaining line becomes one
# array element, so paths containing spaces are kept intact.
#
# Globals:   CHECKPOINTS (written; reset to an empty array first)
# Arguments: $1 - path to the configuration file
# Outputs:   an error message on stderr when the file cannot be read
# Returns:   0 on success, 1 when the file is missing or unreadable
#######################################
load_checkpoints() {
    local cfg_file=$1
    local line

    CHECKPOINTS=()
    if [[ ! -f "${cfg_file}" || ! -r "${cfg_file}" ]]; then
        echo "ERROR: checkpoint config '${cfg_file}' not found or not readable" >&2
        return 1
    fi

    # `read` returns non-zero on a final line that has no trailing newline;
    # the `|| [[ -n ... ]]` guard makes sure that last line is still processed.
    while IFS= read -r line || [[ -n "${line}" ]]; do
        # Trim surrounding whitespace. This also removes the '\r' left behind
        # by files saved with CRLF line endings.
        line="${line#"${line%%[![:space:]]*}"}"
        line="${line%"${line##*[![:space:]]}"}"

        # Skip commented lines and empty lines
        if [[ -z "${line}" || "${line}" == \#* ]]; then
            continue
        fi
        CHECKPOINTS+=("${line}")
    done < "${cfg_file}"
}

# Read checkpoint paths from the configuration file.
# A missing file is deliberately non-fatal (as before): the script carries on
# with an empty checkpoint list.
CHECKPOINTS=()
if ! load_checkpoints "${CHECKPOINT_CFG}"; then
    echo "WARNING: continuing with an empty checkpoint list" >&2
fi

# Quoted so that paths are printed verbatim (no word splitting or globbing).
echo "CHECKPOINTS:" "${CHECKPOINTS[@]}"

# Python file holding the custom task definitions (given to --custom_tasks).
CUSTOM_TASKS='./experiment/eval/lighteval_tasks.py'
# Value given to --max_samples.
MAX_SAMPLES='10000'

# Task list. Comment / uncomment individual entries to toggle them.
# NOTE(review): entries look like lighteval's "suite|task|few_shot|truncate"
# task spec -- confirm the field meanings against the lighteval docs.
TASKS=()

# Currently enabled tasks.
TASKS+=(
    'custom|hellaswag|0|1'
    'custom|winogrande|0|1'
    'custom|piqa|0|1'
    'custom|siqa|0|1'
    'custom|openbookqa|0|1'
    'custom|arc:easy|0|1'
    'custom|arc:challenge|0|1'
    'custom|commonsense_qa|0|1'
)

# MMLU subject tasks (all currently disabled).
TASKS+=(
    # 'custom|mmlu:abstract_algebra|0|1'
    # 'custom|mmlu:anatomy|0|1'
    # 'custom|mmlu:astronomy|0|1'
    # 'custom|mmlu:business_ethics|0|1'
    # 'custom|mmlu:clinical_knowledge|0|1'
    # 'custom|mmlu:college_biology|0|1'
    # 'custom|mmlu:college_chemistry|0|1'
    # 'custom|mmlu:college_computer_science|0|1'
    # 'custom|mmlu:college_mathematics|0|1'
    # 'custom|mmlu:college_medicine|0|1'
    # 'custom|mmlu:college_physics|0|1'
    # 'custom|mmlu:computer_security|0|1'
    # 'custom|mmlu:conceptual_physics|0|1'
    # 'custom|mmlu:econometrics|0|1'
    # 'custom|mmlu:electrical_engineering|0|1'
    # 'custom|mmlu:elementary_mathematics|0|1'
    # 'custom|mmlu:formal_logic|0|1'
    # 'custom|mmlu:global_facts|0|1'
    # 'custom|mmlu:high_school_biology|0|1'
    # 'custom|mmlu:high_school_chemistry|0|1'
    # 'custom|mmlu:high_school_computer_science|0|1'
    # 'custom|mmlu:high_school_european_history|0|1'
    # 'custom|mmlu:high_school_geography|0|1'
    # 'custom|mmlu:high_school_government_and_politics|0|1'
    # 'custom|mmlu:high_school_macroeconomics|0|1'
    # 'custom|mmlu:high_school_mathematics|0|1'
    # 'custom|mmlu:high_school_microeconomics|0|1'
    # 'custom|mmlu:high_school_physics|0|1'
    # 'custom|mmlu:high_school_psychology|0|1'
    # 'custom|mmlu:high_school_statistics|0|1'
    # 'custom|mmlu:high_school_us_history|0|1'
    # 'custom|mmlu:high_school_world_history|0|1'
    # 'custom|mmlu:human_aging|0|1'
    # 'custom|mmlu:human_sexuality|0|1'
    # 'custom|mmlu:international_law|0|1'
    # 'custom|mmlu:jurisprudence|0|1'
    # 'custom|mmlu:logical_fallacies|0|1'
    # 'custom|mmlu:machine_learning|0|1'
    # 'custom|mmlu:management|0|1'
    # 'custom|mmlu:marketing|0|1'
    # 'custom|mmlu:medical_genetics|0|1'
    # 'custom|mmlu:miscellaneous|0|1'
    # 'custom|mmlu:moral_disputes|0|1'
    # 'custom|mmlu:moral_scenarios|0|1'
    # 'custom|mmlu:nutrition|0|1'
    # 'custom|mmlu:philosophy|0|1'
    # 'custom|mmlu:prehistory|0|1'
    # 'custom|mmlu:professional_accounting|0|1'
    # 'custom|mmlu:professional_law|0|1'
    # 'custom|mmlu:professional_medicine|0|1'
    # 'custom|mmlu:professional_psychology|0|1'
    # 'custom|mmlu:public_relations|0|1'
    # 'custom|mmlu:security_studies|0|1'
    # 'custom|mmlu:sociology|0|1'
    # 'custom|mmlu:us_foreign_policy|0|1'
    # 'custom|mmlu:virology|0|1'
    # 'custom|mmlu:world_religions|0|1'
)

# Build the comma-separated task string: emit every entry followed by a
# comma, then drop the single trailing comma.
printf -v TASKS_STR '%s,' "${TASKS[@]}"
TASKS_STR=${TASKS_STR%,}


#######################################
# Run `lighteval accelerate` once for every checkpoint given as an argument.
#
# A failing run does not stop the remaining checkpoints from being
# evaluated; each failure is reported on stderr and reflected in the
# return status, so callers can tell when results are incomplete.
#
# Globals:   CUSTOM_TASKS, OUTPUT_DIR, MAX_SAMPLES, TASKS_STR (read)
# Arguments: checkpoints to evaluate, one per argument
# Outputs:   progress lines on stdout, warnings/errors on stderr
# Returns:   0 if every run succeeded (or no checkpoint was given),
#            1 if lighteval is unavailable or at least one run failed
#######################################
evaluate_checkpoints() {
    local ckpt rc
    local -a failed=()

    if (( $# == 0 )); then
        echo "WARNING: no checkpoints to evaluate" >&2
        return 0
    fi

    # Fail once, up front, instead of once per checkpoint.
    if ! command -v lighteval >/dev/null 2>&1; then
        echo "ERROR: 'lighteval' command not found" >&2
        return 1
    fi

    for ckpt in "$@"; do
        echo "Evaluating checkpoint: ${ckpt}"
        rc=0
        lighteval accelerate \
            --model_args="pretrained=${ckpt}" \
            --custom_tasks "${CUSTOM_TASKS}" \
            --output_dir "${OUTPUT_DIR}" \
            --max_samples "${MAX_SAMPLES}" \
            --tasks "${TASKS_STR}" || rc=$?
        if (( rc != 0 )); then
            echo "ERROR: evaluation failed for checkpoint '${ckpt}' (exit status ${rc})" >&2
            failed+=("${ckpt}")
        fi
    done

    if (( ${#failed[@]} > 0 )); then
        echo "ERROR: ${#failed[@]} of $# checkpoint evaluation(s) failed" >&2
        return 1
    fi
    return 0
}

# Loop through each checkpoint and run evaluation; exit non-zero if any failed.
evaluate_checkpoints "${CHECKPOINTS[@]}" || exit 1
