#!/bin/bash

# Run the self-agent evaluation module (src.evaluation_self) for one dataset.
#
# Usage:  $0 <dataset>
# Args:   <dataset> - one of gsm8k, math500, collegemath, aime2024, aime2025
# Exit:   1 when the dataset argument is missing or not recognised;
#         the evaluation's own non-zero status when the Python module fails;
#         0 when the evaluation succeeds.
# Note:   presumably meant to be launched from the project root so that the
#         `src` package is importable by `python -m` - TODO confirm.

# Treat unset variables as errors and let pipeline failures surface.
set -uo pipefail

# Require the dataset argument; diagnostics go to stderr.
if (( $# < 1 )); then
    {
        echo "Usage: $0 <dataset>"
        echo "  <dataset>: gsm8k, math500, collegemath, aime2024, or aime2025"
        echo "Example: $0 gsm8k"
    } >&2
    exit 1
fi

DATASET=$1

# Reject anything that is not one of the supported dataset names.
case "$DATASET" in
    gsm8k | math500 | collegemath | aime2024 | aime2025)
        ;;
    *)
        {
            echo "Invalid dataset: $DATASET"
            echo "Valid options: gsm8k, math500, collegemath, aime2024, aime2025"
        } >&2
        exit 1
        ;;
esac

# Run the evaluation.
# The evaluation script is described as detecting self-consistency vs.
# self-refinement from the directory name (sr_ for self-refinement, sc_ for
# self-consistency).
# NOTE(review): --sc is passed unconditionally here; confirm that it does not
# override that detection.
eval_status=0
python -m src.evaluation_self --dataset "$DATASET" --sc || eval_status=$?

# Do not claim success (or exit 0) when the evaluation itself failed.
if (( eval_status != 0 )); then
    echo "Self-agent evaluation failed for $DATASET (exit status $eval_status)." >&2
    exit "$eval_status"
fi

echo "Self-agent evaluation completed for $DATASET. Results saved in respective directories."