#!/bin/bash
#
# Run the multi-agent evaluation module (src.evaluation_multi) on one dataset.
#
# Usage:  <script> <dataset>
# Exit:   1 when the dataset argument is missing or not recognised;
#         otherwise the exit status of the python evaluation command.

# Fail on unset variables and on failures inside pipelines.
set -uo pipefail

# Single source of truth for the accepted dataset names. The usage text and
# the validation below are both derived from it so they cannot drift apart.
readonly VALID_DATASETS=(gsm8k math500 collegemath aime2024 aime2025)

# Human-readable, comma-separated form of the list for messages.
valid_list=$(printf '%s, ' "${VALID_DATASETS[@]}")
valid_list=${valid_list%, }

# Check if dataset is provided
if [[ $# -lt 1 ]]; then
    {
        echo "Usage: $0 <dataset>"
        echo "  <dataset>: ${valid_list}"
        echo "Example: $0 gsm8k"
    } >&2
    exit 1
fi

DATASET=$1

# Validate dataset against the accepted names (exact string match).
dataset_ok=false
for candidate in "${VALID_DATASETS[@]}"; do
    if [[ "$DATASET" == "$candidate" ]]; then
        dataset_ok=true
        break
    fi
done

if [[ "$dataset_ok" != true ]]; then
    {
        echo "Invalid dataset: $DATASET"
        echo "Valid options: ${valid_list}"
    } >&2
    exit 1
fi

# Run the evaluation. Capture a failing status explicitly so the success
# message is never printed for a failed run and callers see the real status.
status=0
python -m src.evaluation_multi --dataset "$DATASET" || status=$?

if [[ "$status" -ne 0 ]]; then
    echo "Multi-agent evaluation failed for $DATASET (exit status $status)." >&2
    exit "$status"
fi

echo "Multi-agent evaluation completed for $DATASET. Results saved in respective directories."