#!/bin/bash
# File: evaluate_openai.sh
# Description: Evaluate the performance of OpenAI models on different datasets

# Datasets to evaluate. Each one gets its own results directory below and is
# forwarded to evaluate.py through --datasets.
DATASETS=("snli" "multinli" "mtbench" "summeval")
# Model identifiers forwarded to evaluate.py through --model_names.
MODEL_NAMES=("gpt-4o-mini" "gpt-4o")
# Value forwarded to evaluate.py through --threads. Defaults to 4; export
# THREADS before running the script to override it without editing this file.
THREADS="${THREADS:-4}"

# Python interpreter used to launch evaluate.py. Either replace the
# placeholder below or export PYTHON_EXEC before running the script; an
# exported, non-empty value takes precedence over the placeholder.
PYTHON_EXEC="${PYTHON_EXEC:-YOUR_PYTHON_EXEC}"

# Create one output directory per dataset before the evaluation starts.
# NOTE(review): presumably evaluate.py writes its raw results here -- confirm.
# Abort immediately if a directory cannot be created (permissions, read-only
# or full filesystem) instead of starting an evaluation with nowhere to write.
for dataset in "${DATASETS[@]}"; do
    out_dir="evaluation_results/raw/${dataset}/openai"
    if ! mkdir -p "${out_dir}"; then
        echo "Error: could not create output directory: ${out_dir}" >&2
        exit 1
    fi
done

# Print a banner summarising the run configuration before evaluation starts.
# "${ARRAY[*]}" joins the array elements into a single string separated by
# the first character of IFS (a space by default).
printf '%s\n' \
    "======================" \
    "Starting OpenAI model evaluation" \
    "Datasets: ${DATASETS[*]}" \
    "Models: ${MODEL_NAMES[*]}" \
    "Threads: ${THREADS}" \
    "======================"

# Invoke evaluate.py a single time, passing every dataset and every model name
# as separate arguments. The exit status is captured so a failed run (or a
# missing interpreter) is reported and propagated instead of being followed by
# a misleading success message.
#
# PYTHON_EXEC is intentionally left unquoted, exactly as before, so that a
# value consisting of several words still splits into a command plus
# arguments.
status=0
# shellcheck disable=SC2086
$PYTHON_EXEC ./evaluate.py \
    --model_type openai \
    --datasets "${DATASETS[@]}" \
    --model_names "${MODEL_NAMES[@]}" \
    --threads "${THREADS}" || status=$?

if [[ "${status}" -ne 0 ]]; then
    echo "Error: OpenAI model evaluation failed (exit code ${status})" >&2
    exit "${status}"
fi

echo "OpenAI model evaluation completed!"