#!/bin/bash
#
# Re-run evaluation for every (model_a, model_b, experiment) combination.
#
# For each combination this invokes ../evaluation/rerun_evaluation.py with:
#   1. eval_results/<experiment>/final_processed_<model_a>.jsonl
#   2. eval_results/<experiment>/rerun_processed_<model_b_id>-<model_a>.jsonl
#   plus --model_name <model_b> --local_inference --local_batch_size 64
# (presumably 1 is the input and 2 the output file -- confirm against
# rerun_evaluation.py).
#
# All paths are relative to the current working directory, so the script
# must be launched from a directory where both ../evaluation/ and
# eval_results/ resolve.
#
# Because of `set -e`, the first failing python invocation aborts the
# whole sweep.
set -euo pipefail

# GPU configuration
# Expose only CUDA device 0 to the child processes.
export CUDA_VISIBLE_DEVICES=0
# PyTorch CUDA allocator option, passed through the environment.
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True

# Experiment configuration
experiment_names=("hotpotqa" "musique" "triviaqa" "popqa" "2wikimultihopqa" "bamboogle" "nq_search")
# model_a is used only to build the file names under eval_results/.
model_a_list=("ASearcher-Local-7B")
# model_b is passed verbatim to --model_name; written as "<org>/<name>".
model_b_list=("inclusionAI/ASearcher-Local-7B")

for model_a in "${model_a_list[@]}"; do
    for model_b in "${model_b_list[@]}"; do
        # Second '/'-delimited field of model_b (the part after the first
        # slash, up to the next one); the whole string when it has no '/'.
        # Same result as `cut -d'/' -f2`, without spawning a pipeline.
        model_b_tail=${model_b#*/}
        model_b_id=${model_b_tail%%/*}

        for experiment_name in "${experiment_names[@]}"; do
            echo "Rerunning ${model_a} on ${experiment_name} with ${model_b_id}"
            # Every expansion is quoted so that names containing spaces or
            # glob characters reach python as single, unmodified arguments.
            python ../evaluation/rerun_evaluation.py \
                "eval_results/${experiment_name}/final_processed_${model_a}.jsonl" \
                "eval_results/${experiment_name}/rerun_processed_${model_b_id}-${model_a}.jsonl" \
                --model_name "${model_b}" \
                --local_inference \
                --local_batch_size 64
        done
    done
done
