
# Submits one `python -m src.evaluate` job through `jbsub` for every
# combination of dataset, model, few-shot count and agent type listed below.
# Requires bash (uses arrays, `[[ ]]` and `(( ))`).
#
# A failed submission does not abort the sweep: it is reported on stderr,
# counted, and the remaining combinations are still submitted.

project="llm_verifier"
mem_size="80g"
ngpucores="2+1"
require="a100_80gb"
logs_dir=logs

# The scheduler command is an array so that every argument stays exactly one
# word when expanded, whatever characters the values above contain.
scheduler=(jbsub -proj "$project" -cores "$ngpucores" -mem "$mem_size" -require "$require")

declare -a agent_types=("react" "react_ablation")
declare -a model_names=("mpt-7b-instruct" "mpt-30b-instruct")
# Values passed to --few_shot_k.
declare -a num_fs_exs=(0 1 2 3)
# The loop below has settings for fever_v1.0, hotpot_qa and gsm8k_main;
# only gsm8k_main is currently enabled. Add names here to widen the sweep.
declare -a datasets=("gsm8k_main")

# Each job's -out file is placed in this directory, so make sure it exists.
mkdir -p -- "$logs_dir" \
    || printf 'warning: could not create log directory %s\n' "$logs_dir" >&2

submitted=0
failed=0

for dataset in "${datasets[@]}"
do
    # Split and dataset size depend only on the dataset, so they are chosen
    # once per dataset rather than once per job.
    case "$dataset" in
        hotpot_qa)
            splits=("dev")
            amt=500
            ;;
        gsm8k_main)
            splits=("test")
            amt=1500
            ;;
        *)
            splits=("paper_dev")
            amt=500
            ;;
    esac

    for model_name in "${model_names[@]}"
    do
        model_type="mosaicml/$model_name"

        # The queue depends only on the model: mpt-30b-instruct jobs go to
        # x86_12h, every other model to x86_6h.
        if [[ "$model_name" == "mpt-30b-instruct" ]]; then
            queue="x86_12h"
        else
            queue="x86_6h"
        fi

        for num_ex in "${num_fs_exs[@]}"
        do
            for agent_type in "${agent_types[@]}"
            do
                for split in "${splits[@]}"
                do
                    # The same string names the job and its log file.
                    name_str="${dataset}_${num_ex}_${model_name}_${agent_type}_${split}_${amt}"

                    # NOTE(review): --debug is passed to every job; confirm
                    # that this is intended for full runs.
                    if "${scheduler[@]}" -queue "$queue" -name "$name_str" \
                        -out "${logs_dir}/${name_str}_log.txt" \
                        python -m src.evaluate \
                        --model_type "$model_type" \
                        --agent_type "$agent_type" \
                        --dataset "$dataset" \
                        --dataset_size "$amt" \
                        --split "$split" \
                        --few_shot_k "$num_ex" \
                        --debug
                    then
                        submitted=$((submitted + 1))
                    else
                        failed=$((failed + 1))
                        printf 'error: submission failed for %s\n' "$name_str" >&2
                    fi
                done
            done
        done
    done
done

printf 'submitted %d job(s), %d failure(s)\n' "$submitted" "$failed" >&2

# Leave a non-zero status behind when any submission failed (no explicit
# `exit`, so the script stays safe to source or extend).
(( failed == 0 ))
