# Run scripts/classification_inference.py once per dataset, sequentially.
for dataset in arc mmlu mixeval; do
  python scripts/classification_inference.py --dataset "$dataset"
done

# Run scripts/free_form_inference.py once per dataset, sequentially.
for dataset in mmlu gsm8k; do
  python scripts/free_form_inference.py --dataset "$dataset"
done

# Download the RouterBench 0-shot and 5-shot pickles into data/.
#
# - Hugging Face "/blob/" URLs return the HTML file-viewer page; the raw file
#   is served from the matching "/resolve/" URL, so that is what is fetched.
# - The work happens in a subshell so the caller's working directory is never
#   changed, and a missing data/ directory aborts the download instead of
#   saving files in the wrong place and then stepping up with "cd ..".
# - -O pins the output name so a re-run overwrites an earlier copy instead of
#   saving to a ".1"-suffixed file.
(
  cd data || exit 1
  base_url=https://huggingface.co/datasets/withmartian/routerbench/resolve/main
  status=0
  for name in routerbench_0shot.pkl routerbench_5shot.pkl; do
    # Attempt both files even if one fails; remember the failure.
    wget -O "$name" "$base_url/$name" || status=1
  done
  exit "$status"
) || printf '%s\n' "error: failed to download RouterBench data into data/" >&2

# Run the preprocessing script once before the sweeps below.
python scripts/preprocess.py

# Run scripts/classification.py, then scripts/free_form.py, each over the
# same three model-index groups, one invocation at a time.
for experiment in classification free_form; do
  for model_group in 0,1,2 3,4,5 6,7,8; do
    python "scripts/${experiment}.py" --models "$model_group"
  done
done

# Run scripts/routerbench.py for every combination of:
#   - shot mode:   default (no flag), then --few-shot
#   - model set:   9,4,5 / 0,9,4,3,5 / 0..10
#   - noise level: low, medium, high
#
# The three noise levels of one (shot mode, model set) group run in parallel.
# "wait" is a barrier after each group: without it the next group (and any
# command following this section) could start while earlier background jobs
# were still running, so more than three runs could overlap.
for shot_flag in "" --few-shot; do
  for model_set in 9,4,5 0,9,4,3,5 0,1,2,3,4,5,6,7,8,9,10; do
    for noise in low medium high; do
      # ${shot_flag:+...} expands to nothing when the flag is empty, so no
      # empty argument is passed in the default mode.
      python scripts/routerbench.py --models "$model_set" --noise-level "$noise" ${shot_flag:+"$shot_flag"} &
    done
    wait
  done
done

# Run scripts/routerbench_times.py on the full model list (0-10) for each
# noise level in the order medium, low, high. For every level the script is
# invoked four times, sequentially: with no extra flag, then with
# --no-speedup, --greedy and --sigma-none.
all_models=0,1,2,3,4,5,6,7,8,9,10
for noise in medium low high; do
  for variant in "" --no-speedup --greedy --sigma-none; do
    # ${variant:+...} drops the argument entirely for the no-flag run.
    python scripts/routerbench_times.py --models "$all_models" --noise-level "$noise" ${variant:+"$variant"}
  done
done
