# python eval/evaluate.py --save --model "Llama-3.2-3B-Instruct" --split "answerable-full" --n-shots 1
# eval/runs/Llama-3.1-8B-Instruct_answerable-partial_all-tools_0-shot.jsonl
# Run eval/evaluate.py with --save for model "Qwen3-30B-A3B" on the
# "answerable-full" split. One option per line so individual flags are
# easy to edit or comment out between runs.
python eval/evaluate.py \
  --save \
  --model "Qwen3-30B-A3B" \
  --split "answerable-full"
