
# Runs run_benchmark.py once per benchmark task listed in `tasks`, using the
# "DynamicCheatsheet_RetrievalSynthesis" approach with the "openai/gpt-4o"
# model, and passing "TEST_RESULTS" as the save directory.
#
# Requires bash (arrays are used); run it as `bash <this script>`.
# NOTE(review): presumably meant to be launched from a directory one level
# below the one that contains run_benchmark.py — confirm against repo layout.

# run_benchmark.py is invoked by relative path after moving up one directory,
# so abort rather than run the benchmarks from the wrong location.
cd .. || { printf 'error: cannot cd to parent directory\n' >&2; exit 1; }

tasks=("AIME_2024" "AIME_2025" "GameOf24" "GPQA_Diamond" "MMLU_Pro_Engineering")

# Options shared by every run; only --task varies between iterations.
common_args=(
    --approach "DynamicCheatsheet_RetrievalSynthesis"
    --model_name "openai/gpt-4o"
    --save_directory "TEST_RESULTS"
    --max_n_samples 100
    --faithfulness_experiment
    --insights_modification_type "None"
    --fewshot_modification_type "empty"
)

# A failing task does not stop the remaining tasks; failures are collected
# and reported once all tasks have been attempted.
failed_tasks=()
for task in "${tasks[@]}"; do
    if ! python run_benchmark.py --task "$task" "${common_args[@]}"; then
        printf 'warning: run_benchmark.py failed for task %s\n' "$task" >&2
        failed_tasks+=("$task")
    fi
done

# Exit non-zero if any task failed, so callers do not only see the status of
# the last task.
if (( ${#failed_tasks[@]} > 0 )); then
    printf 'error: %d task(s) failed: %s\n' \
        "${#failed_tasks[@]}" "${failed_tasks[*]}" >&2
    exit 1
fi