#!/usr/bin/env bash
# Launch simple-evals benchmark runs for the model(s) enabled below.
#
# This script relies on bash-only features (arrays, brace expansion), so the
# interpreter is pinned to bash rather than left to whatever shell invokes it.

# Models to evaluate. Only uncommented entries are run; the commented entries
# are kept as a menu -- uncomment a line to include that model in the sweep.
models=(
    # claude-4-sonnet
    # claude-4-sonnet-web-search
    # o3
    # o4-mini
    # o4-mini_high
    # gpt-4.1
    # gpt-4.1-web-search
    # react-url-claude-4-sonnet-100
    # react-url-claude-4-sonnet-thinking-100

    # gpt-researcher
    # o4-mini-deep-research
    # hf-odr
    # drreact-o3-100
    # drreact-o3-10
    # drreact-o3-50

    # claude-4-sonnet
    # drreact-claude-4-sonnet-10
    # drreact-claude-4-sonnet-50
    # drreact-claude-4-sonnet-100

    # search-o1-tool-claude-4-sonnet-1
    # search-o1-tool-claude-4-sonnet-5
    # search-o1-tool-claude-4-sonnet-100
    # o3

    # drreact-summ-o3-150-50
    # drreact-summ-o3-100-50

    # drreact-summ-o3-150-25
    # drreact-summ-o3-100-25

    # react-o3-10
    # react-o3-1
    # react-o3-5

    # react-o4-mini-1
    # react-o4-mini-5
    # react-o4-mini-10

    # react-claude-4-sonnet-1
    # react-claude-4-sonnet-5
    # react-claude-4-sonnet-10
    # search-o1-tool-o4-mini-25
    # drreact-summ-o4-mini-150-50

    drreact-summ-claude-4-sonnet-100-50
    # drreact-summ-claude-4-sonnet-150-50
)

# Number of examples handed to each run via --examples; also embedded in the
# run tag so outputs from different sample sizes do not collide.
n=300

# Default model seed. NOTE: the run loop below uses `seed` as its own loop
# variable, so this value is overwritten as soon as the loop starts.
seed=0

# Run the browsecomp and hle_text evals once per (model, seed) pair.
# Each model writes into its own sub-directory of output_root, and each run is
# tagged "v3_<n>_<seed>".
output_root=/scratch/gpfs/hyen/simple-evals/outputs

for model in "${models[@]}"; do
    # no debug, final runs
    for seed in {0..0}; do
        # All expansions are quoted so a model name or path containing
        # whitespace or glob characters is passed through as a single argument.
        # A failed run is reported on stderr but does not stop the sweep, so
        # the remaining (model, seed) pairs still execute.
        python -m simple-evals.simple_evals \
            --eval browsecomp,hle_text \
            --model "$model" \
            --output-dir "${output_root}/${model}" \
            --n-threads 8 \
            --tag "v3_${n}_${seed}" \
            --examples "$n" \
            --model_seed "$seed" \
            --checkpoint-interval 5 \
            || printf 'warning: eval run failed for model=%s seed=%s (exit status %s)\n' \
                "$model" "$seed" "$?" >&2
        # Earlier invocation, kept for reference (v2 tag, no checkpointing):
        #python -m simple-evals.simple_evals --eval browsecomp,hle_text --model $model --output-dir /scratch/gpfs/hyen/simple-evals/outputs/${model} --n-threads 8 --tag "v2_${n}_${seed}" --examples $n --model_seed $seed
        echo ''
    done
done

#python simple-evals/scripts/collect_results.py \
    #--models gpt-4.1,gpt-4.1-web-search,claude-4-sonnet,claude-4-sonnet-web-search,react-web-claude-4-sonnet,o3,react-url-o3,react-url-o3-100,o4-mini,hf-odr,gpt-researcher,react-o4-mini,react-web-o4-mini,react-url-o4-mini,qwen2.5-7b,react-web-qwen2.5-7b,search-r1-qwen2.5-7b,search-r1-qwen2.5-7b-em-ppo,qwen2.5-7b-it,react-web-qwen2.5-7b-it,search-r1-qwen2.5-7b-it,search-r1-qwen2.5-7b-it-em-ppo,qwen3-8b,react-web-qwen3-8b,search-r1-qwen3-8b,hosted_vllm-qwen3-32b,react_vllm-qwen3-32b \
    #--evals hle_text,browsecomp,healthbench_hard \
    #--output-dir simple-evals/outputs --tag v1_${n} --output-csv simple-evals/outputs/leaderboard.csv --seeds 3


