# Sweep llm_reason.py over each dataset: one `--method sc` run, followed by
# one `--method bestn` run per reward model. Every run uses --roll_num 32,
# --remote and --n_samples 500. Runs execute sequentially; a failing run does
# not stop the remaining runs.
for dataset in gsm8k olympiadbench
do
    # The `sc` run is launched without a --reward flag.
    python llm_reason.py --method sc --dataset "$dataset" --roll_num 32 --remote --n_samples 500
    # Same settings with `bestn`, once per reward model (order matters only
    # for the sequence in which results are produced).
    for reward in skywork skyworko1
    do
        python llm_reason.py --method bestn --dataset "$dataset" --roll_num 32 --reward "$reward" --remote --n_samples 500
    done
done
# for temperature in 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5
# do
    # python llm_reason.py --method sc --dataset math --roll_num 16 --remote --n_samples 200 --model Qwen2_5_3b_chat --temperature $temperature 
    # python llm_reason.py --method sc --dataset math --roll_num 16 --remote --n_samples 200 --model Llama3_1_8b_chat --temperature $temperature 
    # python llm_reason.py --method bestn --dataset math --roll_num 16 --reward skywork --model Qwen2_5_3b_chat --n_samples 200 --remote --temperature $temperature
    # python llm_reason.py --method bestn --dataset math --roll_num 128 --reward armorm --model Qwen2_5_3b_chat --n_samples 200 --remote --temperature $temperature 
#     python llm_reason.py --method bestn --dataset math --roll_num 16 --reward skyworko1 --model Qwen2_5_3b_chat --n_samples 200 --temperature $temperature --agg last
#     # python llm_reason.py --method bestn --dataset math --roll_num 128 --reward self-Qwen2_5_3b_chat --model Qwen2_5_3b_chat --n_samples 200 --remote --temperature $temperature 
# done 

# for model in Qwen2_5_14b_chat Gemma2_9b_chat Llama2_13b_chat Mistral_7b_chat Qwen2_5_3b_chat Llama3_1_8b_chat
# do 
#     python llm_reason.py --method bestn --dataset math --roll_num 10 --reward skyworko1 --model $model --n_samples 500 --remote
# done 
# for dataset in gsm8k math aqua gpqa_diamond gpqa_extended gpqa_main csqa wino proofwriter prontoqa
# do
#     python llm_reason.py --method reward_sc --dataset $dataset --roll_num 10 --reward skywork --remote
# done

# for dataset in math gsm8k
# do
#     for model in Qwen2_5_14b_chat Gemma2_9b_chat Llama2_13b_chat Mistral_7b_chat Qwen2_5_3b_chat Llama3_1_8b_chat
#     do 
#         python llm_reason.py --method bestn --dataset $dataset --roll_num 128 --reward skywork --model $model --n_samples 500 --remote
#         python llm_reason.py --method bestn --dataset $dataset --roll_num 128 --reward shepherd  --model $model --n_samples 500 --remote
#         python llm_reason.py --method bestn --dataset $dataset --roll_num 128 --reward armorm --model $model --n_samples 500 --remote
#         python llm_reason.py --method bestn --dataset $dataset --roll_num 128 --reward skyworko1 --model $model --n_samples 500 --remote
#     done 
# done 
# for model in Qwen2_5_14b_chat Gemma2_9b_chat Llama2_13b_chat Mistral_7b_chat Llama3_1_8b_chat
# do 
#     python llm_reason.py --method bestn --dataset math --roll_num 10 --reward self-$model --model $model --n_samples 500
# done 
# python llm_reason.py --dataset math --method mcts --model Qwen2_5_3b_chat --reward self-Qwen2_5_3b_chat --n_samples 200  --roll_num 16 --mcts_num_last_votes 5
# # python llm_reason.py --dataset math --method mcts --model Qwen2_5_3b_chat --reward skyworko1 --n_samples 200 --roll_num 32
# python llm_reason.py --method bestn --dataset math --roll_num 128 --reward skyworko1 --model Qwen2_5_3b_chat --n_samples 200  --remote --agg last
# python llm_reason.py --method bestn --dataset math --roll_num 128 --reward skyworko1 --model Qwen2_5_3b_chat --n_samples 200  --remote --agg min
# python llm_reason.py --method bestn --dataset math --roll_num 128  --remote --n_samples 500 --model Llama3_1_8b_chat --reward shepherd
# python llm_reason.py --method bestn --dataset math --roll_num 128  --remote --n_samples 500 --model Llama3_1_8b_chat --reward armorm
# python llm_reason.py --method bestn --dataset math --roll_num 128  --remote --n_samples 500 --model Llama3_1_8b_chat --reward self-Llama3_1_8b_chat
# python llm_reason.py --method bestn --dataset math --roll_num 128  --remote --n_samples 500 --model Llama3_1_8b_chat --reward grm
# python llm_reason.py --method bestn --dataset math --roll_num 128  --remote --n_samples 500 --model Qwen2_5_3b_chat --reward grm
# python llm_reason.py --method bestn --dataset math --roll_num 128  --remote --n_samples 500 --model Llama3_1_8b_chat --reward skyworko1 --agg last