# for dataset in math
# do
#     for model in Qwen2_5_3b_chat
#     do 
#         for ablation in exploration selection expansion
#         do
#             # python llm_reason.py --dataset $dataset --model $model --method woc --reward skywork --n_samples 200 --roll_num 32 --beam_width 1 --max_depth_allowed 3 --ablation $ablation --remote
#             python llm_reason.py --dataset $dataset --model $model --method woc --reward skyworko1 --n_samples 200 --roll_num 32 --beam_width 2 --max_depth_allowed 3 --ablation $ablation --remote
#         done
#     done
# done 
# python llm_reason.py --dataset gsm8k --model Qwen2_5_3b_chat --method woc --reward skywork --n_samples 200 --roll_num 32 --beam_width 1 --max_depth_allowed 3 --ablation exploration

# Single active run: invoke llm_reason.py with the woc method on the math
# dataset, the Qwen2_5_3b_chat model and the skyworko1 reward, with the
# ablation flag set to "expansion". One flag per line so that individual
# values are easy to spot and edit.
python llm_reason.py \
  --dataset math \
  --model Qwen2_5_3b_chat \
  --method woc \
  --reward skyworko1 \
  --n_samples 100 \
  --roll_num 32 \
  --beam_width 2 \
  --max_depth_allowed 3 \
  --ablation expansion