# Launches llm_reason.py three times, one run after another. Every run uses
# the same --model, --method, --n_samples and --roll_num values; only the
# --reward name and the --mcts_exploration_weight value differ between runs.

# run_mcts REWARD WEIGHT
#   $1 - value passed to --reward
#   $2 - value passed to --mcts_exploration_weight
# Returns the exit status of the python process.
run_mcts() {
  python llm_reason.py \
    --model Qwen2.5_3b_chat \
    --method mcts \
    --reward "$1" \
    --n_samples 200 \
    --roll_num 16 \
    --mcts_exploration_weight "$2"
}

# Runs are sequential; a failing run does not prevent the following ones.
run_mcts skywork 1.0
run_mcts skywork 10.0
run_mcts skyworko1 1.0
