# refer to src/sal/config.py for more options
# Dataset selection.
dataset_name: HuggingFaceH4/MATH-500
dataset_split: test
dataset_start: 0
# Alternative value previously noted by the author: 30
dataset_end: 100

# Generator model and sampling.
# Alternatives previously noted by the author:
#   Meta-Llama/Llama-3.2-3B-Instruct
#   Qwen/Qwen2.5-Math-7B-Instruct
model_path: Qwen/Qwen2.5-Math-1.5B-Instruct
seed: 42
temperature: 0.6
max_tokens: 4096

# Search settings (see src/sal/config.py for the meaning of each option).
approach: beam_search
n: 16
beam_width: 4
num_iterations: 20
search_batch_size: 1
filter_duplicates: true

# prm_* / verifier settings.
prm_type: thinkprm
prm_n: 4
prm_temperature: 0.4
prm_batch_size: 4
verifier_endpoint: 'http://127.0.0.1:31111'

# Output location.
# NOTE(review): the path contains a doubled slash ("results//MATH-100") and an
# "R1-7B" segment while model_path above is Qwen2.5-Math-1.5B-Instruct.
# Left unchanged here; confirm both are intended.
output_dir: /cmlscratch/agrawal5/Reward_hacking/scripts/ThinkPRM/output/results//MATH-100/R1-7B/beam_search_thinkprm/

# System prompt. Literal block scalar with strip chomping ("|-"): newlines are
# kept exactly as written and no trailing newline is appended.
system_prompt: |-
  Solve the following math problem efficiently and clearly:

  - For simple problems (2 steps or fewer):
  Provide a concise solution with minimal explanation.

  - For complex problems (3 steps or more):
  Use this step-by-step format:

  ## Step 1: [Concise description]
  [Brief explanation and calculations]

  ## Step 2: [Concise description]
  [Brief explanation and calculations]

  ...

  Regardless of the approach, always conclude with:

  Therefore, the final answer is: $\boxed{answer}$. I hope it is correct.

  Where [answer] is just the final number or expression that solves the problem.