# Run configuration for the best_of_n approach scored with a ThinkPRM verifier.
# Refer to src/sal/config.py for more options.

# Search / sampling settings.
approach: best_of_n
n: 128
search_batch_size: 1
filter_duplicates: true
temperature: 0.8
seed: 42

# Dataset selection (start/end bound the slice of the split that is used —
# TODO confirm whether dataset_end is inclusive in src/sal/config.py).
dataset_name: HuggingFaceH4/MATH-500
dataset_split: test
dataset_start: 0
dataset_end: 100

# Generator model.
model_path: Meta-Llama/Llama-3.2-3B-Instruct  # alternative: Qwen/Qwen2.5-Math-1.5B-Instruct

# PRM / verifier settings.
prm_type: thinkprm
prm_path: launch/ThinkPRM-7B
prm_batch_size: 16
prm_n: 1
prm_temperature: 0.0
verifier_endpoint: "http://127.0.0.1:31111"

# Output location (machine-specific absolute path).
# NOTE(review): the directory is named R1-7B while model_path points at
# Llama-3.2-3B-Instruct — confirm this is the intended results folder.
output_dir: /cmlscratch/agrawal5/Reward_hacking/scripts/ThinkPRM/output/results/MATH-100/R1-7B/best_of_n_thinkprm_OOD/

# System prompt sent to the generator. Written as a literal block scalar
# (|-): line breaks are kept verbatim, the trailing newline is stripped, and
# backslashes are literal, so \boxed needs no escaping.
system_prompt: |-
  Solve the following math problem efficiently and clearly:

  - For simple problems (2 steps or fewer):
  Provide a concise solution with minimal explanation.

  - For complex problems (3 steps or more):
  Use this step-by-step format:

  ## Step 1: [Concise description]
  [Brief explanation and calculations]

  ## Step 2: [Concise description]
  [Brief explanation and calculations]

  ...

  Regardless of the approach, always conclude with:

  Therefore, the final answer is: $\boxed{answer}$. I hope it is correct.

  Where [answer] is just the final number or expression
